Exemplo n.º 1
0
def duplicate_sequence(chrSeq,
                       dupStart,
                       dupStop,
                       insertLoc,
                       numDup=1,
                       invert=False):
    """Insert one or more copies of a region of ``chrSeq`` back into it.

    The duplicated region is addressed with 1-based, inclusive
    coordinates.  Every coordinate and the copy count are passed through
    ``int()``, so numeric strings are accepted as well as integers.

    Args:
        chrSeq: Sequence to duplicate within.  It is only sliced and
            concatenated; it is never modified in place.
        dupStart: 1-based position of the first duplicated element.
        dupStop: 1-based position of the last duplicated element.
        insertLoc: Number of leading elements of ``chrSeq`` that come
            before the inserted copies (0 inserts at the very start).
        numDup: How many back-to-back copies to insert.
        invert: When equal to ``True`` the inserted text is reversed
            (reversed only, not complemented).

    Returns:
        ``chrSeq[:insertLoc] + copies + chrSeq[insertLoc:]``, of whatever
        type that concatenation produces for ``chrSeq``.
    """
    insert_at = int(insertLoc)
    duplication = str(chrSeq[int(dupStart) - 1:int(dupStop)]) * int(numDup)

    # The exact comparison is kept on purpose so that a string argument
    # such as "False" does not switch inversion on.
    if invert == True:  # noqa: E712
        # ``duplication`` is an immutable str, so it has to be rebuilt; the
        # previous in-place ``MutableSeq.reverse`` call could not change it
        # and its result was never used.
        duplication = duplication[::-1]

    return chrSeq[:insert_at] + duplication + chrSeq[insert_at:]
Exemplo n.º 2
0
 def get_optimal_alignment(self):
     """Build the optimal alignment by walking the traceback pointers.

     The walk starts at the bottom corner cell of the DP matrix and
     follows each cell's parent until a cell without a parent is reached.
     Items are collected back to front and reversed at the end.

     Returns a tuple with the aligned version of seq1 and the aligned
     version of seq2, both converted with toseq().
     """
     # both results start out empty and are filled back to front
     aligned1 = MutableSeq(array.array("c"),
                           Alphabet.Gapped(IUPAC.protein, GAP_CHAR))
     aligned2 = MutableSeq(array.array("c"),
                           Alphabet.Gapped(IUPAC.protein, GAP_CHAR))

     # the bottom corner cell contributes its pair of items unconditionally
     corner = self.dpmatrix[(len(self.seq1), len(self.seq2))]
     aligned1.append(corner.seq1item)
     aligned2.append(corner.seq2item)

     # step once before the loop, so the loop starts at the corner's parent
     cell = corner.get_parent()
     parent = cell.get_parent()

     # keep collecting until the current cell has no parent left
     while parent:
         col_back = parent.col_pos == cell.col_pos - 1
         col_same = parent.col_pos == cell.col_pos
         row_back = parent.row_pos == cell.row_pos - 1
         row_same = parent.row_pos == cell.row_pos

         if col_back and row_back:
             # diagonal move: one item from each sequence
             aligned1.append(cell.seq1item)
             aligned2.append(cell.seq2item)
         elif col_same and row_back:
             # upward move: item from seq2 against a gap in seq1
             aligned1.append(GAP_CHAR)
             aligned2.append(cell.seq2item)
         elif col_back and row_same:
             # sideways move: item from seq1 against a gap in seq2
             aligned1.append(cell.seq1item)
             aligned2.append(GAP_CHAR)

         cell = parent
         parent = cell.get_parent()

     # everything was collected from the end towards the start
     aligned1.reverse()
     aligned2.reverse()
     return aligned1.toseq(), aligned2.toseq()
Exemplo n.º 3
0
class TestMutableSeq(unittest.TestCase):
    """Tests for MutableSeq: creation, comparison, editing and slicing."""

    def setUp(self):
        """Create the same 18-letter sequence as a Seq and as a MutableSeq."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG")
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG")

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways."""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG")
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq,
                              "Converting Seq to mutable")

        array_seq = MutableSeq(array.array("u", "TCAAAAGGATGCATCATG"))
        self.assertIsInstance(array_seq, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr of a short MutableSeq."""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG')",
                         repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test repr of a long MutableSeq, which is shortened with '...'."""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = (
            "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA')"
        )
        self.assertEqual(expected, repr(MutableSeq(seq)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method."""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method."""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method."""
        self.assertLess(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_comparison_of_incompatible_types(self):
        """Test that < against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s < 1

    def test_less_than_comparison_without_alphabet(self):
        """Test < between a MutableSeq and a plain string."""
        # Strict comparison: this used to assert <=, which only repeated
        # test_less_than_or_equal_comparison_without_alphabet.
        self.assertLess(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method."""
        self.assertLessEqual(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_types(self):
        """Test that <= against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s <= 1

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test <= between a MutableSeq and a plain string."""
        self.assertLessEqual(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_greater_than_comparison(self):
        """Test __gt__ comparison method."""
        self.assertGreater(self.mutable_s, self.mutable_s[:-1])

    def test_greater_than_comparison_of_incompatible_types(self):
        """Test that > against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s > 1

    def test_greater_than_comparison_without_alphabet(self):
        """Test > between a MutableSeq and a plain string."""
        self.assertGreater(self.mutable_s, "TCAAAAGGATGCATCAT")

    def test_greater_than_or_equal_comparison(self):
        """Test __ge__ comparison method."""
        self.assertGreaterEqual(self.mutable_s, self.mutable_s)

    def test_greater_than_or_equal_comparison_of_incompatible_types(self):
        """Test that >= against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s >= 1

    def test_greater_than_or_equal_comparison_without_alphabet(self):
        """Test >= between a MutableSeq and a plain string."""
        self.assertGreaterEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq."""
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.mutable_s),
        )

    def test_radd_method_incompatible_alphabets(self):
        """Test __radd__ with a MutableSeq holding RNA letters."""
        self.assertEqual(
            "UCAAAAGGATCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA")),
        )

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with a Seq object."""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test converting to a string with str()."""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len() of the sequence."""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() gives a Seq object."""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test indexing the first letter."""
        self.assertEqual("T", self.mutable_s[0])

    def test_setting_slices(self):
        """Test reading a slice and assigning to slices."""
        self.assertEqual(
            MutableSeq("CAAA"),
            self.mutable_s[1:5],
            "Slice mutable seq",
        )

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide",
        )

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with MutableSeq",
        )

        self.mutable_s[1:3] = array.array("u", "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with array",
        )

    def test_setting_item(self):
        """Test assigning a single letter by index."""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a one-letter slice."""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single letter by index."""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_appending(self):
        """Test append() of one letter."""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC"), self.mutable_s)

    def test_inserting(self):
        """Test insert() of one letter at index 4."""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG"), self.mutable_s)

    def test_popping_last_item(self):
        """Test that pop() returns the last letter."""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() of a present letter and of a missing letter."""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG"), self.mutable_s,
                         "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, "Z")

    def test_count(self):
        """Test count() with a single letter and with a two-letter string."""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() of a present letter and of a missing string."""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method."""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride."""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"),
                         self.mutable_s[::-1])

    def test_complement(self):
        """Test in-place complement of the DNA test sequence."""
        self.mutable_s.complement()
        self.assertEqual("AGTTTTCCTACGTAGTAC", str(self.mutable_s))

    def test_complement_rna(self):
        """Test in-place complement of a mixed-case RNA sequence."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement fails when both U and T are present."""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test in-place complement of an RNA sequence built from a string."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_dna_string(self):
        """Test in-place complement of a DNA sequence built from a string."""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual("TACtttGAC", str(seq))

    def test_reverse_complement(self):
        """Test in-place reverse complement of the DNA test sequence."""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_extend_method(self):
        """Test extend() with a plain string."""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT"), self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq."""
        self.mutable_s.extend(MutableSeq("TTT"))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT"), self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:5 written as 4:6 - 1."""
        # NOTE(review): despite the test name no stride is involved; the
        # slice bound is simply 6 - 1 = 5.  Confirm whether a stride
        # (e.g. 4:6:1) was intended.
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)."""
        self.assertEqual(MutableSeq("TAGTAA"), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT"), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG"), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)."""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN"), self.mutable_s)
Exemplo n.º 4
0
    def viterbi(self, sequence, state_alphabet):
        """Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.

        Returns a tuple of the decoded state path (converted with toseq())
        and the score of that path, which is the largest value in the last
        Viterbi column (a sum of values produced by _log_transform).
        """

        # calculate logarithms of the initial, transition, and emission probs
        log_initial = self._log_transform(self.initial_prob)
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        viterbi_probs = {}
        pred_state_seq = {}
        state_letters = state_alphabet.letters

        # --- recursion
        # loop over the training sequence (i = 1 .. L)
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        for i in range(0, len(sequence)):
            # loop over all of the possible i-th states in the state path
            for cur_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(cur_state, sequence[i])]

                if i == 0:
                    # for the first state, use the initial probability rather
                    # than looking back to previous states
                    max_prob = log_initial[cur_state]
                else:
                    # loop over all possible (i-1)-th previous states
                    possible_state_probs = {}
                    for prev_state in self.transitions_to(cur_state):
                        # a_{kl}
                        trans_part = log_trans[(prev_state, cur_state)]

                        # v_{k}(i - 1)
                        viterbi_part = viterbi_probs[(prev_state, i - 1)]
                        cur_prob = viterbi_part + trans_part

                        possible_state_probs[prev_state] = cur_prob

                    # calculate the viterbi probability using the max
                    max_prob = max(possible_state_probs.values())

                # v_{k}(i)
                viterbi_probs[(cur_state, i)] = (emission_part + max_prob)

                if i > 0:
                    # get the most likely prev_state leading to cur_state
                    for state in possible_state_probs:
                        if possible_state_probs[state] == max_prob:
                            pred_state_seq[(i - 1, cur_state)] = state
                            break

        # --- termination
        # calculate the probability of the state path
        # loop over all states
        all_probs = {}
        for state in state_letters:
            # v_{k}(L)
            all_probs[state] = viterbi_probs[(state, len(sequence) - 1)]

        state_path_prob = max(all_probs.values())

        # find the last pointer we need to trace back from; on a tie the
        # state that comes last in the iteration wins
        last_state = ''
        for state in all_probs:
            if all_probs[state] == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"

        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)

        # last_state is the last state in the most probable state sequence.
        # Compute that sequence by walking backwards in time. From the i-th
        # state in the sequence, find the (i-1)-th state as the most
        # probable state preceding the i-th state.
        state = last_state
        traceback_seq.append(state)
        # Count down from the last position to 1 directly; calling
        # reverse() on the result of range() only works on Python 2.
        for i in range(len(sequence) - 1, 0, -1):
            state = pred_state_seq[(i - 1, state)]
            traceback_seq.append(state)

        # put the traceback sequence in the proper orientation
        traceback_seq.reverse()

        return traceback_seq.toseq(), state_path_prob
Exemplo n.º 5
0
    def viterbi(self, sequence, state_alphabet):
        """Decode the most probable state path with the Viterbi algorithm.

        The recursion follows Durbin et al (pgs 55-57), except that
        positions are counted from 0 to (Length - 1) here rather than from
        1 to Length.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.

        Returns a tuple of the decoded state path (converted with toseq())
        and the score of that path, which is the largest value in the last
        Viterbi column.
        """
        # log-transformed model parameters
        log_initial = self._log_transform(self.initial_prob)
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        scores = {}          # (state, position) -> best score ending there
        back_pointers = {}   # (position - 1, state) -> best previous state
        state_letters = state_alphabet.letters

        # --- recursion
        for pos in range(len(sequence)):
            for cur_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(cur_state, sequence[pos])]

                if pos == 0:
                    # the first position is seeded from the initial
                    # probabilities; there is nothing to look back to
                    scores[(cur_state, pos)] = (emission_part
                                                + log_initial[cur_state])
                    continue

                # score of arriving in cur_state from every allowed
                # previous state: v_{k}(i - 1) + a_{kl}
                candidates = {}
                for prev_state in self.transitions_to(cur_state):
                    trans_part = log_trans[(prev_state, cur_state)]
                    candidates[prev_state] = (scores[(prev_state, pos - 1)]
                                              + trans_part)

                best = max(candidates.values())

                # v_{k}(i)
                scores[(cur_state, pos)] = emission_part + best

                # remember the first previous state that reaches the maximum
                for prev_state, score in candidates.items():
                    if score == best:
                        back_pointers[(pos - 1, cur_state)] = prev_state
                        break

        # --- termination
        # v_{k}(L) for every state
        final_pos = len(sequence) - 1
        all_probs = {}
        for state in state_letters:
            all_probs[state] = scores[(state, final_pos)]

        state_path_prob = max(all_probs.values())

        # the traceback starts at the best final state; on a tie the state
        # that comes last in the iteration wins
        last_state = ''
        for state, prob in all_probs.items():
            if prob == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"

        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)

        # walk backwards in time: the state at position i - 1 is the stored
        # best predecessor of the state chosen for position i
        state = last_state
        traceback_seq.append(state)
        for pos in reversed(range(1, len(sequence))):
            state = back_pointers[(pos - 1, state)]
            traceback_seq.append(state)

        # the path was collected last-to-first
        traceback_seq.reverse()

        return traceback_seq.toseq(), state_path_prob
Exemplo n.º 6
0
# Interactive-session style walkthrough: a bare name on its own line only
# shows a value when run in a REPL or notebook cell.
# my_seq must already exist at this point; it is defined outside this snippet.
mutable_seq = my_seq.tomutable()
mutable_seq
# Convert back to an immutable sequence.
new_seq = mutable_seq.toseq()
new_seq

from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC
# Build a MutableSeq directly from a string plus an alphabet.
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)

mutable_seq
# Overwrite the letter at index 5.
mutable_seq[5] = "C"
mutable_seq
# The value is inspected again after each call below, so remove() and
# reverse() are used for their effect on mutable_seq itself.
mutable_seq.remove("T")
mutable_seq
mutable_seq.reverse()
mutable_seq

# UnknownSeq objects
from Bio.Seq import UnknownSeq
# Constructed from a number only (presumably the length - confirm).
unk = UnknownSeq(20)
unk
print(unk)
len(unk)


from Bio.Seq import UnknownSeq
from Bio.Alphabet import IUPAC
# Same, but with an explicit alphabet.
unk_dna = UnknownSeq(20, alphabet=IUPAC.ambiguous_dna)
unk_dna
print(unk_dna)
Exemplo n.º 7
0
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT" +
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT" +
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA",
    generic_dna)

# Translate the gene built above with the "Bacterial" table, first as-is and
# then with cds=True.
print(gene.translate(table="Bacterial"))
print(gene.translate(table="Bacterial", cds=True))

## Look at the codon tables
from Bio.Data import CodonTable
# Tables can be looked up by name or by numeric id.
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
mito_table = CodonTable.unambiguous_dna_by_id[2]

print(standard_table)
print(mito_table.start_codons)
print(mito_table.stop_codons)
print(mito_table.forward_table["ACG"])

## Mutable sequence objects
from Bio.Seq import MutableSeq
# IUPAC is not imported in this snippet; presumably it is imported earlier in
# the file - confirm.
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
print(mutable_seq)
# Overwrite the letter at index 5.
mutable_seq[5] = "C"
print(mutable_seq)
# The sequence is printed again after each call below, so remove() and
# reverse() are used for their effect on mutable_seq itself.
mutable_seq.remove("T")
print(mutable_seq)
mutable_seq.reverse()
print(mutable_seq)
# Convert back to an immutable sequence.
new_seq = mutable_seq.toseq()
print(new_seq)
Exemplo n.º 8
0
from Bio.Seq import Seq
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC

# Build an editable DNA sequence, modify it, then convert it back.
my_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
# Disabled single-letter edit, kept for reference:
# my_seq[6] = "C"
# remove() and reverse() are called for their effect on my_seq itself; the
# return values are not used.
my_seq.remove("T")
my_seq.reverse()
print(repr(my_seq))
# Immutable version of the edited sequence.
non_mutable_seq = my_seq.toseq()
Exemplo n.º 9
0
#print gene
#YAAX = yaaX.translate(table='Bacterial', cds=True, to_stop=True)
#print YAAX

#playing with codon usage tables
#from Bio.Data import CodonTable
#standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
#mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]
#print standard_table

#mutable seq objects
from Bio.Seq import Seq
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC
#my_seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
#mutable_seq = my_seq.tomutable()
#Or just create a mutable seq!
my_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
# print(x) with a single argument behaves the same on Python 2 and Python 3.
print(my_seq)
# Earlier experiments, kept for reference.  Two things explain the confusion
# recorded here:
#  - `my_seq_div = my_seq` binds a second name to the SAME object, so an edit
#    through either name shows up under both;
#  - remove() is called for its effect on the sequence, so its result is not
#    a new sequence to keep.
#my_seq_div = my_seq
#my_seq_div[5:8] = 'tag' #slice assignment replaces positions 5, 6 and 7
#print my_seq
#print my_seq_div
#my_seq_del = my_seq_div.remove("T")
#print my_seq_del

# reverse() works in place and returns None, so reverse first and then take
# an independent copy of the reversed sequence.
my_seq.reverse()
my_seq_rev = my_seq[:]
print(my_seq_rev)

# my_seq_div is never defined (its assignment above is commented out), so the
# conversion uses my_seq, the object it would have aliased.
fin_seq = my_seq.toseq() #converts back to immutable Seq Object
Exemplo n.º 10
0
    def viterbi(self, sequence, state_alphabet):
        """Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.

        Returns a tuple of the decoded state path (converted with toseq())
        and the score of that path: the best final Viterbi value plus the
        log-transformed transition from that state to the first state letter.
        """
        # calculate logarithms of the transition and emission probs
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        viterbi_probs = {}
        pred_state_seq = {}
        state_letters = state_alphabet.letters
        # --- initialization
        #
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        #
        # NOTE(review): the seeds below are the plain probabilities 1 and 0,
        # but every value added to them later comes from _log_transform.
        # If that is a logarithm, the seeds would be expected to be 0 and
        # -infinity instead -- confirm before changing.
        #
        # v_{0}(0) = 1
        viterbi_probs[(state_letters[0], -1)] = 1
        # v_{k}(0) = 0 for k > 0
        for state_letter in state_letters[1:]:
            viterbi_probs[(state_letter, -1)] = 0

        # --- recursion
        # loop over the training sequence (i = 1 .. L)
        for i in range(0, len(sequence)):
            # now loop over all of the letters in the state path
            for main_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(main_state, sequence[i])]

                # loop over all possible states
                # NOTE(review): the transition looked up is
                # cur_state -> main_state, while the candidates come from
                # transitions_from(main_state); confirm that this yields
                # the states that can precede main_state.
                possible_state_probs = {}
                for cur_state in self.transitions_from(main_state):
                    # a_{kl}
                    trans_part = log_trans[(cur_state, main_state)]

                    # v_{k}(i - 1)
                    viterbi_part = viterbi_probs[(cur_state, i - 1)]
                    cur_prob = viterbi_part + trans_part

                    possible_state_probs[cur_state] = cur_prob

                # finally calculate the viterbi probability using the max
                max_prob = max(possible_state_probs.values())
                viterbi_probs[(main_state, i)] = (emission_part + max_prob)

                # now get the most likely state
                for state in possible_state_probs:
                    if possible_state_probs[state] == max_prob:
                        pred_state_seq[(i - 1, main_state)] = state
                        break

        # --- termination
        # calculate the probability of the state path
        # loop over all letters
        all_probs = {}
        for state in state_letters:
            # v_{k}(L)
            viterbi_part = viterbi_probs[(state, len(sequence) - 1)]
            # a_{k0}
            transition_part = log_trans[(state, state_letters[0])]

            # Both terms are log-transformed, so the product v_{k}(L) a_{k0}
            # is a sum here, exactly as in the recursion above.
            all_probs[state] = viterbi_part + transition_part

        state_path_prob = max(all_probs.values())

        # find the last pointer we need to trace back from; on a tie the
        # state that comes last in the iteration wins
        last_state = ''
        for state in all_probs:
            if all_probs[state] == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"

        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)

        cur_state = last_state
        # Count down from the last position to 0 directly; calling reverse()
        # on the result of range() only works on Python 2.
        for i in range(len(sequence) - 1, -1, -1):
            traceback_seq.append(cur_state)

            cur_state = pred_state_seq[(i - 1, cur_state)]

        # put the traceback sequence in the proper orientation
        traceback_seq.reverse()

        return traceback_seq.toseq(), state_path_prob
Exemplo n.º 11
0
# Print the second codon position
seqs[2::3]

# Sequence Length Comparison
seq1 = Seq("TTGTGGCCGCTCAGATCAGGCAGTTTAGGCTTA")
seq2 = Seq("ATTTATAGAAATGTGGTTATTTCTTAAGCATGGC")
seq1 == seq2

# Mutable sequence 
mut_seq = MutableSeq("TTGTGGCCGCTCAGATCAGGCAGTTTAGGCTTA")
print(f'MutSeq: {mut_seq}')
mut_seq[5] == "C"
print(mut_seq)
mut_seq.remove("T") 
print(mut_seq)
mut_seq.reverse()
print(mut_seq)

!wget http://d28rh4a8wq0iu5.cloudfront.net/ads1/data/SRR835775_1.first1000.fastq

# Working with Fastq files 
for record in SeqIO.parse("SRR835775_1.first1000.fastq", "fastq"):
    print(record)
    
    print(record.seq)
    print(record.letter_annotations['phred_quality'])
    
    

quals = [record.letter_annotations['phred_quality'] for record in SeqIO.parse("SRR835775_1.first1000.fastq", "fastq")]
Exemplo n.º 12
0
class TestMutableSeq(unittest.TestCase):
    """Checks MutableSeq construction, comparison, editing and complementing."""

    @staticmethod
    def _dna(letters):
        """Return a MutableSeq built from ``letters`` with IUPAC.ambiguous_dna."""
        return MutableSeq(letters, IUPAC.ambiguous_dna)

    def setUp(self):
        """Create a fresh immutable and mutable fixture before every test."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = self._dna("TCAAAAGGATGCATCATG")

    def test_mutableseq_creation(self):
        """Build a MutableSeq from a string, from a Seq and from an array."""
        from_string = self._dna("TCAAAAGGATGCATCATG")
        self.assertIsInstance(from_string, MutableSeq, "Creating MutableSeq")

        from_seq = self.s.tomutable()
        self.assertIsInstance(from_seq, MutableSeq,
                              "Converting Seq to mutable")

        letters = array.array(array_indicator, "TCAAAAGGATGCATCATG")
        from_array = self._dna(letters)
        self.assertIsInstance(from_array, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        """repr() shows the letters and the alphabet."""
        wanted = "MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())"
        self.assertEqual(wanted, repr(self.mutable_s))

    def test_truncated_repr(self):
        """repr() of a long sequence elides the middle with '...'."""
        letters = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        wanted = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        long_seq = self._dna(letters)
        self.assertEqual(wanted, repr(long_seq))

    def test_equal_comparison(self):
        """__eq__ against a plain string with the same letters."""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """__eq__ against an RNA MutableSeq, with warnings captured."""
        with warnings.catch_warnings(record=True):
            rna = MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)
            self.mutable_s == rna

    def test_not_equal_comparison(self):
        """__ne__ against an unrelated string."""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """__lt__: the sequence minus its last letter sorts first."""
        truncated = self.mutable_s[:-1]
        self.assertTrue(truncated < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """__lt__ against an RNA MutableSeq, with warnings captured."""
        with warnings.catch_warnings(record=True):
            truncated = self.mutable_s[:-1]
            rna = MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)
            truncated < rna

    def test_less_than_comparison_without_alphabet(self):
        """__lt__ against a plain string."""
        truncated = self.mutable_s[:-1]
        self.assertTrue(truncated < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """__le__: the sequence minus its last letter sorts first."""
        truncated = self.mutable_s[:-1]
        self.assertTrue(truncated <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """__le__ against an RNA MutableSeq, with warnings captured."""
        with warnings.catch_warnings(record=True):
            truncated = self.mutable_s[:-1]
            rna = MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)
            truncated <= rna

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """__le__ against a plain string."""
        truncated = self.mutable_s[:-1]
        self.assertTrue(truncated <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Adding an int to a MutableSeq raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """__radd__ with another MutableSeq concatenates the two."""
        combined = self.mutable_s.__radd__(self.mutable_s)
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG", combined)

    def test_radd_method_incompatible_alphabets(self):
        """__radd__ with an RNA MutableSeq raises TypeError."""
        with self.assertRaises(TypeError):
            rna = MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna)
            self.mutable_s.__radd__(rna)

    def test_radd_method_using_seq_object(self):
        """__radd__ with an immutable Seq concatenates the two."""
        combined = self.mutable_s.__radd__(self.s)
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG", combined)

    def test_radd_method_wrong_type(self):
        """__radd__ with an int raises TypeError."""
        self.assertRaises(TypeError, self.mutable_s.__radd__, 1234)

    def test_as_string(self):
        """str() gives back the plain letters."""
        as_text = str(self.mutable_s)
        self.assertEqual("TCAAAAGGATGCATCATG", as_text)

    def test_length(self):
        """len() counts the letters."""
        size = len(self.mutable_s)
        self.assertEqual(18, size)

    def test_converting_to_immutable(self):
        """toseq() returns a Seq instance."""
        immutable = self.mutable_s.toseq()
        self.assertIsInstance(immutable, Seq.Seq)

    def test_first_nucleotide(self):
        """Index 0 yields the first letter."""
        first = self.mutable_s[0]
        self.assertEqual('T', first)

    def test_setting_slices(self):
        """Read a slice, then overwrite [1:3] with a str, a MutableSeq and an array."""
        self.assertEqual(self._dna('CAAA'),
                         self.mutable_s[1:5], "Slice mutable seq")

        # Three letters replace two, so the sequence grows by one.
        self.mutable_s[1:3] = "GAT"
        after_string = self._dna("TGATAAAGGATGCATCATG")
        self.assertEqual(
            after_string, self.mutable_s,
            "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        after_mutable = self._dna("TAATAAAGGATGCATCATG")
        self.assertEqual(after_mutable, self.mutable_s,
                         "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        after_array = self._dna("TGATTAAAGGATGCATCATG")
        self.assertEqual(after_array, self.mutable_s,
                         "Set slice with array")

    def test_setting_item(self):
        """Assigning to a single index replaces that letter."""
        self.mutable_s[3] = "G"
        wanted = self._dna("TCAGAAGGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s)

    def test_deleting_slice(self):
        """Deleting a one-letter slice shortens the sequence."""
        del self.mutable_s[4:5]
        wanted = self._dna("TCAAAGGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s)

    def test_deleting_item(self):
        """Deleting a single index shortens the sequence."""
        del self.mutable_s[3]
        wanted = self._dna("TCAAAGGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s)

    def test_appending(self):
        """append() adds one letter at the end."""
        self.mutable_s.append("C")
        wanted = self._dna("TCAAAAGGATGCATCATGC")
        self.assertEqual(wanted, self.mutable_s)

    def test_inserting(self):
        """insert() adds one letter at the given index."""
        self.mutable_s.insert(4, "G")
        wanted = self._dna("TCAAGAAGGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s)

    def test_popping_last_item(self):
        """pop() returns the last letter."""
        popped = self.mutable_s.pop()
        self.assertEqual("G", popped)

    def test_remove_items(self):
        """remove() drops the first match and raises ValueError on a miss."""
        self.mutable_s.remove("G")
        wanted = self._dna("TCAAAAGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s, "Remove first G")

        with self.assertRaises(ValueError):
            self.mutable_s.remove('Z')

    def test_count(self):
        """count() of a single letter and of a two-letter motif."""
        for wanted, motif in ((7, "A"), (2, "AA")):
            self.assertEqual(wanted, self.mutable_s.count(motif))

    def test_index(self):
        """index() finds the first match and raises ValueError on a miss."""
        position = self.mutable_s.index("A")
        self.assertEqual(2, position)
        with self.assertRaises(ValueError):
            self.mutable_s.index("8888")

    def test_reverse(self):
        """reverse() flips the sequence in place."""
        self.mutable_s.reverse()
        wanted = self._dna("GTACTACGTAGGAAAACT")
        self.assertEqual(wanted, self.mutable_s)

    def test_reverse_with_stride(self):
        """Slicing with stride -1 yields the reversed sequence."""
        wanted = self._dna("GTACTACGTAGGAAAACT")
        self.assertEqual(wanted, self.mutable_s[::-1])

    def test_complement(self):
        """complement() rewrites the DNA fixture in place."""
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        """complement() on mixed-case RNA with an explicit RNA alphabet."""
        rna = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        rna.complement()
        self.assertEqual(str("UACuuuGAC"), str(rna))

    def test_complement_mixed_aphabets(self):
        """complement() raises ValueError when both U and T are present."""
        mixed = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            mixed.complement()

    def test_complement_rna_string(self):
        """complement() on RNA letters given without an alphabet."""
        rna = Seq.MutableSeq("AUGaaaCUG")
        rna.complement()
        self.assertEqual('UACuuuGAC', str(rna))

    def test_complement_dna_string(self):
        """complement() on DNA letters given without an alphabet."""
        dna = Seq.MutableSeq("ATGaaaCTG")
        dna.complement()
        self.assertEqual('TACtttGAC', str(dna))

    def test_reverse_complement(self):
        """reverse_complement() rewrites the DNA fixture in place."""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """reverse_complement() raises ValueError for a protein alphabet."""
        protein = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            protein.reverse_complement()

    def test_to_string_method(self):
        """Call the deprecated tostring() with warnings captured; drop this test once it is removed."""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """extend() with a plain string appends its letters."""
        self.mutable_s.extend("GAT")
        wanted = self._dna("TCAAAAGGATGCATCATGGAT")
        self.assertEqual(wanted, self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """extend() with another MutableSeq appends its letters."""
        self.mutable_s.extend(self._dna("TTT"))
        wanted = self._dna("TCAAAAGGATGCATCATGTTT")
        self.assertEqual(wanted, self.mutable_s)

    def test_delete_stride_slice(self):
        """Delete a slice whose stop is written as an expression."""
        del self.mutable_s[4:6 - 1]
        wanted = self._dna("TCAAAGGATGCATCATG")
        self.assertEqual(wanted, self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Slicing with stride 3 from offsets 0, 1 and 2."""
        for offset, letters in enumerate(("TAGTAA", "CAGGTT", "AAACCG")):
            self.assertEqual(self._dna(letters), self.mutable_s[offset::3])

    def test_set_wobble_codon_to_n(self):
        """Assign N to every third letter (slice assignment with stride 3)."""
        wobble_count = len(self.mutable_s[2::3])
        self.mutable_s[2::3] = "N" * wobble_count
        wanted = self._dna("TCNAANGGNTGNATNATN")
        self.assertEqual(wanted, self.mutable_s)
Exemplo n.º 13
0
class TestMutableSeq(unittest.TestCase):
    """Unit tests covering the public behaviour of MutableSeq."""

    def setUp(self):
        """Provide a Seq and a MutableSeq holding the same 18 letters."""
        letters = "TCAAAAGGATGCATCATG"
        self.s = Seq.Seq(letters, IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq(letters, IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """A MutableSeq can come from a str, a Seq or an array."""
        built = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(built, MutableSeq, "Creating MutableSeq")

        built = self.s.tomutable()
        self.assertIsInstance(built, MutableSeq, "Converting Seq to mutable")

        raw = array.array(array_indicator, "TCAAAAGGATGCATCATG")
        built = MutableSeq(raw, IUPAC.ambiguous_dna)
        self.assertIsInstance(built, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        """Short sequences are shown in full by repr()."""
        actual = repr(self.mutable_s)
        self.assertEqual(
            "MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())", actual)

    def test_truncated_repr(self):
        """Long sequences are abbreviated by repr()."""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        actual = repr(MutableSeq(seq, IUPAC.ambiguous_dna))
        self.assertEqual(expected, actual)

    def test_equal_comparison(self):
        """Equality with a str of identical letters."""
        same_letters = "TCAAAAGGATGCATCATG"
        self.assertEqual(self.mutable_s, same_letters)

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Equality against RNA is evaluated with warnings captured."""
        with warnings.catch_warnings(record=True):
            other = MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)
            self.mutable_s == other

    def test_not_equal_comparison(self):
        """Inequality with an unrelated str."""
        unrelated = "other thing"
        self.assertNotEqual(self.mutable_s, unrelated)

    def test_less_than_comparison(self):
        """A proper prefix compares less than the full sequence."""
        prefix = self.mutable_s[:-1]
        self.assertTrue(prefix < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Less-than against RNA is evaluated with warnings captured."""
        with warnings.catch_warnings(record=True):
            prefix = self.mutable_s[:-1]
            other = MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)
            prefix < other

    def test_less_than_comparison_without_alphabet(self):
        """A proper prefix compares less than the full letters as a str."""
        prefix = self.mutable_s[:-1]
        self.assertTrue(prefix < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """A proper prefix compares less-or-equal to the full sequence."""
        prefix = self.mutable_s[:-1]
        self.assertTrue(prefix <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Less-or-equal against RNA is evaluated with warnings captured."""
        with warnings.catch_warnings(record=True):
            prefix = self.mutable_s[:-1]
            other = MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)
            prefix <= other

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """A proper prefix compares less-or-equal to the full letters as a str."""
        prefix = self.mutable_s[:-1]
        self.assertTrue(prefix <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """The + operator rejects an int operand."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Reflected addition of two MutableSeq objects."""
        expected = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG"
        self.assertEqual(expected, self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Reflected addition rejects an RNA operand."""
        with self.assertRaises(TypeError):
            other = MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna)
            self.mutable_s.__radd__(other)

    def test_radd_method_using_seq_object(self):
        """Reflected addition of a Seq and a MutableSeq."""
        expected = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG"
        self.assertEqual(expected, self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Reflected addition rejects an int operand."""
        self.assertRaises(TypeError, self.mutable_s.__radd__, 1234)

    def test_as_string(self):
        """str() returns the letters unchanged."""
        expected = "TCAAAAGGATGCATCATG"
        self.assertEqual(expected, str(self.mutable_s))

    def test_length(self):
        """The fixture holds 18 letters."""
        expected = 18
        self.assertEqual(expected, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """toseq() produces a Seq."""
        converted = self.mutable_s.toseq()
        self.assertIsInstance(converted, Seq.Seq)

    def test_first_nucleotide(self):
        """The fixture starts with T."""
        expected = 'T'
        self.assertEqual(expected, self.mutable_s[0])

    def test_setting_slices(self):
        """Slice read, then three successive assignments to [1:3]."""
        expected = MutableSeq('CAAA', IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s[1:5], "Slice mutable seq")

        # str source: replaces two letters with three.
        self.mutable_s[1:3] = "GAT"
        expected = MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s,
                         "Set slice with string and adding extra nucleotide")

        # MutableSeq source: a slice of the sequence itself.
        self.mutable_s[1:3] = self.mutable_s[5:7]
        expected = MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s, "Set slice with MutableSeq")

        # array source.
        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        expected = MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Item assignment overwrites one letter."""
        self.mutable_s[3] = "G"
        expected = MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_deleting_slice(self):
        """Slice deletion removes one letter."""
        del self.mutable_s[4:5]
        expected = MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_deleting_item(self):
        """Item deletion removes one letter."""
        del self.mutable_s[3]
        expected = MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_appending(self):
        """append() grows the sequence by one trailing letter."""
        self.mutable_s.append("C")
        expected = MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_inserting(self):
        """insert() places a letter before index 4."""
        self.mutable_s.insert(4, "G")
        expected = MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_popping_last_item(self):
        """pop() hands back the trailing G."""
        expected = "G"
        self.assertEqual(expected, self.mutable_s.pop())

    def test_remove_items(self):
        """remove() deletes the first G; an absent letter raises ValueError."""
        self.mutable_s.remove("G")
        expected = MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s, "Remove first G")

        with self.assertRaises(ValueError):
            self.mutable_s.remove('Z')

    def test_count(self):
        """count() for 'A' and for 'AA'."""
        single = self.mutable_s.count("A")
        self.assertEqual(7, single)
        double = self.mutable_s.count("AA")
        self.assertEqual(2, double)

    def test_index(self):
        """index() for 'A'; an absent motif raises ValueError."""
        found_at = self.mutable_s.index("A")
        self.assertEqual(2, found_at)
        with self.assertRaises(ValueError):
            self.mutable_s.index("8888")

    def test_reverse(self):
        """In-place reversal."""
        expected = MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna)
        self.mutable_s.reverse()
        self.assertEqual(expected, self.mutable_s)

    def test_reverse_with_stride(self):
        """Reversal through a [::-1] slice."""
        expected = MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna)
        backwards = self.mutable_s[::-1]
        self.assertEqual(expected, backwards)

    def test_complement(self):
        """In-place complement of the DNA fixture."""
        self.mutable_s.complement()
        expected = str("AGTTTTCCTACGTAGTAC")
        self.assertEqual(expected, str(self.mutable_s))

    def test_complement_rna(self):
        """In-place complement of RNA with an explicit alphabet."""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        expected = str("UACuuuGAC")
        self.assertEqual(expected, str(seq))

    def test_complement_mixed_aphabets(self):
        """Letters containing both U and T cannot be complemented."""
        seq = Seq.MutableSeq("AUGaaaCTG")
        self.assertRaises(ValueError, seq.complement)

    def test_complement_rna_string(self):
        """In-place complement of RNA letters without an alphabet."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        expected = 'UACuuuGAC'
        self.assertEqual(expected, str(seq))

    def test_complement_dna_string(self):
        """In-place complement of DNA letters without an alphabet."""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        expected = 'TACtttGAC'
        self.assertEqual(expected, str(seq))

    def test_reverse_complement(self):
        """In-place reverse complement of the DNA fixture."""
        self.mutable_s.reverse_complement()
        expected = "CATGATGCATCCTTTTGA"
        self.assertEqual(expected, str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """A protein sequence cannot be reverse-complemented."""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        self.assertRaises(ValueError, seq.reverse_complement)

    def test_to_string_method(self):
        """tostring() is deprecated; call it with warnings captured (remove with the method)."""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """extend() accepts a str."""
        self.mutable_s.extend("GAT")
        expected = MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """extend() accepts another MutableSeq."""
        tail = MutableSeq("TTT", IUPAC.ambiguous_dna)
        self.mutable_s.extend(tail)
        expected = MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_delete_stride_slice(self):
        """Slice deletion with a computed stop index."""
        del self.mutable_s[4:6 - 1]
        expected = MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Every third letter, starting at each of the three offsets."""
        first = MutableSeq("TAGTAA", IUPAC.ambiguous_dna)
        self.assertEqual(first, self.mutable_s[0::3])
        second = MutableSeq("CAGGTT", IUPAC.ambiguous_dna)
        self.assertEqual(second, self.mutable_s[1::3])
        third = MutableSeq("AAACCG", IUPAC.ambiguous_dna)
        self.assertEqual(third, self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Overwrite every third letter with N via a stride-3 slice."""
        replacement = "N" * len(self.mutable_s[2::3])
        self.mutable_s[2::3] = replacement
        expected = MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna)
        self.assertEqual(expected, self.mutable_s)
Exemplo n.º 14
0
    def viterbi(self, sequence, state_alphabet):
        """Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        All scores are accumulated in log space: log-transformed transition
        and emission values are summed, never multiplied.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.

        Returns a tuple of (decoded state path, converted with toseq();
        log-space score of that path).

        Raises AssertionError if no final state could be selected, and
        KeyError if a required transition or emission entry is missing from
        the log-transformed tables.
        """
        # calculate logarithms of the transition and emission probs
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        viterbi_probs = {}
        pred_state_seq = {}
        state_letters = state_alphabet.letters
        seq_len = len(sequence)

        # --- initialization
        #
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        #
        # NOTE(review): the seeds below are the linear-space values from
        # Durbin (1 and 0) although everything after this point is in log
        # space, where they would be 0 and -infinity. Left unchanged because
        # altering them changes which paths are decoded -- confirm intent.
        #
        # v_{0}(0) = 1
        viterbi_probs[(state_letters[0], -1)] = 1
        # v_{k}(0) = 0 for k > 0
        for state_letter in state_letters[1:]:
            viterbi_probs[(state_letter, -1)] = 0

        # --- recursion
        # loop over the training sequence (i = 1 .. L)
        for i in range(seq_len):
            # now loop over all of the letters in the state path
            for main_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(main_state, sequence[i])]

                # loop over all possible states
                # NOTE(review): the lookup below is keyed
                # (cur_state, main_state), i.e. a transition *into*
                # main_state, yet the candidates come from
                # transitions_from(main_state) -- verify against that
                # helper's definition.
                possible_state_probs = {}
                for cur_state in self.transitions_from(main_state):
                    # a_{kl}
                    trans_part = log_trans[(cur_state, main_state)]

                    # v_{k}(i - 1)
                    viterbi_part = viterbi_probs[(cur_state, i - 1)]

                    possible_state_probs[cur_state] = viterbi_part + trans_part

                # finally calculate the viterbi probability using the max
                max_prob = max(possible_state_probs.values())
                viterbi_probs[(main_state, i)] = emission_part + max_prob

                # now get the most likely state (first one reaching the max)
                for state in possible_state_probs:
                    if possible_state_probs[state] == max_prob:
                        pred_state_seq[(i - 1, main_state)] = state
                        break

        # --- termination
        # calculate the probability of the state path
        # loop over all letters
        all_probs = {}
        for state in state_letters:
            # v_{k}(L)
            viterbi_part = viterbi_probs[(state, seq_len - 1)]
            # a_{k0}
            transition_part = log_trans[(state, state_letters[0])]

            # Both terms are logarithms, so the product v_{k}(L) * a_{k0}
            # from Durbin becomes a sum here. Multiplying two (negative)
            # logs would yield a positive number and make max() favour the
            # least likely final state.
            all_probs[state] = viterbi_part + transition_part

        state_path_prob = max(all_probs.values())

        # find the last pointer we need to trace back from
        # (on ties, the last matching state in iteration order wins)
        last_state = ''
        for state in all_probs:
            if all_probs[state] == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"

        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)

        # Walk the positions from last to first. reversed(range(...)) works
        # on both Python 2 and 3, whereas range(...).reverse() only exists
        # when range returns a list.
        cur_state = last_state
        for i in reversed(range(seq_len)):
            traceback_seq.append(cur_state)

            cur_state = pred_state_seq[(i - 1, cur_state)]

        # put the traceback sequence in the proper orientation
        traceback_seq.reverse()

        return traceback_seq.toseq(), state_path_prob
Exemplo n.º 15
0
# print(x) with a single argument produces the same output on Python 2 and
# Python 3, unlike the Python 2-only `print x` statement.
print(seq[:5])  # Seq supports string-like operations such as slicing
print(len(seq))
#seq[0]='C'  # not allowed: Seq objects are immutable
st = str(seq)  # convert to a plain string
print(st)

# Editable (mutable) sequence data type
from Bio.Seq import MutableSeq
mut_seq = seq.tomutable()  # convert it to a mutable sequence
print(mut_seq)
mut_seq[0] = 'C'
print(mut_seq)
mut_seq = MutableSeq('ATGCCG', IUPAC.IUPACUnambiguousDNA())
# has list-like methods: append(), insert(), pop(), remove()
mut_seq[1:3] = 'TTT'
mut_seq.reverse()
mut_seq.complement()
print(mut_seq)
mut_seq.reverse_complement()
print(mut_seq)

# Sequence metadata data type
from Bio.SeqRecord import SeqRecord
seqrec = SeqRecord(seq, id='001', name='My Secuencia')
# 2 main attributes:
#   id: string identifier, optional, recommended
#   seq: Seq object, required
# additional attributes:
#   name, description: name and more info of sequence
#   dbxrefs: list of strings, each string an id of a DB
#   features: list of SeqFeature objects, those found in Genbank records
Exemplo n.º 16
0
# Printing is written so the output is identical on Python 2 and Python 3:
# single values use print(x); several values are joined with '%s %s', which
# matches what the Python 2 statement `print a, b` emits.
print(id(seq1) == id(seq2))    # True only when seq1 and seq2 are the same object
print(str(seq1) == str(seq2))    # compare the letters by converting to string
print(str(seq1) == str(seq3))    # original note: DNA similar enough to protein

# MutableSeq
from Bio.Seq import MutableSeq
mutseq = seq1.tomutable()    # convert to MutableSeq
print('%s %s' % (mutseq, type(mutseq)))
mutSeq = MutableSeq('CGTTTAAGCTGC', IUPAC.unambiguous_dna)
print('%s %s' % (mutSeq, type(mutSeq)))
mutseq[1] = 'T'    # impossible on a plain Seq
print(mutseq)
seq1 = mutseq.toseq()    # convert to Seq
mutSeq.remove('A')    # remove first A
mutSeq[2:-5] = 'TTTT'
mutSeq.reverse()    # reverse() and reverse_complement() change the object itself
print(mutSeq)
# MutableSeq can't be a dictionary key, Seq and string can

# UnknownSeq
# Subclass of Seq for when you know the length but not the characters;
# it saves memory
from Bio.Seq import UnknownSeq
unk = UnknownSeq(25)
print('%s %s %s' % (unk, len(unk), type(unk)))
unkDNA = UnknownSeq(20, alphabet=IUPAC.ambiguous_dna)
print(unkDNA)    # N = any base
unkProt = UnknownSeq(10, alphabet=IUPAC.protein)
print(unkProt)    # X = any amino acid

print('%s %s' % (unkDNA.complement(), unkDNA.reverse_complement()))
print('%s %s' % (unkDNA.transcribe(), unkDNA.translate()))