Exemplo n.º 1
0
def invert_sequence(chrSeq, start, stop):

    invert = chrSeq[int(start) - 1:int(stop)]
    MutableSeq.reverse_complement(invert)
    begin = MutableSeq.__add__(chrSeq[:int(start) - 1], invert)
    chrSeq = MutableSeq.__add__(begin, chrSeq[int(stop):])
    return chrSeq
Exemplo n.º 2
0
def create_genome_seq(aligned):

    aligned_seq = aligned.seq if type(aligned.seq) == str else aligned.seq.decode('UTF-8')
        
    genome_seq = MutableSeq(aligned_seq)

    # see samtools documentation for MD string
    
    err = re.findall(MD_REGEX, aligned.opt("MD"))
    
    seq_ix = 0
    
    # step through sequence
    for matched_bases, curr_err in err:
        
        seq_ix += int(matched_bases)
        
        assert '^' not in curr_err
        assert curr_err != genome_seq[seq_ix]
        
        genome_seq[seq_ix] = curr_err
        seq_ix += 1
    
    if aligned.is_reverse:
        genome_seq.reverse_complement()

    return genome_seq
Exemplo n.º 3
0
def add_to_pileup_dict(sams, aligned_read_set, pileup_dict):
    
    # sanity check that all the qnames (RNA read IDs) are the same
    for read in aligned_read_set:
        assert read.qname == aligned_read_set[0].qname

    if not True in [read.is_unmapped for read in aligned_read_set]:
        
        # all alignments mapped
        for read in aligned_read_set:
            
            for op, op_len in read.cigar:
                
                if op > 0 and op < 7:
                    # do not sample reads where there are insertions or deletions   
                    return
                    
            assert len(read.seq) == len(aligned_read_set[0].seq)
          
        # if aligned reads are reversed, we reverse them and hold on to that info.
        
        pos_dicts = [dict(read.aligned_pairs) for read in aligned_read_set]
        genome_seqs = [create_genome_seq(read) for read in aligned_read_set]
        qual = bytearray(aligned_read_set[0].qual, 'utf-8')
        seq = MutableSeq(aligned_read_set[0].seq if type(aligned_read_set[0].seq) == str else aligned_read_set[0].seq.decode('UTF-8'))  
        if aligned_read_set[0].is_reverse:
            seq.reverse_complement()
            qual = qual[::-1]
        
        for genome_seq in genome_seqs:
            assert len(genome_seq) == len(seq)

        for i in range(0, len(seq)):
                        
            # need (chrom, pos, genome_seq[i]) tuples for each aligned_read
            chroms = [sam.getrname(a.tid) for sam, a in izip(sams, aligned_read_set)]
            positions = [d[i] if not a.is_reverse else d[len(seq) - i - 1] for d, a in zip(pos_dicts, aligned_read_set)]
            genome_seq_i = [g[i] for g in genome_seqs]
            
            genomic_locs = tuple(zip(chroms, positions, genome_seq_i))
            
            pileup_dict[genomic_locs][seq[i]][qual[i]] += 1
Exemplo n.º 4
0
def seq_batch_query():
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    list_file = input('list file name:\n')
    with open(list_file, 'r') as In:
        organism_list = In.read().split(sep='\n')
    cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
    for organism in organism_list:
        cur.execute('INSERT INTO tasklist (Name) VALUES (?);', (organism, ))
    cur.execute(
        'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head',
        (organism))
    result = cur.fetchall()
    cur.execute('DROP TABLE tasklist;')
    cur.close()
    con.close()
    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        filename = i[2]
        sequence = MutableSeq(i[5])
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, filename, sequence]
        query_result.append(record)
    for i in query_result:
        with open(''.join(['./out/', i[1], '.fasta']), 'a') as Fileout:
            Fileout.write('>%s\n%s\n' % (i[0], i[2]))
            # rps12 may have larger than 50k fragments,  here to filter it
    rps12 = SeqIO.parse('./out/rps12.fasta', 'fasta')
    rps12short = list()
    for item in rps12:
        if len(item.seq) < 4000:
            rps12short.append(item)
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
Exemplo n.º 5
0
def seq_batch_query():
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    list_file = input('list file name:\n')
    with open(list_file, 'r') as In:
        organism_list = In.read().split(sep='\n')
    cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
    for organism in organism_list:
        cur.execute('INSERT INTO tasklist (Name) VALUES (?);', (organism,))
    cur.execute(
        'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head',
        (organism))
    result = cur.fetchall()
    cur.execute('DROP TABLE tasklist;')
    cur.close()
    con.close()
    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        filename = i[2]
        sequence = MutableSeq(i[5])
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, filename, sequence]
        query_result.append(record)
    for i in query_result:
        with open(''.join(['./out/', i[1], '.fasta']), 'a') as Fileout:
            Fileout.write('>%s\n%s\n' % (i[0], i[2]))
            # rps12 may have larger than 50k fragments,  here to filter it
    rps12 = SeqIO.parse('./out/rps12.fasta', 'fasta')
    rps12short = list()
    for item in rps12:
        if len(item.seq) < 4000:
            rps12short.append(item)
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
Exemplo n.º 6
0
class TestMutableSeq(unittest.TestCase):
    def setUp(self):
        self.s = Seq.Seq("TCAAAAGGATGCATCATG")
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG")

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways."""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG")
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq,
                              "Converting Seq to mutable")

        array_seq = MutableSeq(array.array("u", "TCAAAAGGATGCATCATG"))
        self.assertIsInstance(array_seq, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG')",
                         repr(self.mutable_s))

    def test_truncated_repr(self):
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = (
            "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA')"
        )
        self.assertEqual(expected, repr(MutableSeq(seq)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method."""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method."""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method."""
        self.assertLess(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_comparison_of_incompatible_types(self):
        with self.assertRaises(TypeError):
            self.mutable_s < 1

    def test_less_than_comparison_without_alphabet(self):
        self.assertLessEqual(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method."""
        self.assertLessEqual(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_types(self):
        with self.assertRaises(TypeError):
            self.mutable_s <= 1

    def test_less_than_or_equal_comparison_without_alphabet(self):
        self.assertLessEqual(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_greater_than_comparison(self):
        """Test __gt__ comparison method."""
        self.assertGreater(self.mutable_s, self.mutable_s[:-1])

    def test_greater_than_comparison_of_incompatible_types(self):
        with self.assertRaises(TypeError):
            self.mutable_s > 1

    def test_greater_than_comparison_without_alphabet(self):
        self.assertGreater(self.mutable_s, "TCAAAAGGATGCATCAT")

    def test_greater_than_or_equal_comparison(self):
        """Test __ge__ comparison method."""
        self.assertGreaterEqual(self.mutable_s, self.mutable_s)

    def test_greater_than_or_equal_comparison_of_incompatible_types(self):
        with self.assertRaises(TypeError):
            self.mutable_s >= 1

    def test_greater_than_or_equal_comparison_without_alphabet(self):
        self.assertGreaterEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.mutable_s),
        )

    def test_radd_method_incompatible_alphabets(self):
        self.assertEqual(
            "UCAAAAGGATCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA")),
        )

    def test_radd_method_using_seq_object(self):
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        self.assertEqual("T", self.mutable_s[0])

    def test_setting_slices(self):
        self.assertEqual(
            MutableSeq("CAAA"),
            self.mutable_s[1:5],
            "Slice mutable seq",
        )

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide",
        )

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with MutableSeq",
        )

        self.mutable_s[1:3] = array.array("u", "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with array",
        )

    def test_setting_item(self):
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_slice(self):
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_item(self):
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_appending(self):
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC"), self.mutable_s)

    def test_inserting(self):
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG"), self.mutable_s)

    def test_popping_last_item(self):
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG"), self.mutable_s,
                         "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, "Z")

    def test_count(self):
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method."""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride."""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"),
                         self.mutable_s[::-1])

    def test_complement(self):
        self.mutable_s.complement()
        self.assertEqual("AGTTTTCCTACGTAGTAC", str(self.mutable_s))

    def test_complement_rna(self):
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_mixed_aphabets(self):
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_dna_string(self):
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual("TACtttGAC", str(seq))

    def test_reverse_complement(self):
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_extend_method(self):
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT"), self.mutable_s)

    def test_extend_with_mutable_seq(self):
        self.mutable_s.extend(MutableSeq("TTT"))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT"), self.mutable_s)

    def test_delete_stride_slice(self):
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)."""
        self.assertEqual(MutableSeq("TAGTAA"), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT"), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG"), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)."""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN"), self.mutable_s)
Exemplo n.º 7
0
def seq_query():
    """Sequence query function,  to be continued.
    """
    query_type = input('1.Specific fragment\n'
                       '2.Specific Organism\n'
                       '3.Specific gene\n'
                       '4.All\n'
                       '5.All cds\n')
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
            ('%' + gene + '%', frag_type, organism))
        result = cur.fetchall()
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n'
        )
        if frag_type == 'fragments':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ?  ORDER BY Head',
                (organism, ))
        else:
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                ('%' + organism + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input(
            'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
            ('%' + gene + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '4':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon'
        )
        result = cur.fetchall()
    elif query_type == '5':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = "cds" ORDER BY Taxon'
        )
        result = cur.fetchall()

    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        sequence = MutableSeq(i[5])
        gene = i[2]
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, gene, sequence]
        query_result.append(record)

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for i in query_result:
            file_name = ''.join(
                ['output', '/', i[1].replace('/', ''), '.fasta'])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for i in query_result:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))

    cur.close()
    con.close()
    print('Done.\n')
Exemplo n.º 8
0
class TestMutableSeq(unittest.TestCase):
    def setUp(self):
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq,
                              "Converting Seq to mutable")

        array_seq = MutableSeq(
            array.array(array_indicator, "TCAAAAGGATGCATCATG"),
            IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        self.assertEqual(
            "MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
            repr(self.mutable_s))

    def test_truncated_repr(self):
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG",
                                             IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG",
                                              IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(
                MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG",
                       IUPAC.ambiguous_dna), self.mutable_s,
            "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        self.mutable_s.append("C")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_inserting(self):
        self.mutable_s.insert(4, "G")
        self.assertEqual(
            MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_popping_last_item(self):
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        self.mutable_s.extend("GAT")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_extend_with_mutable_seq(self):
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_delete_stride_slice(self):
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna),
                         self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna),
                         self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna),
                         self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)
Exemplo n.º 9
0
class TestMutableSeq(unittest.TestCase):
    def setUp(self):
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        array_seq = MutableSeq(array.array(array_indicator, "TCAAAAGGATGCATCATG"),
                               IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
                         repr(self.mutable_s))

    def test_truncated_repr(self):
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s,
                         "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_inserting(self):
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_popping_last_item(self):
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extend_with_mutable_seq(self):
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_delete_stride_slice(self):
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)
 rcl_file = tempfn + '-rcl.fa'
 f = open(rcl_file, 'w')
 nbr = 0
 for rii in xrange(len(break_pos[1])):
     if (break_pos[0][rii] == break_nb_ord[cl_ii]):
         nbr += 1
         pbr = break_pos[1][rii]
         pbseq = pb_reads[pbr]
         if (read_blocks[pbr][0]['refE'] == break_nb_ord[2]):
             readbp = read_blocks[pbr][0]['readE']
         else:
             readbp = read_blocks[pbr][0]['readS']
         pbseq = pbseq[max(0, readbp - 400):min(len(pbseq), readbp + 400)]
         if (read_blocks[pbr][0]['refStd'] == '-'):
             pbseq = MutableSeq(pbseq, generic_dna)
             pbseq.reverse_complement()
             pbseq = str(pbseq)
         f.write('>' + pbr + '\n')
         f.write(pbseq)
         f.write("\n")
 f.close()
 # Run Clustal
 clo_outfile = tempfn + '-clo-out.fa'
 clo_cmd = ['clustalo', '-i', rcl_file, '-o', clo_outfile, '--force']
 clo_out = subprocess.check_output(clo_cmd)
 # Get consensus
 msa_out = []
 for record in SeqIO.parse(clo_outfile, "fasta"):
     msa_out.append(str(record.seq))
 clo_cons = ""
 for cons_i in xrange(len(msa_out[0])):
Exemplo n.º 11
0
    qstart = int(bline[6])
    qend = int(bline[7])
    sstart = int(bline[8])
    send = int(bline[9])
    revcomp = False
    if(sstart < send):
        sstart = sstart - qstart
        send = send + reglen - qend
    else:
        sswitch = sstart
        sstart = send - reglen + qend
        send = sswitch + qstart
        revcomp = True
    if(sstart < 0):
        sstart = 0
    graph_title = bline[0] + ' ' + bline[1] + '_' + str(sstart) + '_'
    if(send > len(assfa[bline[1]])):
        send = len(assfa[bline[1]]) - 1
        graph_title = graph_title + str(send) + ' (end)'
    else:
        graph_title = graph_title + str(send)
    print str(sstart) + '-' + str(send)
    assseq = assfa[bline[1]][sstart:send]
    if(revcomp):
        assseq = MutableSeq(str(assseq), generic_dna)
        assseq.reverse_complement()
        assseq = str(assseq)
    plotMummer(regseq, assseq, args.outdir + '/' + bline[0] + '-' + bline[1])

blast_con.close()
Exemplo n.º 12
0
st=str(seq)		#toString
print st

#tipo de dato secuencia editable
from Bio.Seq import MutableSeq
mut_seq=seq.tomutable()	#convertirlo a tipo seq mutable
print mut_seq
mut_seq[0]='C'
print mut_seq
mut_seq=MutableSeq('ATGCCG',IUPAC.IUPACUnambiguousDNA())
#has methods as a list: append(), insert(), pop(), remove()
mut_seq[1:3]='TTT'
mut_seq.reverse()
mut_seq.complement()
print mut_seq
mut_seq.reverse_complement()
print mut_seq

#tipo de dato metadatos de secuencia
from Bio.SeqRecord import SeqRecord
seqrec=SeqRecord(seq,id='001', name='My Secuencia')
#2 main attributes:
#	id: string identifier, optional, recommended
#	seq: Seq object, required
#additional attributes
#	name, description: name and more info of sequence
#	dbxrefs: list of strings, each string an id of a DB
#	features: list of SeqFeature objects, those found in Genbank records
#	annotations: dictionary with further info, can't be set on initialization
seqrec=SeqRecord(Seq('mdstnvrsgmksrkkkpkttvidddddcmtcsacqsklvkisditkvsldyintmrgntlacaacgsslkllndfas',Bio.Alphabet.generic_protein), id='P20994.1', name='P20994', description='Protein A19', dbxrefs=['Pfam:PF05077', 'InterPro:IPR007769', 'DIP:2186N'])
seqrec.annotations['note']='A simple note'
Exemplo n.º 13
0
def seq_query():
    """Sequence query function,  to be continued.
    """
    query_type = input(
        '1.Specific fragment\n'
        '2.Specific Organism\n'
        '3.Specific gene\n'
        '4.All\n'
        '5.All cds\n'
    )
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
            ('%' + gene + '%', frag_type, organism))
        result = cur.fetchall()
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n')
        if frag_type == 'fragments':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ?  ORDER BY Head',
                (organism,))
        else:
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                ('%' + organism + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
            ('%' + gene + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '4':
        cur.execute('SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon')
        result = cur.fetchall()
    elif query_type == '5':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = "cds" ORDER BY Taxon')
        result = cur.fetchall()

    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        sequence = MutableSeq(i[5])
        gene = i[2]
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, gene, sequence]
        query_result.append(record)

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for i in query_result:
            file_name = ''.join([
                'output',
                '/',
                i[1].replace('/', ''),
                '.fasta'
            ])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for i in query_result:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))

    cur.close()
    con.close()
    print('Done.\n')