Пример #1
0
 def test_StockholmParser_strict_invalid_structure(self):
     """StockholmParser: strict flag decides how a bad structure is handled

     Consuming the default-mode parser must raise RecordError; with
     strict=False the same fixture must produce an empty list.
     """
     # Default mode: the parser is built outside assertRaises, so the
     # error is expected while list() drains it.
     strict_parser = StockholmParser(self._fake_record_bad_structure_2)
     self.assertRaises(RecordError, list, strict_parser)

     # Non-strict mode: no records are expected for this fixture.
     lenient_parser = StockholmParser(
         self._fake_record_bad_structure_2, strict=False)
     self.assertEqual(list(lenient_parser), [])
Пример #2
0
    def test_StockholmParser_strict_missing_fields(self):
        """StockholmParser: strict flag behaviour for records missing fields

        The header-less fixture must raise RecordError when consumed in the
        default mode.  With strict=False both the header-less and the
        sequence-less fixtures must produce an empty list.
        """
        # Default (strict) mode: draining the parser is expected to fail.
        strict_parser = StockholmParser(self._fake_record_no_headers)
        self.assertRaises(RecordError, list, strict_parser)

        # Non-strict mode, record without headers.
        headerless = list(
            StockholmParser(self._fake_record_no_headers, strict=False))
        self.assertEqual(headerless, [])

        # Non-strict mode, record without sequences.
        sequenceless = list(
            StockholmParser(self._fake_record_no_sequences, strict=False))
        self.assertEqual(sequenceless, [])
Пример #3
0
    def test_StockholmParser_strict_invalid_headers(self):
        """StockholmParser: functions when toggling strict record w/ bad header

        With the default (strict) settings, consuming the parser for either
        bad-header fixture must raise RecordError.  With strict=False the
        first record returned for each fixture must expose exactly one key
        in Info.GF.
        """
        # strict = True (default)
        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_header_1))

        self.assertRaises(RecordError, list,
            StockholmParser(self._fake_record_bad_header_2))

        # strict = False
        # Each fixture is parsed exactly once here; an extra parse whose
        # result was bound to a never-read local has been removed.
        obs = list(StockholmParser(self._fake_record_bad_header_1,
            strict=False))[0].Info.GF.keys()
        self.assertEqual(len(obs), 1)
        obs = list(StockholmParser(self._fake_record_bad_header_2,
            strict=False))[0].Info.GF.keys()
        self.assertEqual(len(obs), 1)
Пример #4
0
    def test_StockholmParser_strict_invalid_structure(self):
        """StockholmParser: strict flag behaviour for an invalid structure

        StockholmParser in its default mode must raise RecordError when
        consumed.  MinimalStockholmParser with strict=False must return a
        first record whose item at index 2 is None.
        """
        # Default (strict) mode: the error is expected while list() drains
        # the parser.
        strict_parser = StockholmParser(self._fake_record_bad_structure_1)
        self.assertRaises(RecordError, list, strict_parser)

        # Non-strict mode, using the minimal parser.
        lenient_records = list(MinimalStockholmParser(
            self._fake_record_bad_structure_1, strict=False))
        first_record = lenient_records[0]
        # NOTE(review): index 2 is presumably the structure slot of the
        # minimal record -- confirm against MinimalStockholmParser.
        self.assertEqual(first_record[2], None)
Пример #5
0
    def test_StockholmParser_strict_invalid_sequences(self):
        """StockholmParser: strict flag behaviour for a record w/ bad sequence

        MinimalStockholmParser in its default mode must raise RecordError
        when consumed.  StockholmParser with strict=False must return a
        first record whose NamedSeqs has length 3.
        """
        # Default (strict) mode, using the minimal parser.
        strict_parser = MinimalStockholmParser(
            self._fake_record_bad_sequence_1)
        self.assertRaises(RecordError, list, strict_parser)

        # Non-strict mode: as much as possible should be returned,
        # including partial sequences.
        lenient_records = list(StockholmParser(
            self._fake_record_bad_sequence_1, strict=False))
        salvaged = lenient_records[0].NamedSeqs
        self.assertEqual(len(salvaged), 3)
Пример #6
0
 def test_StockholmParser_single_family(self):
     """StockholmParser: parses a single family in stockholm format

     The first record parsed from self.single_family must have an empty
     GF info mapping, compare equal to the expected name-to-sequence
     mapping, and carry the expected structure string.
     """
     expected_struct = \
         '<<<<<<.........>>>>>>.........<<<<<<.............>>>>>>'
     expected_aln = {
         'K02120.1/628-682':
             'AUGGGAAAUUCCCCCUCCUAUAACCCCCCCGCUGGUAUCUCCCCCUCAGACUGGC',
         'D00647.1/629-683':
             'AUGGGAAACUCCCCCUCCUAUAACCCCCCCGCUGGCAUCUCCCCCUCAGACUGGC',
     }
     expected_header = {}

     # Only the first parsed record is inspected.
     parsed = list(StockholmParser(self.single_family))
     family = parsed[0]

     # Pull both info entries out before asserting anything.
     observed_header = family.Info['GF']
     observed_struct = family.Info['Struct']

     self.assertEqual(observed_header, expected_header)
     self.assertEqual(family, expected_aln)
     self.assertEqual(observed_struct, expected_struct)
Пример #7
0
    def test_StockholmParser(self):
        """StockholmParser: integrity of output

        Parses self._fake_record and, for every record returned, checks the
        GF accession number and author, the sequence values, the structure
        string, and the types of the alignment and structure objects.  At
        least one record must be returned.
        """
        expected_sequences = [
            ''.join(['AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA',
                     'AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU']),
            ''.join(['AACGCAUCGGAUUUCCCGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUU',
                     'AGCAAGUUUGAUCCCGACUCCUG-CGAGUCGGGAUUU']),
            ''.join(['CUCACAUCAGAUUUCCUGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUA',
                     'AGCAAGUUUGAUCCCGACCCGU--AGGGCCGGGAUUU']),
        ]
        expected_structure = ''.join(
            ['...<<<<<<<.....>>>>>>>....................<<<<<...',
             '.>>>>>....<<<<<<<<<<.....>>>>>>>>>>..'])

        records = list(StockholmParser(self._fake_record))
        # Guard against a vacuous pass: with zero records the loop below
        # would not run a single assertion.
        self.assertTrue(len(records) > 0)

        for r in records:
            headers = r.Info
            structure = r.Info['Struct']
            self.assertEqual(headers['GF']['AccessionNumber'], 'RF00014')
            self.assertEqual(headers['GF']['Author'], 'Mifsud W')
            self.assertEqualItems(r.values(), expected_sequences)
            # assertTrue rather than a bare assert: the check still runs
            # under `python -O` and failures are reported by the TestCase.
            self.assertTrue(isinstance(r, Alignment))
            self.assertEqual(structure, expected_structure)
            self.assertTrue(isinstance(structure, WussStructure))
Пример #8
0
from cogent import LoadSeqs, RNA
from cogent.parse.stockholm import StockholmParser
from sys import argv
from numpy import zeros

# Usage: stats.py /path/to/file.sto /path/to/folder/out/

if __name__ == "__main__":
    if argv[2][-1] != "/":
        argv[2] += "/"
    fin = open(argv[1])
    aln = LoadSeqs(data=StockholmParser(fin).next(), moltype=RNA)
    fin.close()
    consensus = aln.majorityConsensus()
    #counts    A:0 U:1 G:2 C:3 -:4
    counts = zeros(5, dtype=int)
    countsout = open(argv[2] + "counts.txt", 'w')
    countsout.write('\t'.join(['pos', 'maj', 'A', 'T', 'G', 'C', '-', '\n']))
    #count all nucs that do not conform to consensus for each position
    for pos, nucs in enumerate(aln.iterPositions()):
        majnuc = consensus[pos]
        for nuc in nucs:
            if nuc != majnuc:
                if nuc == 'A':
                    counts[0] += 1
                elif nuc == 'U':
                    counts[1] += 1
                elif nuc == 'G':
                    counts[2] += 1
                elif nuc == 'C':
                    counts[3] += 1