def test_StockholmParser_strict_invalid_structure_2(self):
    """StockholmParser: functions when toggling strict record w/ bad struct

    Uses the ``_fake_record_bad_structure_2`` fixture.  The ``_2`` suffix
    keeps this method from sharing a name with the test that uses
    ``_fake_record_bad_structure_1``: when two methods in one class body
    have the same name, only the last definition stays bound and the
    earlier test silently never runs.
    """
    # No strict argument is passed here; the original comment labels this
    # the strict case.  Consuming the parser must raise RecordError.
    self.assertRaises(
        RecordError, list,
        StockholmParser(self._fake_record_bad_structure_2))

    # Not strict: consuming the parser yields no records at all.
    parsed = list(StockholmParser(
        self._fake_record_bad_structure_2, strict=False))
    self.assertEqual(parsed, [])
def test_StockholmParser_strict_missing_fields(self):
    """StockholmParser: strict toggle on records with missing fields

    Strict parsing of the header-less fixture must raise RecordError.
    Non-strict parsing of the header-less and the sequence-less fixtures
    must both produce an empty list of records.
    """
    # strict = True (no strict argument passed)
    self.assertRaises(
        RecordError, list,
        StockholmParser(self._fake_record_no_headers))

    # strict = False: both incomplete records yield nothing
    incomplete_records = (self._fake_record_no_headers,
                          self._fake_record_no_sequences)
    for lines in incomplete_records:
        parsed = list(StockholmParser(lines, strict=False))
        self.assertEqual(parsed, [])
def test_StockholmParser_strict_invalid_headers(self):
    """StockholmParser: functions when toggling strict record w/ bad header

    For each of the two bad-header fixtures:
      * strict parsing (no strict argument) must raise RecordError;
      * non-strict parsing must yield a first record whose ``Info.GF``
        mapping holds exactly one key.
    """
    bad_header_records = (self._fake_record_bad_header_1,
                          self._fake_record_bad_header_2)

    # strict = True
    for lines in bad_header_records:
        self.assertRaises(RecordError, list, StockholmParser(lines))

    # strict = False
    # (The original also parsed the first fixture into an unused local
    # before repeating the same parse; that dead statement is removed.)
    for lines in bad_header_records:
        first_record = list(StockholmParser(lines, strict=False))[0]
        obs = first_record.Info.GF.keys()
        self.assertEqual(len(obs), 1)
def test_StockholmParser_strict_invalid_structure(self):
    """StockholmParser: strict toggle on a record with invalid structure

    Strict parsing of ``_fake_record_bad_structure_1`` with
    StockholmParser must raise RecordError.  Non-strict parsing of the
    same fixture with MinimalStockholmParser must return a first result
    whose item at index 2 is None.
    """
    bad_record = self._fake_record_bad_structure_1

    # strict = True (no strict argument passed)
    self.assertRaises(RecordError, list, StockholmParser(bad_record))

    # strict = False
    # NOTE(review): index 2 is presumably the structure slot of the
    # minimal parser's result -- confirm against MinimalStockholmParser.
    minimal_results = list(MinimalStockholmParser(bad_record, strict=False))
    self.assertEqual(minimal_results[0][2], None)
def test_StockholmParser_strict_invalid_sequences(self):
    """StockholmParser: strict toggle on a record with a bad sequence

    Strict parsing of ``_fake_record_bad_sequence_1`` with
    MinimalStockholmParser must raise RecordError.  Non-strict parsing
    with StockholmParser must give a first record with three entries in
    ``NamedSeqs``.
    """
    bad_record = self._fake_record_bad_sequence_1

    # strict (no strict argument passed)
    self.assertRaises(RecordError, list, MinimalStockholmParser(bad_record))

    # strict = False
    # In 'False' mode you expect to get back as much as possible,
    # including partial sequences.
    lenient_records = list(StockholmParser(bad_record, strict=False))
    self.assertEqual(len(lenient_records[0].NamedSeqs), 3)
def test_StockholmParser_single_family(self):
    """StockholmParser: should work on a family in stockholm format

    Parses ``self.single_family`` and checks the first record: its
    ``Info['GF']`` is empty, the record itself compares equal to the
    expected name -> sequence mapping, and ``Info['Struct']`` equals the
    expected structure string.
    """
    expected_gf = {}
    expected_seqs = {
        'K02120.1/628-682':
            'AUGGGAAAUUCCCCCUCCUAUAACCCCCCCGCUGGUAUCUCCCCCUCAGACUGGC',
        'D00647.1/629-683':
            'AUGGGAAACUCCCCCUCCUAUAACCCCCCCGCUGGCAUCUCCCCCUCAGACUGGC',
    }
    expected_struct = \
        '<<<<<<.........>>>>>>.........<<<<<<.............>>>>>>'

    alignment = list(StockholmParser(self.single_family))[0]
    # Read both Info entries before asserting, as the original did.
    gf_headers, struct = alignment.Info['GF'], alignment.Info['Struct']

    self.assertEqual(gf_headers, expected_gf)
    self.assertEqual(alignment, expected_seqs)
    self.assertEqual(struct, expected_struct)
def test_StockholmParser(self):
    """StockholmParser: integrity of output

    For every record parsed from ``self._fake_record`` this checks two GF
    header fields, the set of aligned sequences, the structure string,
    and the types of the record (Alignment) and of its structure
    (WussStructure).
    """
    expected_sequences = [
        'AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA'
        'AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU',
        'AACGCAUCGGAUUUCCCGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUU'
        'AGCAAGUUUGAUCCCGACUCCUG-CGAGUCGGGAUUU',
        'CUCACAUCAGAUUUCCUGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUA'
        'AGCAAGUUUGAUCCCGACCCGU--AGGGCCGGGAUUU',
    ]
    expected_structure = (
        '...<<<<<<<.....>>>>>>>....................<<<<<...'
        '.>>>>>....<<<<<<<<<<.....>>>>>>>>>>..')

    # NOTE(review): if the parser yields no records the loop body never
    # runs and this test passes without checking anything.
    for r in StockholmParser(self._fake_record):
        headers = r.Info
        sequences = r
        structure = r.Info['Struct']
        self.assertEqual(headers['GF']['AccessionNumber'], 'RF00014')
        self.assertEqual(headers['GF']['Author'], 'Mifsud W')
        self.assertEqualItems(sequences.values(), expected_sequences)
        # Use TestCase assertions rather than bare ``assert`` so the type
        # checks still run when Python is started with -O.
        self.assertTrue(isinstance(sequences, Alignment))
        self.assertEqual(structure, expected_structure)
        self.assertTrue(isinstance(structure, WussStructure))
from cogent import LoadSeqs, RNA from cogent.parse.stockholm import StockholmParser from sys import argv from numpy import zeros #stats.py /path/to/file.sto /path/to/folder/out/ if __name__ == "__main__": if argv[2][-1] != "/": argv[2] += "/" fin = open(argv[1]) aln = LoadSeqs(data=StockholmParser(fin).next(), moltype=RNA) fin.close() consensus = aln.majorityConsensus() #counts A:0 U:1 G:2 C:3 -:4 counts = zeros(5, dtype=int) countsout = open(argv[2] + "counts.txt", 'w') countsout.write('\t'.join(['pos', 'maj', 'A', 'T', 'G', 'C', '-', '\n'])) #count all nucs that do not conform to consensus for each position for pos, nucs in enumerate(aln.iterPositions()): majnuc = consensus[pos] for nuc in nucs: if nuc != majnuc: if nuc == 'A': counts[0] += 1 elif nuc == 'U': counts[1] += 1 elif nuc == 'G': counts[2] += 1 elif nuc == 'C': counts[3] += 1