def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Each line should contain three whitespace-separated fields:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that
        indicates that a base is unpaired. In bpseq files from the CRW
        website, the unpaired_symbol is '0'. This parameter should be a
        string to allow other symbols that can't be cast to an integer
        to indicate unpaired bases.

    Returns a tuple (seq, pairs):
        seq -- RnaSequence built from construct_sequence() applied to
            the {position: residue} mapping collected from the lines.
        pairs -- the collected Pairs after directed() and sort().
    Positions and partners are renumbered by subtracting num_base.

    Raises BpseqParseError if a line does not consist of exactly three
    fields, if a position (or a partner that is not the unpaired_symbol)
    can't be converted to an integer, if a residue position occurs more
    than once, or if Pairs.hasConflicts() reports conflicts in the
    collected base pairs.
    """
    # position -> residue, and the list of (position, partner) tuples
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Only the field splitting and the integer conversions can raise
        # ValueError, so the try block is limited to those statements.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            if partner == unpaired_symbol:
                # unpaired base: adjust pos only, there is no partner
                partner = None
            else:
                partner = int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s" % (line))

        pairs.append((pos, partner))

        # fill seq_dict, rejecting positions that were already seen
        if pos in seq_dict:
            # Report the position as written in the file: add num_base
            # back instead of assuming that the numbering starts at 1.
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                % (str(pos), str(pos + num_base)))
        seq_dict[pos] = res

    # check for conflicts, then let directed() normalize the pair list
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Each line should contain three whitespace-separated fields:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that
        indicates that a base is unpaired. In bpseq files from the CRW
        website, the unpaired_symbol is '0'. This parameter should be a
        string to allow other symbols that can't be cast to an integer
        to indicate unpaired bases.

    Returns a tuple (seq, pairs):
        seq -- RnaSequence built from construct_sequence() applied to
            the {position: residue} mapping collected from the lines.
        pairs -- the collected Pairs after directed() and sort().
    Positions and partners are renumbered by subtracting num_base.

    Raises BpseqParseError if a line does not consist of exactly three
    fields, if a position (or a partner that is not the unpaired_symbol)
    can't be converted to an integer, if a residue position occurs more
    than once, or if Pairs.hasConflicts() reports conflicts in the
    collected base pairs.
    """
    # position -> residue, and the list of (position, partner) tuples
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Only the field splitting and the integer conversions can raise
        # ValueError, so the try block is limited to those statements.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            if partner == unpaired_symbol:
                # unpaired base: adjust pos only, there is no partner
                partner = None
            else:
                partner = int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s" % (line))

        pairs.append((pos, partner))

        # fill seq_dict, rejecting positions that were already seen
        if pos in seq_dict:
            # Report the position as written in the file: add num_base
            # back instead of assuming that the numbering starts at 1.
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                % (str(pos), str(pos + num_base)))
        seq_dict[pos] = res

    # check for conflicts, then let directed() normalize the pair list
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
class PairsTests(TestCase):
    """Tests for Pairs object

    Boolean checks go through assertTrue/assertFalse rather than bare
    assert statements, because bare asserts are removed when Python runs
    with -O and the checks would then pass without being evaluated.
    """

    def setUp(self):
        """Pairs SetUp method for all tests"""
        self.Empty = Pairs([])
        self.OneList = Pairs([[1, 2]])
        self.OneTuple = Pairs([(1, 2)])
        self.MoreLists = Pairs([[2, 4], [3, 9], [6, 36], [7, 49]])
        self.MoreTuples = Pairs([(2, 4), (3, 9), (6, 36), (7, 49)])
        self.MulNoOverlap = Pairs([(1, 10), (2, 9), (3, 7), (4, 12)])
        self.MulOverlap = Pairs([(1, 2), (2, 3)])
        self.Doubles = Pairs([[1, 2], [1, 2], [2, 3], [1, 3]])
        self.Undirected = Pairs([(2, 1), (6, 4), (1, 7), (8, 3)])
        self.UndirectedNone = Pairs([(5, None), (None, 3)])
        self.UndirectedDouble = Pairs([(2, 1), (1, 2)])
        self.NoPseudo = Pairs(
            [(1, 20), (2, 19), (3, 7), (4, 6), (10, 15), (11, 14)])
        self.NoPseudo2 = Pairs([(1, 3), (4, 6)])
        # The bracket strings below sketch the pseudoknotted structure
        # that follows them (one bracket type is used for all pairs).
        #((.(.)).)
        self.p0 = Pairs([(0, 6), (1, 5), (3, 8)])
        #(.((..(.).).))
        self.p1 = Pairs([(0, 9), (2, 12), (3, 10), (5, 7)])
        #((.(.(.).)).)
        self.p2 = Pairs([(0, 10), (1, 9), (3, 12), (5, 7)])
        #((.((.(.)).).))
        self.p3 = Pairs([(0, 9), (1, 8), (3, 14), (4, 13), (6, 11)])
        #(.(((.((.))).)).(((.((((..))).)))).)
        self.p4 = Pairs([
            (0, 35), (2, 11), (3, 10), (4, 9), (6, 14), (7, 13), (16, 28),
            (17, 27), (18, 26), (20, 33), (21, 32), (22, 31), (23, 30)])
        #(.((.).))
        self.p5 = Pairs([(0, 5), (2, 8), (3, 7)])
        self.p6 = Pairs([
            (0, 19), (2, 6), (3, 5), (8, 14), (9, 13), (10, 12),
            (16, 22), (17, 21)])
        self.p7 = Pairs([
            (0, 20), (2, 6), (3, 5), (8, 14), (9, 10), (11, 16), (12, 15),
            (17, 23), (18, 22)])

    def test_init(self):
        """Pairs should initialize with both lists and tuples"""
        self.assertEqual(self.Empty, [])
        self.assertEqual(self.OneList, [[1, 2]])
        self.assertEqual(self.OneTuple, [(1, 2)])
        self.assertEqual(self.MulNoOverlap,
                         [(1, 10), (2, 9), (3, 7), (4, 12)])
        self.assertEqual(self.MulOverlap, [(1, 2), (2, 3)])

    def test_toPartners(self):
        """Pairs toPartners() should return a Partners object"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        self.assertEqual(a.toPartners(10),
                         [None, 5, None, 4, 3, 1, 9, 8, 7, 6])
        self.assertEqual(
            a.toPartners(13, 3),
            [None, None, None, None, 8, None, 7, 6, 4, 12, 11, 10, 9])
        self.assertTrue(isinstance(a.toPartners(10), Partners))
        self.assertEqual(b.toPartners(7), [4, None, 6, None, 0, None, 2])
        self.assertRaises(ValueError, c.toPartners, 7)
        self.assertEqual(c.toPartners(7, strict=False),
                         [None, None, None, 6, 5, 4, 3])
        #raises an error when trying to insert at non-existing indices
        self.assertRaises(IndexError, c.toPartners, 0)

    def test_toVienna(self):
        """Pairs toVienna() should return a ViennaStructure if possible"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        d = Pairs([(1, 6), (3, None)])
        e = Pairs([(1, 9), (8, 2), (7, 3)])  #not directed
        f = Pairs([(1, 6), (2, 5), (10, 15), (14, 11)])  # not directed
        self.assertEqual(a.toVienna(10), '.(.())(())')
        self.assertEqual(a.toVienna(13, offset=3), '....(.())(())')
        self.assertRaises(PairError, b.toVienna, 7)  #pseudoknot NOT accepted
        self.assertRaises(Exception, b.toVienna, 7)  #old test for exception
        self.assertRaises(ValueError, c.toVienna, 7)
        #pairs containing None are being skipped
        self.assertEqual(d.toVienna(7), '.(....)')
        #raises error when trying to insert at non-existing indices
        self.assertRaises(IndexError, a.toVienna, 3)
        self.assertEqual(Pairs().toVienna(3), '...')
        #test when passing in the sequence
        self.assertEqual(a.toVienna('ACGUAGCUAG'), '.(.())(())')
        self.assertEqual(a.toVienna(Rna('AACCGGUUAGCUA'), offset=3),
                         '....(.())(())')
        self.assertEqual(e.toVienna(10), '.(((...)))')
        self.assertEqual(f.toVienna(20), '.((..))...((..))....')

    def test_tuples(self):
        """Pairs tuples() should transform the elements of list to tuples"""
        x = Pairs([])
        x.tuples()
        self.assertTrue(x == [])

        x = Pairs([[1, 2], [3, 4]])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])

        x = Pairs([(1, 2), (3, 4)])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])
        self.assertTrue(x != [[1, 2], [3, 4]])

    def test_unique(self):
        """Pairs unique() should remove double occurrences of certain tuples"""
        self.assertEqual(self.Empty.unique(), [])
        self.assertEqual(self.MoreTuples.unique(), self.MoreTuples)
        self.assertEqual(self.Doubles.unique(),
                         Pairs([(1, 2), (2, 3), (1, 3)]))

    def test_directed(self):
        """Pairs directed() should change all pairs so that a<b in (a,b)"""
        self.assertEqual(self.Empty.directed(), [])
        res = self.Undirected.directed()
        res.sort()
        self.assertEqual(res, Pairs([(1, 2), (1, 7), (3, 8), (4, 6)]))
        res = self.UndirectedNone.directed()
        self.assertEqual(res, Pairs([]))
        res = self.UndirectedDouble.directed()
        self.assertEqual(res, Pairs([(1, 2)]))

    def test_symmetric(self):
        """Pairs symmetric() should add (down,up) for each (up,down)"""
        self.assertEqual(self.Empty.symmetric(), [])
        self.assertEqualItems(self.OneTuple.symmetric(), [(2, 1), (1, 2)])
        self.assertEqualItems(
            Pairs([(1, 2), (1, 2)]).symmetric(), [(1, 2), (2, 1)])
        self.assertEqualItems(
            Pairs([(1, 2), (3, 4)]).symmetric(),
            [(1, 2), (2, 1), (3, 4), (4, 3)])
        self.assertEqualItems(Pairs([(1, None)]).symmetric(), [])

    def test_paired(self):
        """Pairs paired() should omit all pairs containing None"""
        self.assertEqual(self.Empty.paired(), [])
        self.assertEqual(
            Pairs([(1, 2), (2, None), (None, 3), (None, None)]).paired(),
            [(1, 2)])

    def test_hasConflicts(self):
        """Pairs hasConflicts() should return True if there are conflicts"""
        self.assertFalse(self.Empty.hasConflicts())
        self.assertFalse(Pairs([(1, 2), (3, 4)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, 3)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, None)]).hasConflicts())

    def test_mismatches(self):
        """Pairs mismatches() should return #pairs that can't be formed"""
        # with plain string
        self.assertEqual(Pairs([(0, 1)]).mismatches('AC', {}), 1)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'C'): None}), 0)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'G'): None}), 1)
        self.assertEqual(
            Pairs([(0, 1), (2, 3), (3, 1)]).mismatches(
                'ACGU', {('A', 'U'): None}),
            3)

        # using sequence with alphabet
        sequence = Rna('ACGUA')
        self.assertEqual(
            Pairs([(0, 1), (0, 4), (0, 3)]).mismatches(sequence), 2)

    def test_hasPseudoknots(self):
        """Pairs hasPseudoknots() should return True if there's a pseudoknot"""
        self.assertFalse(self.NoPseudo.hasPseudoknots())
        self.assertFalse(self.NoPseudo2.hasPseudoknots())
        #add tests for ((.))() etc
        self.assertTrue(self.p0.hasPseudoknots())
        self.assertTrue(self.p1.hasPseudoknots())
        self.assertTrue(self.p2.hasPseudoknots())
        self.assertTrue(self.p3.hasPseudoknots())
        self.assertTrue(self.p4.hasPseudoknots())
        self.assertTrue(self.p5.hasPseudoknots())
        self.assertTrue(self.p6.hasPseudoknots())
        self.assertTrue(self.p7.hasPseudoknots())
class PairsTests(TestCase):
    """Tests for Pairs object

    Boolean checks go through assertTrue/assertFalse rather than bare
    assert statements, because bare asserts are removed when Python runs
    with -O and the checks would then pass without being evaluated.
    """

    def setUp(self):
        """Pairs SetUp method for all tests"""
        self.Empty = Pairs([])
        self.OneList = Pairs([[1, 2]])
        self.OneTuple = Pairs([(1, 2)])
        self.MoreLists = Pairs([[2, 4], [3, 9], [6, 36], [7, 49]])
        self.MoreTuples = Pairs([(2, 4), (3, 9), (6, 36), (7, 49)])
        self.MulNoOverlap = Pairs([(1, 10), (2, 9), (3, 7), (4, 12)])
        self.MulOverlap = Pairs([(1, 2), (2, 3)])
        self.Doubles = Pairs([[1, 2], [1, 2], [2, 3], [1, 3]])
        self.Undirected = Pairs([(2, 1), (6, 4), (1, 7), (8, 3)])
        self.UndirectedNone = Pairs([(5, None), (None, 3)])
        self.UndirectedDouble = Pairs([(2, 1), (1, 2)])
        self.NoPseudo = Pairs(
            [(1, 20), (2, 19), (3, 7), (4, 6), (10, 15), (11, 14)])
        self.NoPseudo2 = Pairs([(1, 3), (4, 6)])
        # The bracket strings below sketch the pseudoknotted structure
        # that follows them (one bracket type is used for all pairs).
        #((.(.)).)
        self.p0 = Pairs([(0, 6), (1, 5), (3, 8)])
        #(.((..(.).).))
        self.p1 = Pairs([(0, 9), (2, 12), (3, 10), (5, 7)])
        #((.(.(.).)).)
        self.p2 = Pairs([(0, 10), (1, 9), (3, 12), (5, 7)])
        #((.((.(.)).).))
        self.p3 = Pairs([(0, 9), (1, 8), (3, 14), (4, 13), (6, 11)])
        #(.(((.((.))).)).(((.((((..))).)))).)
        self.p4 = Pairs([
            (0, 35), (2, 11), (3, 10), (4, 9), (6, 14), (7, 13), (16, 28),
            (17, 27), (18, 26), (20, 33), (21, 32), (22, 31), (23, 30)])
        #(.((.).))
        self.p5 = Pairs([(0, 5), (2, 8), (3, 7)])
        self.p6 = Pairs([
            (0, 19), (2, 6), (3, 5), (8, 14), (9, 13), (10, 12),
            (16, 22), (17, 21)])
        self.p7 = Pairs([
            (0, 20), (2, 6), (3, 5), (8, 14), (9, 10), (11, 16), (12, 15),
            (17, 23), (18, 22)])

    def test_init(self):
        """Pairs should initialize with both lists and tuples"""
        self.assertEqual(self.Empty, [])
        self.assertEqual(self.OneList, [[1, 2]])
        self.assertEqual(self.OneTuple, [(1, 2)])
        self.assertEqual(self.MulNoOverlap,
                         [(1, 10), (2, 9), (3, 7), (4, 12)])
        self.assertEqual(self.MulOverlap, [(1, 2), (2, 3)])

    def test_toPartners(self):
        """Pairs toPartners() should return a Partners object"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        self.assertEqual(a.toPartners(10),
                         [None, 5, None, 4, 3, 1, 9, 8, 7, 6])
        self.assertEqual(
            a.toPartners(13, 3),
            [None, None, None, None, 8, None, 7, 6, 4, 12, 11, 10, 9])
        self.assertTrue(isinstance(a.toPartners(10), Partners))
        self.assertEqual(b.toPartners(7), [4, None, 6, None, 0, None, 2])
        self.assertRaises(ValueError, c.toPartners, 7)
        self.assertEqual(c.toPartners(7, strict=False),
                         [None, None, None, 6, 5, 4, 3])
        #raises an error when trying to insert at non-existing indices
        self.assertRaises(IndexError, c.toPartners, 0)

    def test_toVienna(self):
        """Pairs toVienna() should return a ViennaStructure if possible"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        d = Pairs([(1, 6), (3, None)])
        e = Pairs([(1, 9), (8, 2), (7, 3)])  #not directed
        f = Pairs([(1, 6), (2, 5), (10, 15), (14, 11)])  # not directed
        self.assertEqual(a.toVienna(10), '.(.())(())')
        self.assertEqual(a.toVienna(13, offset=3), '....(.())(())')
        self.assertRaises(PairError, b.toVienna, 7)  #pseudoknot NOT accepted
        self.assertRaises(Exception, b.toVienna, 7)  #old test for exception
        self.assertRaises(ValueError, c.toVienna, 7)
        #pairs containing None are being skipped
        # assertEqual, not the deprecated assertEquals alias, which newer
        # unittest versions no longer provide.
        self.assertEqual(d.toVienna(7), '.(....)')
        #raises error when trying to insert at non-existing indices
        self.assertRaises(IndexError, a.toVienna, 3)
        self.assertEqual(Pairs().toVienna(3), '...')
        #test when passing in the sequence
        self.assertEqual(a.toVienna('ACGUAGCUAG'), '.(.())(())')
        self.assertEqual(a.toVienna(Rna('AACCGGUUAGCUA'), offset=3),
                         '....(.())(())')
        self.assertEqual(e.toVienna(10), '.(((...)))')
        self.assertEqual(f.toVienna(20), '.((..))...((..))....')

    def test_tuples(self):
        """Pairs tuples() should transform the elements of list to tuples"""
        x = Pairs([])
        x.tuples()
        self.assertTrue(x == [])

        x = Pairs([[1, 2], [3, 4]])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])

        x = Pairs([(1, 2), (3, 4)])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])
        self.assertTrue(x != [[1, 2], [3, 4]])

    def test_unique(self):
        """Pairs unique() should remove double occurrences of certain tuples"""
        self.assertEqual(self.Empty.unique(), [])
        self.assertEqual(self.MoreTuples.unique(), self.MoreTuples)
        self.assertEqual(self.Doubles.unique(),
                         Pairs([(1, 2), (2, 3), (1, 3)]))

    def test_directed(self):
        """Pairs directed() should change all pairs so that a<b in (a,b)"""
        self.assertEqual(self.Empty.directed(), [])
        res = self.Undirected.directed()
        res.sort()
        self.assertEqual(res, Pairs([(1, 2), (1, 7), (3, 8), (4, 6)]))
        res = self.UndirectedNone.directed()
        self.assertEqual(res, Pairs([]))
        res = self.UndirectedDouble.directed()
        self.assertEqual(res, Pairs([(1, 2)]))

    def test_symmetric(self):
        """Pairs symmetric() should add (down,up) for each (up,down)"""
        self.assertEqual(self.Empty.symmetric(), [])
        self.assertEqualItems(self.OneTuple.symmetric(), [(2, 1), (1, 2)])
        self.assertEqualItems(
            Pairs([(1, 2), (1, 2)]).symmetric(), [(1, 2), (2, 1)])
        self.assertEqualItems(
            Pairs([(1, 2), (3, 4)]).symmetric(),
            [(1, 2), (2, 1), (3, 4), (4, 3)])
        self.assertEqualItems(Pairs([(1, None)]).symmetric(), [])

    def test_paired(self):
        """Pairs paired() should omit all pairs containing None"""
        self.assertEqual(self.Empty.paired(), [])
        self.assertEqual(
            Pairs([(1, 2), (2, None), (None, 3), (None, None)]).paired(),
            [(1, 2)])

    def test_hasConflicts(self):
        """Pairs hasConflicts() should return True if there are conflicts"""
        self.assertFalse(self.Empty.hasConflicts())
        self.assertFalse(Pairs([(1, 2), (3, 4)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, 3)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, None)]).hasConflicts())

    def test_mismatches(self):
        """Pairs mismatches() should return #pairs that can't be formed"""
        # with plain string
        self.assertEqual(Pairs([(0, 1)]).mismatches('AC', {}), 1)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'C'): None}), 0)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'G'): None}), 1)
        self.assertEqual(
            Pairs([(0, 1), (2, 3), (3, 1)]).mismatches(
                'ACGU', {('A', 'U'): None}),
            3)

        # using sequence with alphabet
        sequence = Rna('ACGUA')
        self.assertEqual(
            Pairs([(0, 1), (0, 4), (0, 3)]).mismatches(sequence), 2)

    def test_hasPseudoknots(self):
        """Pairs hasPseudoknots() should return True if there's a pseudoknot"""
        self.assertFalse(self.NoPseudo.hasPseudoknots())
        self.assertFalse(self.NoPseudo2.hasPseudoknots())
        #add tests for ((.))() etc
        self.assertTrue(self.p0.hasPseudoknots())
        self.assertTrue(self.p1.hasPseudoknots())
        self.assertTrue(self.p2.hasPseudoknots())
        self.assertTrue(self.p3.hasPseudoknots())
        self.assertTrue(self.p4.hasPseudoknots())
        self.assertTrue(self.p5.hasPseudoknots())
        self.assertTrue(self.p6.hasPseudoknots())
        self.assertTrue(self.p7.hasPseudoknots())