def test_indent_splitter(self):
    """indent_splitter should split lines at correct locations"""
    # NOTE(review): the leading whitespace inside the fixture strings below
    # was reconstructed from the inline comments and the expected groupings
    # (the collapsed text had every indent squeezed to a single space, which
    # made cases 3 and 4 identical inputs with contradictory expectations).
    # The exact number of spaces is an assumption; only the relative
    # indentation matters to the assertions. Confirm against upstream data.

    #if lines have same indent, should not group together
    lines = [
        'abc    xxx',
        'def    yyy',
    ]
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0]], [lines[1]]])

    #if second line is indented, should group with first
    lines = [
        'abc    xxx',
        ' def    yyy',
    ]
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0], lines[1]]])

    #if both lines indented but second is more, should group with first
    lines = [
        ' abc    xxx',
        '  def    yyy',
    ]
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0], lines[1]]])

    #if both lines indented equally, should not group
    lines = [
        '   abc    xxx',
        '   def    yyy',
    ]
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0]], [lines[1]]])

    #for more complex situation, should produce correct grouping
    #(a trailing '-' marks a line that is expected to start a new group)
    lines = [
        '  xyz',        #0 -
        '  xxx',        #1 -
        '   yyy',       #2
        '   uuu',       #3
        '   iii',       #4
        '  qaz',        #5 -
        '  wsx',        #6 -
        '   az',        #7
        '   sx',        #8
        '        gb',   #9
        '   bg',        #10
        '  aaa',        #11 -
    ]
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0]], lines[1:5], [lines[5]], lines[6:11], [lines[11]]])

    #real example from genbank file
    # Written as an explicit list of lines (rather than a triple-quoted
    # string split on newlines) so the fixture cannot be altered by
    # re-indenting this method. Top-level keywords start in column 0,
    # sub-keywords are indented two spaces and continuation lines twelve.
    lines = [
        'LOCUS       LAAJ4821     16866 bp    DNA             MAM       23-AUG-2000',
        'DEFINITION  Loxodonta africana complete mitochondrial genomic sequence.',
        'ACCESSION   AJ224821',
        'VERSION     AJ224821.1  GI:3021460',
        'KEYWORDS    complete genome.',
        'SOURCE      African elephant.',
        '  ORGANISM  Mitochondrion Loxodonta africana',
        '            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;',
        '            Mammalia; Eutheria; Proboscidea; Elephantidae; Loxodonta.',
        'REFERENCE   1  (bases 1 to 16866)',
        '  AUTHORS   Hauf,J., Waddell,P.J., Chalwatzis,N., Joger,U. and',
        '            Zimmermann,F.K.',
        '  TITLE     The complete mitochondrial genome sequence of the African',
        '            elephant (Loxodonta africana), phylogenetic relationships of',
        '            Proboscidea to other mammals and D-loop heteroplasmy',
    ]
    # LOCUS..KEYWORDS stand alone; SOURCE owns its ORGANISM block (5-8);
    # REFERENCE owns everything that follows (9 onwards).
    self.assertEqual(list(indent_splitter(lines)),
        [[lines[0]], [lines[1]], [lines[2]], [lines[3]], [lines[4]],
         lines[5:9], lines[9:]])
# NOTE(review): the statements up to the second assertEqual are the tail of a
# test method whose header lies outside this chunk; they are reproduced
# unchanged (only split back onto separate lines).
s = ll.extract('ACGTGCAGTCAGTAGCAT')
#               123456789012345678
self.assertEqual(s, 'G'+'TGC'+'CAG')
#check a case where it wraps around
l5_a = Location(16)
l5_b = Location(4)
l5 = Location([l5_a,l5_b])
ll = LocationList([l5])
s = ll.extract('ACGTGCAGTCAGTAGCAT')
self.assertEqual(s, 'CATACGT')

if __name__ == '__main__':
    from sys import argv
    # Debug mode: "<script> x <filename>" dumps the two-level indent
    # structure of a file. Any other invocation hands off to main(), which
    # is defined outside this chunk (presumably the unittest runner).
    if len(argv) > 2 and argv[1] == 'x':
        filename = argv[2]
        lines = open(filename)
        # try/finally (rather than a with-statement) so the handle is
        # always closed without assuming a minimum interpreter version.
        try:
            for record in indent_splitter(lines):
                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print('******')
                print(record[0])
                # Re-split the body of the record to show nested groups.
                for subgroup in indent_splitter(record[1:]):
                    print('?????')
                    for line in subgroup:
                        print(line)
        finally:
            lines.close()
    else:
        main()