def test_indent_splitter(self):
    """indent_splitter should split lines at correct locations

    The expected groupings below encode the contract under test: a line at
    the same indent as the line that opened the current group starts a new
    group, while a more deeply indented line is attached to the open group.

    Every fixture spells out its leading whitespace inside an ordinary
    string literal, one list element per input line, because the expected
    result depends entirely on that whitespace.
    """
    # if lines have same indent, should not group together
    lines = ["abc xxx", "def yyy"]
    self.assertEqual(list(indent_splitter(lines)), [[lines[0]], [lines[1]]])

    # if second line is indented, should group with first
    lines = ["abc xxx", " def yyy"]
    self.assertEqual(list(indent_splitter(lines)), [[lines[0], lines[1]]])

    # if both lines indented but second is more, should group with first
    lines = [" abc xxx", "  def yyy"]
    self.assertEqual(list(indent_splitter(lines)), [[lines[0], lines[1]]])

    # if both lines indented equally, should not group
    lines = [" abc xxx", " def yyy"]
    self.assertEqual(list(indent_splitter(lines)), [[lines[0]], [lines[1]]])

    # for more complex situation, should produce correct grouping
    # (a trailing "-" marks a line that is expected to open a new group)
    lines = [
        "  xyz",  # 0 -
        "  xxx",  # 1 -
        "   yyy",  # 2
        "   uuu",  # 3
        "   iii",  # 4
        "  qaz",  # 5 -
        "  wsx",  # 6 -
        "   az",  # 7
        "   sx",  # 8
        "        gb",  # 9  (deeper still; expected to stay in group 6)
        "   bg",  # 10
        "  aaa",  # 11 -
    ]
    self.assertEqual(
        list(indent_splitter(lines)),
        [[lines[0]], lines[1:5], [lines[5]], lines[6:11], [lines[11]]],
    )

    # real example from genbank file
    # Top-level keywords start in column 0, sub-keywords are indented by two
    # spaces and continuation lines by twelve. Only the leading whitespace
    # matters to the assertion; the spacing inside each line follows the
    # usual flat-file column layout.
    lines = [
        "LOCUS       NT_016354           92123751 bp    DNA     linear   CON 29-AUG-2006",  # 0
        "DEFINITION  H**o sapiens chromosome 4 genomic contig, reference assembly.",  # 1
        "ACCESSION   NT_016354 NT_006109 NT_006204 NT_006245 NT_006302 NT_006371",  # 2
        "            NT_006397 NT_016393 NT_016589 NT_016599 NT_016606 NT_022752",  # 3
        "            NT_022753 NT_022755 NT_022760 NT_022774 NT_022797 NT_022803",  # 4
        "            NT_022846 NT_022960 NT_025694 NT_028147 NT_029273 NT_030643",  # 5
        "            NT_030646 NT_030662 NT_031780 NT_031781 NT_031791 NT_034703",  # 6
        "            NT_034705 NT_037628 NT_037629 NT_079512",  # 7
        "VERSION     NT_016354.18  GI:88977422",  # 8
        "KEYWORDS    .",  # 9
        "SOURCE      H**o sapiens (human)",  # 10
        "  ORGANISM  H**o sapiens",  # 11
        "            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;",  # 12
        "            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;",  # 13
        "            Catarrhini; Hominidae; H**o.",  # 14
        "?",  # 15
        "REFERENCE   2  (bases 1 to 92123751)",  # 16
        "  AUTHORS   International Human Genome Sequencing Consortium.",  # 17
        "  TITLE     Finishing the euchromatic sequence of the human genome",  # 18
    ]
    self.assertEqual(
        list(indent_splitter(lines)),
        [
            [lines[0]],
            [lines[1]],
            lines[2:8],
            [lines[8]],
            [lines[9]],
            lines[10:15],
            [lines[15]],
            lines[16:],
        ],
    )
# NOTE(review): the statements down to the second assertEqual are the tail of
# a test whose beginning lies above this view (l, l2 and l3_a are assigned
# there); they are reproduced unchanged.
l3_b = Location(12)
l3 = Location([l3_a, l3_b])
ll = LocationList([l, l2, l3])
s = ll.extract("ACGTGCAGTCAGTAGCAT")
#               123456789012345678
self.assertEqual(s, "G" + "TGC" + "CAG")
# check a case where it wraps around
l5_a = Location(16)
l5_b = Location(4)
l5 = Location([l5_a, l5_b])
ll = LocationList([l5])
s = ll.extract("ACGTGCAGTCAGTAGCAT")
self.assertEqual(s, "CATACGT")


if __name__ == "__main__":
    # Script entry point. Invoked as `<script> x <filename>` it prints the
    # two-level grouping that indent_splitter produces for the named file;
    # with any other arguments it calls main() (presumably the test runner
    # imported at the top of the file -- confirm there).
    from sys import argv

    if len(argv) > 2 and argv[1] == "x":
        filename = argv[2]
        # Keep the whole traversal inside the `with` block: the file is read
        # while indent_splitter is being iterated, and the context manager
        # guarantees the handle is closed even if printing fails part-way.
        with open(filename) as lines:
            for record in indent_splitter(lines):
                print("******")
                # First line of each outer group is its header line.
                print(record[0])
                # Split the remaining lines of the group a second time.
                for group in indent_splitter(record[1:]):
                    print("?????")
                    for line in group:
                        print(line)
    else:
        main()