Exemplo n.º 1
0
    def test_indent_splitter(self):
        """indent_splitter should split lines at correct locations"""
        # if lines have same indent, should not group together
        lines = ["abc    xxx", "def    yyy"]
        self.assertEqual(list(indent_splitter(lines)),
                         [[lines[0]], [lines[1]]])
        # if second line is indented, should group with first
        lines = ["abc    xxx", " def    yyy"]
        self.assertEqual(list(indent_splitter(lines)), [[lines[0], lines[1]]])

        # if both lines indented but second is more, should group with first
        lines = [" abc    xxx", "  def    yyy"]
        self.assertEqual(list(indent_splitter(lines)), [[lines[0], lines[1]]])

        # if both lines indented equally, should not group
        lines = ["   abc    xxx", "   def    yyy"]
        self.assertEqual(list(indent_splitter(lines)),
                         [[lines[0]], [lines[1]]])

        # for more complex situation, should produce correct grouping
        lines = [
            "  xyz",  # 0 -
            "  xxx",  # 1 -
            "   yyy",  # 2
            "   uuu",  # 3
            "   iii",  # 4
            "  qaz",  # 5 -
            "  wsx",  # 6 -
            "   az",  # 7
            "   sx",  # 8
            "        gb",  # 9
            "   bg",  # 10
            "  aaa",  # 11 -
        ]
        self.assertEqual(
            list(indent_splitter(lines)),
            [[lines[0]], lines[1:5], [lines[5]], lines[6:11], [lines[11]]],
        )

        # real example from genbank file
        lines = """LOCUS       NT_016354           92123751 bp    DNA     linear   CON 29-AUG-2006
DEFINITION  H**o sapiens chromosome 4 genomic contig, reference assembly.
ACCESSION   NT_016354 NT_006109 NT_006204 NT_006245 NT_006302 NT_006371
            NT_006397 NT_016393 NT_016589 NT_016599 NT_016606 NT_022752
            NT_022753 NT_022755 NT_022760 NT_022774 NT_022797 NT_022803
            NT_022846 NT_022960 NT_025694 NT_028147 NT_029273 NT_030643
            NT_030646 NT_030662 NT_031780 NT_031781 NT_031791 NT_034703
            NT_034705 NT_037628 NT_037629 NT_079512
VERSION     NT_016354.18  GI:88977422
KEYWORDS    .
SOURCE      H**o sapiens (human)
  ORGANISM  H**o sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; H**o.
?
REFERENCE   2  (bases 1 to 92123751)
  AUTHORS   International Human Genome Sequencing Consortium.
  TITLE     Finishing the euchromatic sequence of the human genome""".split(
            "\n")
        self.assertEqual(
            list(indent_splitter(lines)),
            [
                [lines[0]],
                [lines[1]],
                lines[2:8],
                [lines[8]],
                [lines[9]],
                lines[10:15],
                [lines[15]],
                lines[16:],
            ],
        )
Exemplo n.º 2
0
        l3_b = Location(12)
        l3 = Location([l3_a, l3_b])
        ll = LocationList([l, l2, l3])
        s = ll.extract("ACGTGCAGTCAGTAGCAT")
        #               123456789012345678
        self.assertEqual(s, "G" + "TGC" + "CAG")
        # check a case where it wraps around
        l5_a = Location(16)
        l5_b = Location(4)
        l5 = Location([l5_a, l5_b])
        ll = LocationList([l5])
        s = ll.extract("ACGTGCAGTCAGTAGCAT")
        self.assertEqual(s, "CATACGT")


if __name__ == "__main__":
    from sys import argv

    # Invoked as `<script> x <filename>`: dump the two-level indent grouping
    # of the given file. Any other invocation falls through to main().
    if len(argv) > 2 and argv[1] == "x":
        filename = argv[2]
        # Open via a context manager so the handle is closed once the dump
        # finishes, or if indent_splitter raises part-way through the file.
        with open(filename) as lines:
            for group in indent_splitter(lines):
                print("******")
                # First line of the group is printed as its heading.
                print(group[0])
                # Re-split the remaining lines to expose the nested level.
                for subgroup in indent_splitter(group[1:]):
                    print("?????")
                    for line in subgroup:
                        print(line)
    else:
        main()