示例#1
0
 def test_multiple_bad(self):
     """MinimalFastaParser should reject or skip the bad record in twogood.

     Consuming the parser without ``strict=False`` must raise
     ``RecordError``; with ``strict=False`` exactly two records remain.
     """
     # build the parser first so only its consumption is under test
     strict_parser = MinimalFastaParser(self.twogood)
     with self.assertRaises(RecordError):
         list(strict_parser)

     records = list(MinimalFastaParser(self.twogood, strict=False))
     self.assertEqual(len(records), 2)
     first, second = records
     self.assertEqual(first, ("abc", "caggac"))
     self.assertEqual(second, ("456", "cg"))
示例#2
0
 def test_no_labels(self):
     """MinimalFastaParser on self.labels: error if strict, else empty list"""
     # strict parsing is expected to fail once the records are consumed
     strict_parser = MinimalFastaParser(self.labels, strict=True)
     with self.assertRaises(RecordError):
         list(strict_parser)

     # non-strict parsing is expected to skip every record
     lenient_records = list(MinimalFastaParser(self.labels, strict=False))
     self.assertEqual(lenient_records, [])
示例#3
0
    def test_read(self):
        """data store content parses to the same records as brca1.fasta"""
        reference_path = os.path.join("data", "brca1.fasta")
        with open(reference_path) as infile:
            expect = {label: seq for label, seq in MinimalFastaParser(infile)}

        dstore = self.Class(self.basedir, suffix=".fasta")
        # the store returns one string; the parser is fed its lines
        lines = dstore.read(self.basedir).splitlines()
        got = {label: seq for label, seq in MinimalFastaParser(lines)}
        self.assertEqual(got, expect)
示例#4
0
    def test_single(self):
        """MinimalFastaParser should yield one (label, seq) tuple per record"""
        # record whose sequence sits on a single line
        single = list(MinimalFastaParser(self.oneseq))
        self.assertEqual(len(single), 1)
        self.assertEqual(single[0], ("abc", "UCAG"))

        # record from the multiline fixture; still one joined sequence
        wrapped = list(MinimalFastaParser(self.multiline))
        self.assertEqual(len(wrapped), 1)
        self.assertEqual(wrapped[0], ("xyz", "UUUUCCAAAAAG"))
示例#5
0
 def test_multiple(self):
     """MinimalFastaParser should return all three records of threeseq"""
     expected = (("123", "a"), ("abc", "caggac"), ("456", "cg"))
     records = list(MinimalFastaParser(self.threeseq))
     # the length check comes first, so zip below cannot hide a short result
     self.assertEqual(len(records), 3)
     for got, want in zip(records, expected):
         self.assertEqual(got, want)
示例#6
0
    def test_gt_bracket_in_seq(self):
        """MinimalFastaParser handles alternate finder function

        This test also illustrates how to use the MinimalFastaParser
        to handle "sequences" that start with a > symbol, which can
        happen when we abuse the MinimalFastaParser to parse
        fasta-like sequence quality files.
        """
        # splitting on "\n" leaves a trailing "" that ends up unpaired
        oneseq_w_gt = ">abc\n>CAG\n".split("\n")

        def get_two_line_records(infile):
            """Yield consecutive lines of infile as (first, second) pairs.

            A final line without a partner is dropped.
            """
            line1 = None
            for line in infile:
                # identity check against the sentinel: an empty-string line
                # must still be held as the pending first line
                if line1 is None:
                    line1 = line
                else:
                    yield (line1, line)
                    line1 = None

        f = list(MinimalFastaParser(oneseq_w_gt, finder=get_two_line_records))
        self.assertEqual(len(f), 1)
        a = f[0]
        self.assertEqual(a, ("abc", ">CAG"))
示例#7
0
 def test_empty(self):
     """MinimalFastaParser should give [] for empty or label-less input"""
     no_records = list(MinimalFastaParser(self.empty))
     self.assertEqual(no_records, [])

     # label-less input is skipped when strict checking is off ...
     lenient_records = list(MinimalFastaParser(self.nolabels, strict=False))
     self.assertEqual(lenient_records, [])

     # ... and rejected when the strict argument is left out
     strict_parser = MinimalFastaParser(self.nolabels)
     with self.assertRaises(RecordError):
         list(strict_parser)
示例#8
0
 def test_parser_from_file(self):
     """passing a path instead of an open file or lines should work"""
     path = os.path.join(data_path, "brca1.fasta")
     seqs = dict(p for p in MinimalFastaParser(path))
     # assertIn reports the missing key and the container on failure,
     # unlike assertTrue on a pre-evaluated boolean
     self.assertIn("Human", seqs)
示例#9
0
def load_from_fasta(filename):
    """Load the records of a FASTA file into a DNA ArrayAlignment.

    Parameters
    ----------
    filename
        Passed to ``open_``, which opens it in text mode ('rt').

    Returns
    -------
    ``ArrayAlignment`` built from the (name, sequence) pairs yielded by
    ``MinimalFastaParser``, with ``moltype=DNA``.

    Any exception raised while parsing propagates to the caller; the
    file handle is closed first.
    """
    infile = open_(filename, mode='rt')
    try:
        # materialise every record while the handle is still open
        seqs = [(n, s) for n, s in MinimalFastaParser(infile)]
    finally:
        # close even when parsing fails so the handle is never leaked
        infile.close()
    return ArrayAlignment(data=seqs, moltype=DNA)