def test_no_labels(self):
    """Input with labels but no seqs: strict parsing raises, lenient yields nothing."""
    # strict mode (the default, passed explicitly here) must reject the records
    strict_parser = FastaParser(self.labels, strict=True)
    with self.assertRaises(RecordError):
        list(strict_parser)

    # non-strict mode skips the incomplete records, leaving nothing to return
    lenient_records = list(FastaParser(self.labels, strict=False))
    self.assertEqual(lenient_records, [])
def test_multiple_bad(self):
    """Strict parsing raises on bad records; non-strict parsing skips them."""
    strict_parser = FastaParser(self.twogood)
    with self.assertRaises(RecordError):
        list(strict_parser)

    records = list(FastaParser(self.twogood, strict=False))
    self.assertEqual(len(records), 2)

    # every record is indexed as (name, seq); element 0 is the name, so keep
    # only element 1 for the comparisons below
    first, second = (record[1] for record in records)
    self.assertEqual((first.name, first), ("abc", "caggac"))
    self.assertEqual((second.name, second), ("456", "cg"))
def test_single(self):
    """A single record, whether on one line or several, parses to one named seq."""
    # (input lines, expected label, expected sequence)
    cases = (
        (self.oneseq, "abc", "UCAG"),
        (self.multiline, "xyz", "UUUUCCAAAAAG"),
    )
    for lines, label, residues in cases:
        parsed = list(FastaParser(lines))
        self.assertEqual(len(parsed), 1)
        record = parsed[0]
        self.assertEqual(record, (label, residues))
        # the sequence object itself must also carry the label as its name
        self.assertEqual(record[1].name, label)
def test_multiple_constructor_bad(self):
    """Parser should complain or skip bad records w/ constructor"""

    def dnastrict(x, **kwargs):
        """Build a checked Dna, reporting any failure as a RecordError."""
        try:
            return Dna(x, check=True, **kwargs)
        except Exception as e:
            # Dna's failure type is not visible here, so the catch stays
            # broad; chain the cause so the original error is not lost.
            raise RecordError("Could not convert sequence") from e

    # strict (default): the record the constructor rejects aborts parsing
    self.assertRaises(RecordError, list, FastaParser(self.oneX, dnastrict))

    # non-strict: the rejected record is skipped, the other two remain
    f = list(FastaParser(self.oneX, dnastrict, strict=False))
    self.assertEqual(len(f), 2)
    a, b = f
    a, b = a[1], b[1]  # field 0 is name
    self.assertEqual((a.name, a), ("abc", "caggac".upper()))
    self.assertEqual((b.name, b), ("456", "cg".upper()))
def test_single_constructor(self):
    """Supplied sequence and label constructors are applied to each record."""
    # sequence constructor only
    records = list(FastaParser(self.oneseq, Dna))
    self.assertEqual(len(records), 1)
    record = records[0]
    self.assertEqual(record, ("abc", "TCAG"))
    self.assertEqual(record[1].name, "abc")

    def shout_label(x):
        # returns no name, and the upper-cased label under info key "ABC"
        info = {"ABC": x.upper()}
        return None, info

    # sequence constructor plus a label-to-(name, info) converter
    records = list(FastaParser(self.multiline, Dna, shout_label))
    self.assertEqual(len(records), 1)
    record = records[0]
    self.assertEqual(record, (None, "TTTTCCAAAAAG"))
    seq = record[1]
    self.assertEqual(seq.name, None)
    self.assertEqual(seq.info.ABC, "XYZ")
def test_multiple(self):
    """FastaParser should read multiline records correctly"""
    f = list(FastaParser(self.threeseq))
    self.assertEqual(len(f), 3)
    for record in f:
        # use the unittest assertion rather than a bare ``assert`` so the
        # check still runs under ``python -O`` and reports a useful message
        self.assertIsInstance(record[1], Sequence)
    a, b, c = f
    # element 1 of each record is the sequence object, which carries its name
    self.assertEqual((a[1].name, a[1]), ("123", "a"))
    self.assertEqual((b[1].name, b[1]), ("abc", "caggac"))
    self.assertEqual((c[1].name, c[1]), ("456", "cg"))
def test_annotate_from_gff(self):
    """correctly annotates a Sequence from a gff file"""
    from cogent3.parse.fasta import FastaParser

    # relative paths: resolved against the current working directory
    fasta_path = os.path.join("data", "c_elegans_WS199_dna_shortened.fasta")
    gff3_path = os.path.join("data", "c_elegans_WS199_shortened_gff.gff3")
    # only the first record is used, and its label is not needed
    _, seq = next(FastaParser(fasta_path))
    sequence = Sequence(seq)
    sequence.annotate_from_gff(gff3_path)
    matches = list(sequence.get_annotations_matching("*", extend_query=True))
    # 13 features with one having 2 parents, so 14 instances should be found
    self.assertEqual(len(matches), 14)
def test_empty(self):
    """Empty input, or label-free input when lenient, yields no records."""
    nothing = []
    from_empty = list(FastaParser(self.empty))
    self.assertEqual(from_empty, nothing)

    from_unlabelled = list(FastaParser(self.nolabels, strict=False))
    self.assertEqual(from_unlabelled, nothing)

    # with the default strictness the same label-free input must raise
    strict_parser = FastaParser(self.nolabels)
    with self.assertRaises(RecordError):
        list(strict_parser)