def test_infer_align_format(self):
    # Runs infer_align_format on one-line in-memory inputs first, then on
    # compressed alignment files taken from the test data directory.
    # NOTE(review): fields inside the literals below are separated by single
    # spaces as they appear in this copy of the file; confirm they were not
    # literal tab characters before relying on them.

    # map: the first element of the result is the format name and the
    # second is compared against a list holding the line that was fed in
    mapping = 'S1/1 NC_123456'
    result = infer_align_format(StringIO(mapping))
    self.assertEqual(result[0], 'map')
    self.assertListEqual(result[1], [mapping])

    # inputs whose format should be recognized: b6o, sam record, sam header
    recognized = (
        ('S1/1 NC_123456 100 100 0 0 1 100 25 124 0.1 100', 'b6o'),
        ('S1 77 NC_123456 26 0 100M * 0 0 * *', 'sam'),
        ('@HD VN:1.0 SO:unsorted', 'sam'),
    )
    for text, fmt in recognized:
        self.assertEqual(infer_align_format(StringIO(text))[0], fmt)

    # an empty stream must be rejected with its own message
    with self.assertRaises(ValueError) as ctx:
        infer_align_format(StringIO())
    self.assertEqual(str(ctx.exception), (
        'Alignment file is empty or unreadable.'))

    # an invalid sam-like record and free text share one error message
    for junk in ('S1 * * * * * * 0 0 * *', 'Hi there!'):
        with self.assertRaises(ValueError) as ctx:
            infer_align_format(StringIO(junk))
        self.assertEqual(str(ctx.exception), (
            'Cannot determine alignment file format.'))

    # real files: Bowtie2 output (sam) and BURST output (b6o)
    real_files = (
        (('align', 'bowtie2', 'S01.sam.xz'), 'sam'),
        (('align', 'burst', 'S01.b6.bz2'), 'b6o'),
    )
    for parts, fmt in real_files:
        with openzip(join(self.datdir, *parts)) as fh:
            self.assertEqual(infer_align_format(fh)[0], fmt)
def test_openzip(self):
    # Round-trips short text through openzip for a plain file, a gzip file
    # (read side) and a bzip2 file (write side), cross-checking each step
    # against the standard open / gzip.open / bz2.open functions.
    greeting = 'Hello World!'
    reply = 'Here I am!'

    # read regular file: written with built-in open, read via openzip
    plain_path = join(self.tmpdir, 'test.txt')
    with open(plain_path, 'w') as fh:
        fh.write(greeting)
    with openzip(plain_path) as fh:
        content = fh.read()
    self.assertEqual(content, greeting)

    # write regular file: written via openzip, read with built-in open
    with openzip(plain_path, 'wt') as fh:
        fh.write(reply)
    with open(plain_path, 'r') as fh:
        content = fh.read()
    self.assertEqual(content, reply)
    remove(plain_path)

    # read compressed file: gzip payload is written as bytes, read as text
    gz_path = join(self.tmpdir, 'test.txt.gz')
    with gzip.open(gz_path, 'wb') as fh:
        fh.write(greeting.encode())
    with openzip(gz_path) as fh:
        content = fh.read()
    self.assertEqual(content, greeting)
    remove(gz_path)

    # write compressed file: text-append mode on a path ending in .bz2
    bz_path = join(self.tmpdir, 'test.txt.bz2')
    with openzip(bz_path, 'at') as fh:
        fh.write(reply)
    with bz2.open(bz_path, 'rt') as fh:
        content = fh.read()
    self.assertEqual(content, reply)
    remove(bz_path)
def test_read_gene_coords(self):
    # Checks read_gene_coords on a small hand-written table, on malformed
    # lines, and on a compressed coordinates file from the test data.
    # NOTE(review): the data rows below separate their fields with single
    # spaces as they appear in this copy of the file, whereas the malformed
    # cases further down use '\t'; confirm the rows were not literal tabs.
    table = (
        '## GCF_000123456',
        '# NC_123456',
        '1 5 384',
        '2 410 933',
        '# NC_789012',
        '1 912 638',
        '2 529 75',
    )

    # simple case: every gene contributes two tuples, the smaller coordinate
    # flagged True and the larger flagged False in the second field, and
    # with sort=True each list is ordered by coordinate
    expected = {
        'NC_123456': [
            (5, True, True, '1'), (384, False, True, '1'),
            (410, True, True, '2'), (933, False, True, '2'),
        ],
        'NC_789012': [
            (75, True, True, '2'), (529, False, True, '2'),
            (638, True, True, '1'), (912, False, True, '1'),
        ],
    }
    self.assertDictEqual(read_gene_coords(table, sort=True), expected)

    # don't sort: gene '1' entries stay ahead of gene '2' entries
    unsorted = read_gene_coords(table, sort=False)['NC_789012']
    self.assertListEqual(unsorted, [
        (638, True, True, '1'), (912, False, True, '1'),
        (75, True, True, '2'), (529, False, True, '2'),
    ])

    # incorrect formats: one column, two columns, and three columns whose
    # third is not numeric; the message quotes the offending line verbatim
    prefix = 'Cannot extract coordinates from line:'
    for bad in ('hello', 'hello\t100', 'hello\t100\tthere'):
        with self.assertRaises(ValueError) as ctx:
            read_gene_coords((bad, ))
        self.assertEqual(str(ctx.exception), f'{prefix} "{bad}".')

    # real coords file
    coords_path = join(self.datdir, 'function', 'coords.txt.xz')
    with openzip(coords_path) as fh:
        coords = read_gene_coords(fh, sort=True)
    self.assertEqual(len(coords), 107)
    genome = coords['G000006745']
    self.assertEqual(len(genome), 7188)
    leading = (
        (372, True, True, '1'),
        (806, False, True, '1'),
        (816, True, True, '2'),
        (2177, False, True, '2'),
    )
    for idx, item in enumerate(leading):
        self.assertTupleEqual(genome[idx], item)