def test_bad_values_fail(self):
    # Smoke-check that input validation is wired up at all. Going further
    # would only restate the _valid_inputs dict, which is already more
    # declarative than a unit test.
    #
    # Each case: (regex the ValueError message must match,
    #             positional args after the sequences, keyword args).
    invalid_calls = [
        ('trim_length', (-123,), {}),
        ('min_size', (100,), {'min_size': -1}),
    ]
    for message_regex, extra_args, extra_kwargs in invalid_calls:
        with self.assertRaisesRegex(ValueError, message_regex):
            denoise_16S(self.demux_seqs, *extra_args, **extra_kwargs)
def test_with_stats(self):
    # Expected values were manually assessed based on temp output.
    # Each row holds one value per STATS_HEADER column; the original
    # legend for the trailing value pairs reads:
    #   derep  dblr  art  chim  ref  miss
    expected_rows = [
        ('L1S208', 100, 11, 69, 11, 64, 0, 0, 1, 2, 5, 46, 0, 0),
        ('L1S257', 100, 12, 67, 12, 63, 0, 0, 0, 0, 4, 43, 0, 0),
        ('L1S57', 100, 11, 60, 11, 58, 0, 0, 0, 0, 4, 39, 0, 0),
        ('L1S76', 100, 11, 74, 10, 70, 0, 0, 0, 0, 3, 43, 0, 0),
        ('L2S155', 100, 12, 40, 12, 40, 0, 0, 0, 0, 7, 29, 0, 0),
        ('L2S175', 100, 11, 44, 11, 42, 0, 0, 0, 0, 6, 33, 0, 0),
        ('L2S309', 100, 10, 38, 10, 38, 0, 0, 0, 0, 3, 23, 0, 0),
        ('L2S357', 100, 8, 42, 8, 42, 0, 0, 0, 0, 4, 33, 0, 0),
        ('L3S294', 100, 10, 33, 10, 33, 0, 0, 0, 0, 4, 18, 0, 0),
        ('L3S313', 100, 12, 42, 12, 42, 0, 0, 0, 0, 5, 28, 0, 0),
        ('L4S112', 100, 9, 36, 9, 36, 0, 0, 0, 0, 8, 34, 0, 0),
        ('L4S63', 100, 9, 33, 9, 33, 0, 0, 0, 0, 3, 19, 0, 0),
        ('L5S155', 100, 10, 44, 10, 44, 0, 0, 0, 0, 5, 32, 0, 0),
        ('L5S174', 100, 13, 50, 13, 48, 0, 0, 0, 0, 4, 25, 0, 0),
        ('L6S20', 100, 9, 45, 8, 43, 0, 0, 0, 0, 6, 39, 0, 0),
        ('L6S68', 100, 14, 35, 14, 35, 0, 0, 0, 0, 5, 14, 0, 0),
    ]
    # Index the expected frame by sample ID before comparing.
    expected_frame = pd.DataFrame(
        expected_rows, columns=STATS_HEADER).set_index('sample-id')

    # Only the third return value (the stats) is under test here.
    _, _, observed_frame = denoise_16S(self.demux_seqs, 100,
                                       sample_stats=True)

    pdt.assert_frame_equal(observed_frame, expected_frame)
def test_integer_ids(self):
    # Both the table IDs and the stats index are expected to carry the
    # integer-looking sample IDs as strings.
    expected_ids = {'100', '101', '103', '104'}

    integer_id_seqs = SingleLanePerSampleSingleEndFastqDirFmt(
        self.get_data_path('sample_seqs_integers'), 'r')
    table, _, sample_stats = denoise_16S(integer_id_seqs, 100,
                                         sample_stats=True)

    self.assertEqual(set(table.ids()), expected_ids)
    self.assertEqual(set(sample_stats.index.values), expected_ids)
def test_left_trim_len(self):
    # Run with a trim length of 110 and left_trim_len=10, then check the
    # sizes of everything that comes back.
    table, representative_seqs, sample_stats = denoise_16S(
        self.demux_seqs, 110, left_trim_len=10)

    sample_count = len(table.ids(axis='sample'))
    self.assertEqual(sample_count, 16)

    observation_count = len(table.ids(axis='observation'))
    self.assertEqual(observation_count, 20)

    sequence_count = len(list(representative_seqs))
    self.assertEqual(sequence_count, 20)

    # sample_stats was not requested, so the stats frame should be empty.
    self.assertEqual(len(sample_stats.index), 0)
def test_defaults(self):
    # Compare a default run (trim length 100) with the stored expected
    # table and representative sequences.
    expected_table = biom.load_table(
        self.get_data_path('expected/16S-default.biom'))

    # Drop the 'description' metadata from each expected record as it is
    # read, before comparing against the observed sequences.
    expected_seqs = []
    for record in skbio.io.read(
            self.get_data_path('expected/16S-default.fasta'),
            'fasta', constructor=skbio.DNA, lowercase='ignore'):
        del record.metadata['description']
        expected_seqs.append(record)

    table, observed_seqs, sample_stats = denoise_16S(self.demux_seqs, 100)

    # Pass both sides through the same sort helper before comparing.
    observed_sorted = _sort_seqs(observed_seqs)
    expected_sorted = _sort_seqs(expected_seqs)

    self.assertEqual(table, expected_table)
    self.assertEqual(observed_sorted, expected_sorted)

    # Without sample_stats the frame keeps its columns (every header but
    # the first, which is the index) and has no rows.
    self.assertEqual(list(sample_stats.columns), STATS_HEADER[1:])
    self.assertEqual(len(sample_stats), 0)
def test_all_reads_filtered(self):
    # A trim length of 10000 must raise a ValueError whose message
    # matches 'filter' (presumably because no read in the fixture is
    # that long -- the fixture itself is not visible from here).
    oversized_trim_length = 10000
    with self.assertRaisesRegex(ValueError, 'filter'):
        denoise_16S(self.demux_seqs, oversized_trim_length)
def test_integer_ids_with_underscores(self):
    # Running on the underscore-ID fixture must raise a ValueError whose
    # message names the offending ID (100_100).
    fixture_path = self.get_data_path('sample_seqs_integers_underscore')
    underscore_id_seqs = SingleLanePerSampleSingleEndFastqDirFmt(
        fixture_path, 'r')

    with self.assertRaisesRegex(ValueError, 'Deblur cannot.*100_100.'):
        denoise_16S(underscore_id_seqs, 100)