def test_missing_feature_ids(self):
    """Grouping by feature fails when a table feature has no metadata.

    The table holds features 'a'..'f' while the metadata column holds
    'a', 'c'..'g', so 'b' is the ID expected in the error message.
    """
    metadata_ids = pd.Index(['a', 'c', 'd', 'e', 'f', 'g'],
                            name='featureid')
    group_labels = pd.Series(['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                             name='foo', index=metadata_ids)
    feature_mc = qiime2.CategoricalMetadataColumn(group_labels)

    counts = np.array([[1, 0, 0],
                       [1, 10, 10],
                       [0, 0, 100],
                       [5, 5, 5],
                       [0, 1, 100],
                       [7, 8, 9]])
    # g is missing on purpose
    table = biom.Table(counts,
                       sample_ids=['s1', 's2', 's3'],
                       observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

    with self.assertRaisesRegex(ValueError, "not present.*'b'"):
        group(table, axis='feature', metadata=feature_mc, mode='sum')
def test_extra_metadata(self):
    """beta_group_significance accepts metadata IDs absent from the matrix.

    The metadata column carries 'sample4', which the distance matrix does
    not contain. The rendered index.html must still contain a
    ``<td>2</td>`` cell (presumably the number of groups that remain --
    confirm against the template).
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['a', 'b', 'b', 'c'], name='a or b',
                  index=pd.Index(
                      ['sample1', 'sample2', 'sample3', 'sample4'],
                      name='id')))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md, permutations=42)
        index_fp = os.path.join(output_dir, 'index.html')
        # Read through a context manager so the handle is closed before
        # the temporary directory is cleaned up.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('<td>2</td>', html)
def test_filtered_samples_str_metadata(self):
    """A sample with a NaN metadata value produces a warning in the report.

    'sample4' has ``np.nan`` as its category; the rendered index.html is
    expected to contain the text 'Warning'.
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25, 0.66],
         [0.25, 0.00, 0.00, 0.66],
         [0.25, 0.00, 0.00, 0.66],
         [0.66, 0.66, 0.66, 0.00]],
        ids=['sample1', 'sample2', 'sample3', 'sample4'])
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['a', 'b', 'b', np.nan], name='a or b',
                  index=pd.Index(
                      ['sample1', 'sample2', 'sample3', 'sample4'],
                      name='id')))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md)
        index_fp = os.path.join(output_dir, 'index.html')
        # Read through a context manager so the handle is closed before
        # the temporary directory is cleaned up.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('Warning', html)
def test_empty_metadata_values(self):
    """Grouping rejects metadata with a missing value (None or NaN).

    Each case expects a ValueError naming the sample whose value is
    missing.
    """
    # Trusting that the code is sane enough to not invent a distinction
    # between feature and sample metadata where there is none
    none_series = pd.Series(
        ['a_new', 'a_new', None], name='foo',
        index=pd.Index(['a', 'b', 'c'], name='sampleid'))
    sample_mc = qiime2.CategoricalMetadataColumn(none_series)

    # The table reuses the IDs exactly as the metadata column reports them.
    sample_ids = sample_mc.to_series().index
    data = np.array([[1, 2, 3],
                     [30, 20, 10]])
    table = biom.Table(data, sample_ids=sample_ids,
                       observation_ids=['x', 'y'])

    with self.assertRaisesRegex(ValueError, "missing.*value.*'c'"):
        group(table, axis='sample', metadata=sample_mc, mode='sum')

    nan_series = pd.Series(
        ['a_new', float('nan'), 'a_new'], name='foo',
        index=pd.Index(['a', 'b', 'c'], name='id'))
    nan_mc = qiime2.CategoricalMetadataColumn(nan_series)

    with self.assertRaisesRegex(ValueError, "missing.*value.*'b'"):
        group(table, axis='sample', metadata=nan_mc, mode='sum')
def setUp(self):
    """Create a two-category metadata column and a matching feature table.

    Samples 'a'..'f' are labelled 'a' (first three) or 'b' (last three);
    the table has features 'v'..'z' and is imported as a
    FeatureTable[Frequency] artifact.
    """
    super().setUp()

    categories = pd.Series(
        ['a', 'a', 'a', 'b', 'b', 'b'],
        index=pd.Index(list('abcdef'), name='id'),
        name='foo')
    self.md = qiime2.CategoricalMetadataColumn(categories)

    counts = np.array([[13, 26, 37, 3, 6, 1],
                       [33, 24, 23, 5, 6, 2],
                       [38, 26, 33, 4, 1, 0],
                       [3, 2, 1, 22, 25, 31],
                       [2, 1, 3, 44, 46, 42]])
    tab = biom.Table(counts,
                     observation_ids=list('vwxyz'),
                     sample_ids=list('abcdef'))
    self.tab = qiime2.Artifact.import_data('FeatureTable[Frequency]', tab)
def setUp(self):
    """Load the chardonnay/ECAM fixtures and build a small normalized table.

    Note that the ``*_fp`` attributes hold loaded objects (tables,
    metadata, metadata columns), not file paths.
    """
    super().setUp()

    def _read_tsv(fp):
        # Shared reader for every TSV fixture: tab-separated, first row
        # is the header, first column becomes the index.
        return pd.read_csv(self.get_data_path(fp), sep='\t', header=0,
                           index_col=0)

    def _load_biom(table_fp):
        # Load a .qza artifact and view it as a biom.Table.
        artifact = qiime2.Artifact.load(self.get_data_path(table_fp))
        return artifact.view(biom.Table)

    def _load_md(md_fp):
        # Whole mapping file as qiime2.Metadata.
        return qiime2.Metadata(_read_tsv(md_fp))

    def _load_nmc(md_fp, column):
        # Single column of the mapping file as a numeric metadata column.
        return qiime2.NumericMetadataColumn(_read_tsv(md_fp)[column])

    def _load_cmc(md_fp, column):
        # Single column of the mapping file as a categorical column.
        return qiime2.CategoricalMetadataColumn(_read_tsv(md_fp)[column])

    self.table_chard_fp = _load_biom('chardonnay.table.qza')
    self.md_chard_fp = _load_md('chardonnay.map.txt')
    self.mdc_chard_fp = _load_cmc('chardonnay.map.txt', 'Region')
    self.table_ecam_fp = _load_biom('ecam-table-maturity.qza')
    self.md_ecam_fp = _load_md('ecam_map_maturity.txt')
    self.mdc_ecam_fp = _load_nmc('ecam_map_maturity.txt', 'month')

    self.exp_imp = _read_tsv('importance.tsv')
    # ``read_csv(squeeze=True)`` was deprecated in pandas 1.4 and removed
    # in 2.0. Squeezing the column axis afterwards gives the same result:
    # a Series when the file has a single data column.
    self.exp_pred = _read_tsv('predictions.tsv').squeeze('columns')

    index = pd.Index(['A', 'B', 'C', 'D'], name='id')
    percnorm_df = pd.DataFrame(
        [[20.0, 20.0, 50.0, 10.0],
         [10.0, 10.0, 70.0, 10.0],
         [90.0, 8.0, 1.0, 1.0],
         [30.0, 15.0, 20.0, 35.0]],
        index=index,
        columns=['feat1', 'feat2', 'feat3', 'feat4'])
    self.table_percnorm = qiime2.Artifact.import_data(
        FeatureTable[PercentileNormalized], percnorm_df).view(biom.Table)
    self.mdc_percnorm = qiime2.CategoricalMetadataColumn(
        pd.Series(['X', 'X', 'Y', 'Y'], index=index, name='name'))
def setUp(self):
    """Load the mock-3 fixtures and build the expected per-taxon frames.

    Populates expected/observed tables, the false-negative, misclassified
    and underclassified frames (each indexed by 'Taxon'), and a metadata
    column mapping each sample to a 'mock_id'.
    """
    super().setUp()
    self.exp_results = pd.read_csv(
        self.get_data_path('mock-3-results.tsv'), sep='\t', index_col=0)

    expected_fp = self.get_data_path('qc-mock-3-expected.qza')
    self.exp = qiime2.Artifact.load(expected_fp).view(pd.DataFrame)
    observed_fp = self.get_data_path('qc-mock-3-observed.qza')
    self.obs = qiime2.Artifact.load(observed_fp).view(pd.DataFrame)

    samples = ['HMPMockV1.1.Even1', 'HMPMockV1.1.Even2',
               'HMPMockV1.2.Staggered1', 'HMPMockV1.2.Staggered2']

    # Lineage fragments shared by several of the taxon labels below.
    staph_genus = ('k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;'
                   'f__Staphylococcaceae;g__Staphylococcus;')
    deinococcus_tail = (';c__Deinococci;o__Deinococcales;'
                        'f__Deinococcaceae;g__Deinococcus;s__')

    def _taxon_frame(per_sample_values, taxa):
        # One column per sample (in ``samples`` order), one row per taxon.
        frame = pd.DataFrame(dict(zip(samples, per_sample_values)),
                             index=taxa)
        frame.index.name = 'Taxon'
        return frame

    self.false_neg = _taxon_frame(
        [[0.047619, 0.047619, 0.047619],
         [0.047619, 0.047619, 0.047619],
         [0.2143622714, 0.0214362274, 0.0002143626],
         [0.2143622714, 0.0214362274, 0.0002143626]],
        [staph_genus + 's__aureus',
         staph_genus + 's__epidermidis',
         'k__Bacteria;p__Thermi' + deinococcus_tail])

    self.misclassified = _taxon_frame(
        [[0.08634], [0.0533176566813], [0.], [0.]],
        ['k__Bacteria;p__[Thermi]' + deinococcus_tail])

    self.underclassified = _taxon_frame(
        [[0.536876], [0.577293], [0.639295], [0.666156]],
        [staph_genus + '__'])

    mock_ids = pd.Series(
        ['HMPMockV1.1.Even1', 'HMPMockV1.1.Even1',
         'HMPMockV1.2.Staggered1', 'HMPMockV1.2.Staggered1'],
        name='mock_id',
        index=pd.Index(samples, name='id'))
    self.metadata = qiime2.CategoricalMetadataColumn(mock_ids)
def setUp(self):
    """Build barcode, forward and reverse FASTQ records plus a barcode map.

    Each record is a ``(header, sequence, '+', quality)`` tuple. Eleven
    records are created per read set, then wrapped in a
    BarcodePairedSequenceFastqIterator.
    """

    def _records(read_no, seqs):
        # Header is '@s<N>/<read> abc/<read>' with N counting from 1. The
        # first record has all-'Y' quality, every later one all-'P', one
        # quality character per base.
        return [('@s%d/%d abc/%d' % (n, read_no, read_no), seq, '+',
                 ('Y' if n == 1 else 'P') * len(seq))
                for n, seq in enumerate(seqs, start=1)]

    self.barcodes = _records(2, ['AAAA', 'TTAA', 'AACC', 'TTAA', 'AACC',
                                 'AAAA', 'CGGC', 'GGAA', 'CGGC', 'CGGC',
                                 'GGAA'])
    self.forward = _records(1, ['GGG', 'CCC', 'AAA', 'TTT', 'ATA', 'TAT',
                                'CGC', 'GCG', 'ACG', 'GCA', 'TGA'])
    self.reverse = _records(1, ['CCC', 'GGG', 'TTT', 'AAA', 'TAT', 'ATA',
                                'GCG', 'CGC', 'CGT', 'TGC', 'TCA'])

    self.bpsi = BarcodePairedSequenceFastqIterator(
        self.barcodes, self.forward, self.reverse)

    sample_ids = pd.Index(['sample%d' % i for i in range(1, 6)], name='id')
    barcode_map = pd.Series(['AAAA', 'AACC', 'TTAA', 'GGAA', 'CGGC'],
                            name='bc', index=sample_ids)
    self.barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)
def test_empty_only_in_superset(self):
    """A missing value is ignored when its ID is not in the table.

    Metadata ID 'd' has no value but the table only holds 'a', 'b', 'c',
    so grouping with 'mean-ceiling' succeeds.
    """
    # Trusting that the code is sane enough to not invent a distinction
    # between feature and sample metadata where there is none
    labels = pd.Series(
        ['a_new', 'a_new', 'b_new', None], name='foo',
        index=pd.Index(['a', 'b', 'c', 'd'], name='sampleid'))
    sample_mc = qiime2.CategoricalMetadataColumn(labels)

    table = biom.Table(np.array([[1, 2, 3],
                                 [30, 20, 10]]),
                       sample_ids=['a', 'b', 'c'],
                       observation_ids=['x', 'y'])

    # 'a_new' merges samples a and b: ceil(mean(1, 2)) == 2 and
    # mean(30, 20) == 25; 'b_new' is sample c unchanged.
    expected = biom.Table(np.array([[2, 3],
                                    [25, 10]]),
                          sample_ids=['a_new', 'b_new'],
                          observation_ids=['x', 'y'])

    result = group(table, axis='sample', metadata=sample_mc,
                   mode='mean-ceiling')
    self.assertEqual(expected, result)
def test_seqs_restrict_metadata(self):
    """sample_longitudinal only selects IDs 'B' and 'U' for this input.

    The call is repeated ``_N_TEST_ITERATIONS`` times and the same
    selection is expected on every iteration.
    """
    seqs_fp = self.get_data_path('context-seqs-4.fasta')
    context_seqs = DNAFASTAFormat(seqs_fp, 'r')

    expected_dates = pd.Series(
        ['2019-11-01', '2020-01-17'],
        index=pd.Index(['B', 'U'], name='id'),
        name='date-md')
    exp_md = qiime2.CategoricalMetadataColumn(expected_dates)

    for _ in range(self._N_TEST_ITERATIONS):
        sel = sample_longitudinal(self.md2, context_seqs)

        self.assertEqual(sel.inclusion.sum(), 2)
        self.assertTrue(sel.inclusion['B'])
        self.assertTrue(sel.inclusion['U'])
        self.assertEqual(sel.metadata.get_column('date-md'), exp_md)
        self.assertEqual(sel.label, 'sample_longitudinal')
def test_confusion_matrix_dtype_coercion(self):
    """confusion_matrix handles integer predictions against str targets.

    Predictions are ints while the truth column stores the same labels as
    strings; the call must complete and write index.html to ``self.tmpd``.
    """
    sample_ids = ['a', 'b', 'c', 'd', 'e', 'f']
    predictions = pd.Series(
        [1, 1, 1, 2, 2, 2],
        index=pd.Index(sample_ids, name='sample_id'),
        name='features')

    # NOTE: the targets are numbers but represented as str
    truth = qiime2.CategoricalMetadataColumn(
        pd.Series(['1', '2', '1', '2', '1', '2'],
                  index=pd.Index(sample_ids, name='sample-id'),
                  name='target'))

    confusion_matrix(self.tmpd, predictions, truth)

    # assertIn lists the directory contents on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertIn('index.html', listdir(self.tmpd))
def test_ancom_no_volcano_plot(self):
    """ancom reports that no volcano plot could be generated.

    Every cell of the input table is identical; the rendered index.html
    must exist, be non-empty, and contain the 'Unable to generate
    volcano plot' message.
    """
    t = pd.DataFrame([[1, 1], [1, 1], [1, 1], [1, 1]],
                     index=['S1', 'S2', 'S3', 'S4'],
                     columns=['O1', 'O2'])
    c = qiime2.CategoricalMetadataColumn(
        pd.Series(['0', '0', '1', '2'], name='n',
                  index=pd.Index(['S1', 'S2', 'S3', 'S4'], name='id')))

    ancom(output_dir=self.temp_dir.name, table=t + 1, metadata=c)

    index_fp = os.path.join(self.temp_dir.name, 'index.html')
    self.assertTrue(os.path.exists(index_fp))
    self.assertTrue(os.path.getsize(index_fp) > 0)
    with open(index_fp) as fh:
        html = fh.read()
    # assertIn shows the searched text on failure, matching the other
    # HTML content checks in this suite.
    self.assertIn('Unable to generate volcano plot', html)
def test_missing_sample_ids(self):
    """Grouping by sample fails when table samples lack metadata.

    The table holds 's1', 's2', 's4', 's5' while the metadata column
    holds 's1', 's3', 's4', 's6', so 's2' and 's5' must both be named in
    the ValueError.
    """
    sample_mc = qiime2.CategoricalMetadataColumn(
        pd.Series(['g0', 'g2', 'g0', 'g2'], name='foo',
                  index=pd.Index(['s1', 's3', 's4', 's6'],
                                 name='sampleid')))
    data = np.array([[0, 1, 2, 3],
                     [10, 11, 12, 13],
                     [100, 110, 120, 130]])
    table = biom.Table(data, sample_ids=['s1', 's2', 's4', 's5'],
                       observation_ids=['x', 'y', 'z'])

    with self.assertRaisesRegex(ValueError,
                                'not present.*s2.*s5') as e:
        group(table, axis='sample', metadata=sample_mc, mode='sum')

    # These checks must sit after the ``with`` block: group() raises
    # inside it, so anything following that call within the block would
    # never run.
    self.assertIn('s2', str(e.exception))
    self.assertIn('s5', str(e.exception))
def test_ancom_no_tables(self):
    """ancom reports 'No significant features found' for this input.

    The rendered index.html must exist, be non-empty, and contain that
    message.
    """
    t = pd.DataFrame([[2, 1, 2], [2, 2, 2], [2, 2, 2]],
                     index=['S1', 'S2', 'S3'],
                     columns=['O1', 'O2', 'O3'])
    c = qiime2.CategoricalMetadataColumn(
        pd.Series(['0', '0', '1'], name='n',
                  index=pd.Index(['S1', 'S2', 'S3'], name='id')))

    ancom(output_dir=self.temp_dir.name, table=t + 1, metadata=c)

    index_fp = os.path.join(self.temp_dir.name, 'index.html')
    self.assertTrue(os.path.exists(index_fp))
    self.assertTrue(os.path.getsize(index_fp) > 0)
    with open(index_fp) as fh:
        html = fh.read()
    # assertIn shows the searched text on failure, matching the other
    # HTML content checks in this suite.
    self.assertIn('No significant features found', html)
def test_numeric_strings(self):
    """Category labels that look numeric are kept as string group IDs.

    All three samples share the label '-4.2', so summing collapses them
    into a single sample with that ID.
    """
    table = biom.Table(np.array([[1, 2, 3],
                                 [30, 20, 10]]),
                       sample_ids=['a', 'b', 'c'],
                       observation_ids=['x', 'y'])

    labels = pd.Series(['-4.2', '-4.2', '-4.2'], name='foo',
                       index=pd.Index(['a', 'b', 'c'], name='sampleid'))
    sample_mc = qiime2.CategoricalMetadataColumn(labels)

    # 1 + 2 + 3 == 6 and 30 + 20 + 10 == 60
    expected = biom.Table(np.array([[6], [60]]),
                          sample_ids=['-4.2'],
                          observation_ids=['x', 'y'])

    result = group(table, axis='sample', metadata=sample_mc, mode='sum')
    self.assertEqual(expected, result)
def test_superset_sample_group(self):
    """Metadata may list more sample IDs than the table contains.

    's3' and 's6' (both 'g2') are absent from the table, so only groups
    'g0' and 'g1' appear in the summed result.
    """
    labels = pd.Series(
        ['g0', 'g1', 'g2', 'g0', 'g1', 'g2'], name='foo',
        index=pd.Index(['s1', 's2', 's3', 's4', 's5', 's6'],
                       name='sampleid'))
    sample_mc = qiime2.CategoricalMetadataColumn(labels)

    counts = np.array([[0, 1, 2, 3],
                       [10, 11, 12, 13],
                       [100, 110, 120, 130]])
    table = biom.Table(counts,
                       sample_ids=['s1', 's2', 's4', 's5'],
                       observation_ids=['x', 'y', 'z'])

    # g0 = s1 + s4, g1 = s2 + s5
    expected = biom.Table(np.array([[2, 4],
                                    [22, 24],
                                    [220, 240]]),
                          sample_ids=['g0', 'g1'],
                          observation_ids=['x', 'y', 'z'])

    result = group(table, axis='sample', metadata=sample_mc, mode='sum')
    self.assertEqual(expected, result)
def test_ancom(self):
    """ancom writes the expected statistics and all report files.

    Checks the contents of ancom.tsv, that index.html, data.tsv and
    percent-abundances.tsv exist and are non-empty, and that the HTML
    includes the percentile table headers.
    """
    features = ['O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7']
    samples = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6']

    # Rows are written per feature for readability, then transposed so
    # that samples become the rows.
    per_feature = pd.DataFrame(
        [[9, 9, 9, 19, 19, 19],
         [10, 11, 10, 20, 20, 20],
         [9, 10, 9, 9, 10, 9],
         [9, 10, 9, 9, 9, 8],
         [9, 10, 9, 9, 9, 9],
         [9, 10, 9, 9, 9, 10],
         [9, 12, 9, 9, 9, 11]],
        index=features,
        columns=samples)
    t = per_feature.T
    c = qiime2.CategoricalMetadataColumn(
        pd.Series(['a', 'a', 'a', '1', '1', '1'], name='n',
                  index=pd.Index(samples, name='id')))

    out_dir = self.temp_dir.name
    ancom(output_dir=out_dir, table=t + 1, metadata=c)

    res = pd.read_csv(os.path.join(out_dir, 'ancom.tsv'),
                      index_col=0, sep='\t')
    exp = pd.DataFrame(
        {
            'W': np.array([5, 5, 2, 2, 2, 2, 2]),
            'Reject null hypothesis': np.array(
                [True, True, False, False, False, False, False],
                dtype=bool),
        },
        index=features,
    )
    pdt.assert_frame_equal(res, exp)

    index_fp = os.path.join(out_dir, 'index.html')
    data_fp = os.path.join(out_dir, 'data.tsv')
    tsv_fp = os.path.join(out_dir, 'percent-abundances.tsv')
    for fp in (index_fp, data_fp, tsv_fp):
        self.assertTrue(os.path.exists(fp))
        self.assertTrue(os.path.getsize(fp) > 0)

    with open(index_fp, 'r') as fh:
        html = fh.read()
    for header in ('<th>Percentile</th>', '<th>Group</th>', '<th>O1</th>'):
        self.assertIn(header, html)
def test_rev_comp_mapping_barcodes(self):
    """emp_single demultiplexes with rev_comp_mapping_barcodes=True.

    Verifies the number of per-sample files, which input records land in
    each file, and the manifest contents.
    """
    barcode_series = pd.Series(
        ['TTTT', 'GGTT', 'TTAA', 'TTCC', 'GCCG'], name='bc',
        index=pd.Index(
            ['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
            name='id'))
    barcodes = qiime2.CategoricalMetadataColumn(barcode_series)

    actual = emp_single(self.bsi, barcodes, rev_comp_mapping_barcodes=True)
    output_fastq = list(actual.sequences.iter_views(FastqGzFormat))

    # five per-sample files were written
    self.assertEqual(len(output_fastq), 5)

    # Indices into self.sequences expected in each output file, in the
    # order sample1 .. sample5.
    expected_indices = [[0, 5], [2, 4], [1, 3], [7, 10], [6, 8, 9]]
    for entry, seq_indices in zip(output_fastq, expected_indices):
        self._validate_sample_fastq(entry[1].open(), self.sequences,
                                    seq_indices)

    # manifest is correct
    act_manifest = list(actual.manifest.view(FastqManifestFormat).open())
    exp_manifest = [
        'sample-id,filename,direction\n',
        'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
        'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
        'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
        'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
        'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n',
    ]
    self._compare_manifests(act_manifest, exp_manifest)
def test_superset_feature_group(self):
    """Metadata may list more feature IDs than the table contains.

    Feature 'g' (labelled 'extra') is absent from the table, so only
    groups 'g0', 'g1' and 'g2' appear in the summed result.
    """
    labels = pd.Series(
        ['g0', 'g0', 'g1', 'g2', 'g1', 'g2', 'extra'], name='foo',
        index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'],
                       name='featureid'))
    feature_mc = qiime2.CategoricalMetadataColumn(labels)

    counts = np.array([[1, 0, 0],
                       [1, 10, 10],
                       [0, 0, 100],
                       [5, 5, 5],
                       [0, 1, 100],
                       [7, 8, 9]])
    # g is missing on purpose
    table = biom.Table(counts,
                       sample_ids=['s1', 's2', 's3'],
                       observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

    # g0 = a + b, g1 = c + e, g2 = d + f
    expected = biom.Table(np.array([[2, 10, 10],
                                    [0, 1, 200],
                                    [12, 13, 14]]),
                          sample_ids=['s1', 's2', 's3'],
                          observation_ids=['g0', 'g1', 'g2'])

    result = group(table, axis='feature', metadata=feature_mc, mode='sum')
    self.assertEqual(expected, result)
def setUp(self):
    """Create a metadata column, a feature table and a distance matrix.

    All three share the sample IDs 'a'..'f'. The table and the distance
    matrix are imported as QIIME 2 artifacts.
    """
    super().setUp()

    categories = pd.Series(
        ['a', 'a', 'a', 'b', 'b', 'b'],
        index=pd.Index(list('abcdef'), name='id'),
        name='foo')
    self.md = qiime2.CategoricalMetadataColumn(categories)

    counts = np.array([[13, 26, 37, 3, 6, 1],
                       [33, 24, 23, 5, 6, 2],
                       [38, 26, 33, 4, 1, 0],
                       [3, 2, 1, 22, 25, 31],
                       [2, 1, 3, 44, 46, 42]])
    tab = biom.Table(counts,
                     observation_ids=list('vwxyz'),
                     sample_ids=list('abcdef'))
    self.tab = qiime2.Artifact.import_data('FeatureTable[Frequency]', tab)

    def _abs_diff(x, y):
        # Distance between two scalar positions on a line.
        return abs(y - x)

    dist = skbio.DistanceMatrix.from_iterable(
        iterable=[1, 16, 2, 1, 16, 17],
        metric=_abs_diff,
        keys=list('abcdef'))
    self.dist = qiime2.Artifact.import_data('DistanceMatrix', dist)
def test_ancom_zero_division(self):
    """ancom with the 'log' transform reports non-numeric results for O2.

    The rendered index.html must not contain 'Infinity' and must name
    O2 in its non-numeric-results message.
    """
    t = pd.DataFrame([[10, 0], [11, 0], [12, 0], [13, 0],
                      [1000, 10], [1000, 10]],
                     index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
                     columns=['O1', 'O2'])
    c = qiime2.CategoricalMetadataColumn(
        pd.Series(['0', '0', '1', '1', '2', '2'], name='n',
                  index=pd.Index(['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
                                 name='id'))
    )

    ancom(output_dir=self.temp_dir.name, table=t + 1, metadata=c,
          transform_function='log')

    with open(os.path.join(self.temp_dir.name, 'index.html')) as fh:
        html = fh.read()

    # assertNotIn/assertIn show the searched text on failure instead of
    # a bare "True is not false".
    self.assertNotIn('Infinity', html)
    # NOTE(review): the whitespace after '\n' must match the rendered
    # template exactly -- confirm against the template.
    self.assertIn('non-numeric results:\n <strong>O2</strong>', html)
def test_subsample_higher_than_seqs_count(self):
    """summarize warns when n exceeds the number of sequences.

    Only one barcode/sequence record is demultiplexed while ``n=50`` is
    requested; quality-plot.html must contain a warning.
    """
    bsi = BarcodeSequenceFastqIterator(self.barcodes[:1],
                                       self.sequences[:1])
    barcode_series = pd.Series(['AAAA'], name='bc',
                               index=pd.Index(['sample1'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)

    demux_data = emp_single(bsi, barcode_map)

    with tempfile.TemporaryDirectory() as output_dir:
        qual_view = _PlotQualView(demux_data, paired=False)
        result = summarize(output_dir, qual_view, n=50)
        self.assertTrue(result is None)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()
        self.assertIn('<strong>Warning:</strong>', html)
def test_basic(self):
    """summarize writes every expected output file for single-end data.

    Also checks the minimum/maximum cells in overview.html and that the
    per-sample counts CSV mentions 'sample_1'.
    """
    bsi = BarcodeSequenceFastqIterator(self.barcodes, self.sequences)
    barcode_series = pd.Series(
        ['AAAA', 'AACC'], name='bc',
        index=pd.Index(['sample_1', 'sample2'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)

    demux_data = emp_single(bsi, barcode_map)

    # test that an index.html file is created and that it has size > 0
    with tempfile.TemporaryDirectory() as output_dir:
        # TODO: Remove _PlotQualView wrapper
        qual_view = _PlotQualView(demux_data, paired=False)
        result = summarize(output_dir, qual_view, n=2)
        self.assertTrue(result is None)

        index_fp = os.path.join(output_dir, 'overview.html')
        csv_fp = os.path.join(output_dir, 'per-sample-fastq-counts.csv')
        pdf_fp = os.path.join(output_dir, 'demultiplex-summary.pdf')
        png_fp = os.path.join(output_dir, 'demultiplex-summary.png')
        qual_forward_fp = os.path.join(
            output_dir, 'forward-seven-number-summaries.csv')

        # Each output must exist and be non-empty.
        for fp in (index_fp, csv_fp, pdf_fp, png_fp, qual_forward_fp):
            self.assertTrue(os.path.exists(fp))
            self.assertTrue(os.path.getsize(fp) > 0)

        with open(index_fp, 'r') as fh:
            html = fh.read()
        self.assertIn('<td>Minimum:</td><td>1</td>', html)
        self.assertIn('<td>Maximum:</td><td>3</td>', html)

        with open(csv_fp, 'r') as ch:
            csv = ch.read()
        self.assertIn('sample_1', csv)
def test_chain_with_metadata(self):
    """Metadata passed to chained actions is recorded in provenance.

    Runs two identity actions in sequence (one taking Metadata, one a
    metadata column) and checks the metadata.tsv files found under the
    final result's provenance directory.
    """
    feature_index = pd.Index(['0', '1', '2'], name='feature ID')
    df = pd.DataFrame({'a': ['1', '2', '3']}, index=feature_index)

    a = qiime2.Artifact.import_data('IntSequence1', [1, 2, 3])
    m = qiime2.Metadata(df)
    mc = qiime2.CategoricalMetadataColumn(df['a'])

    b = dummy_plugin.actions.identity_with_metadata(a, m).out
    c = dummy_plugin.actions.identity_with_metadata_column(b, mc).out

    p_dir = c._archiver.provenance_dir

    # Metadata given to the first action lives under that artifact's
    # entry in the provenance of the final result.
    upstream_md_fp = (p_dir / 'artifacts' / str(b.uuid) / 'action' /
                      'metadata.tsv')
    new_m = qiime2.Metadata.load(str(upstream_md_fp))
    pdt.assert_frame_equal(m.to_dataframe(), new_m.to_dataframe())

    # The column given to the second action is stored with its own action.
    expected_tsv = (
        'feature ID\ta\n#q2:types\tcategorical\n0\t1\n1\t2\n2\t3\n')
    with (p_dir / 'action' / 'metadata.tsv').open() as fh:
        self.assertEqual(fh.read(), expected_tsv)
def setUp(self):
    """Create a 'bugs' metadata column, a feature table and wider metadata.

    ``self.md2`` has an extra ID ('peanut') that the table lacks, plus
    columns with strings, floats, ints, NaNs and negative numbers.
    """
    super().setUp()

    bugs = pd.Series(['a', 'a', 'b', 'b', 'b'],
                     index=pd.Index(['a', 'b', 'c', 'd', 'e'],
                                    name='SampleID'),
                     name='bugs')
    self.md = qiime2.CategoricalMetadataColumn(bugs)

    counts = np.array([[3, 6, 7, 3, 6],
                       [3, 4, 5, 6, 2],
                       [8, 6, 4, 1, 0],
                       [8, 6, 4, 1, 0],
                       [8, 6, 4, 1, 0]])
    tab = biom.Table(counts,
                     observation_ids=['v', 'w', 'x', 'y', 'z'],
                     sample_ids=['a', 'b', 'c', 'd', 'e'])
    self.tab = qiime2.Artifact.import_data('FeatureTable[Frequency]', tab)

    md2_columns = {
        'trash': ['a', 'a', 'b', 'b', 'b', 'junk'],
        'floats': [0.1, 0.1, 1.3, 1.8, 1000.1, 0.1],
        'ints': [0, 1, 2, 2, 2, 0],
        'nans': [1, 1, 2, 2, np.nan, np.nan],
        'negatives': [-7, -3, -1.2, -4, -9, -1],
    }
    md2 = pd.DataFrame(
        md2_columns,
        index=pd.Index(['a', 'b', 'c', 'd', 'e', 'peanut'],
                       name='SampleID'))
    self.md2 = qiime2.Metadata(md2)
def test_paired_end(self):
    """summarize produces forward and reverse outputs for paired data.

    Checks both seven-number-summary CSVs and the forward/reverse
    headings in quality-plot.html.
    """
    reverse = [('@s1/1 abc/1', 'CCC', '+', 'YYY'),
               ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
               ('@s3/1 abc/1', 'TTT', '+', 'PPP')]
    # Only the first three barcode/sequence records are used, matching
    # the three reverse reads above.
    bpsi = BarcodePairedSequenceFastqIterator(
        self.barcodes[:3], self.sequences[:3], reverse)

    barcode_series = pd.Series(
        ['AAAA', 'AACC', 'TTAA'], name='bc',
        index=pd.Index(['sample1', 'sample2', 'sample3'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)

    demux_data = emp_paired(bpsi, barcode_map)

    with tempfile.TemporaryDirectory() as output_dir:
        qual_view = _PlotQualView(demux_data, paired=True)
        result = summarize(output_dir, qual_view, n=2)
        self.assertTrue(result is None)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        qual_forward_fp = os.path.join(
            output_dir, 'forward-seven-number-summaries.csv')
        qual_reverse_fp = os.path.join(
            output_dir, 'reverse-seven-number-summaries.csv')

        # Both per-direction summaries must exist and be non-empty.
        for fp in (qual_forward_fp, qual_reverse_fp):
            self.assertTrue(os.path.exists(fp))
            self.assertTrue(os.path.getsize(fp) > 0)

        with open(plot_fp, 'r') as fh:
            html = fh.read()
        for direction in ('Forward', 'Reverse'):
            self.assertIn(
                '<h5 class="text-center">%s Reads</h5>' % direction, html)
def test_ancom_integer_indices(self):
    """ancom joins metadata to the table on ID labels, not on position."""
    # The idea behind this test is to use integer indices to confirm
    # that the metadata column mapping is joining on labels, not on
    # indices. If it was joining on the index, the metadata would map in
    # the opposite direction, resulting in no significant results being
    # rendered to the output HTML table.
    per_feature = pd.DataFrame(
        [[9, 9, 9, 19, 19, 19],
         [10, 11, 10, 20, 20, 20],
         [9, 10, 9, 9, 10, 9],
         [9, 10, 9, 9, 9, 8],
         [9, 10, 9, 9, 9, 9],
         [9, 10, 9, 9, 9, 10],
         [9, 12, 9, 9, 9, 11]],
        index=['O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7'],
        columns=['1', '2', '3', '4', '5', '6'])
    t = per_feature.T

    # Metadata IDs are listed in reverse order relative to the table.
    categories = pd.Series(
        ['1', '0', '0', '0', '1', '0'], name='n',
        index=pd.Index(['6', '5', '4', '3', '2', '1'], name='id'))
    c = qiime2.CategoricalMetadataColumn(categories)

    ancom(output_dir=self.temp_dir.name, table=t + 1, metadata=c)

    index_fp = os.path.join(self.temp_dir.name, 'index.html')
    with open(index_fp, 'r') as fh:
        html = fh.read()
    self.assertIn('<th>O7</th>', html)
def test_anosim_pairwise(self):
    """Pairwise ANOSIM writes the report and one boxplot pair per group.

    Checks that index.html exists, that exactly the 'a' and 'b' boxplots
    (PDF and PNG) are written, and that the report mentions the ANOSIM
    results, the permutation count and the pairwise section without a
    warning.
    """
    dm = skbio.DistanceMatrix(
        [[0.00, 0.25, 0.25],
         [0.25, 0.00, 0.00],
         [0.25, 0.00, 0.00]],
        ids=['sample1', 'sample2', 'sample3'])
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['a', 'b', 'b'], name='a or b',
                  index=pd.Index(['sample1', 'sample2', 'sample3'],
                                 name='id')))

    with tempfile.TemporaryDirectory() as output_dir:
        beta_group_significance(output_dir, dm, md, method='anosim',
                                permutations=42, pairwise=True)
        index_fp = os.path.join(output_dir, 'index.html')
        self.assertTrue(os.path.exists(index_fp))

        # all expected boxplots are generated
        for group_name in ('a', 'b'):
            for ext in ('pdf', 'png'):
                boxplot_fp = os.path.join(
                    output_dir, '%s-boxplots.%s' % (group_name, ext))
                self.assertTrue(os.path.exists(boxplot_fp))

        # no extra boxplots are generated
        self.assertEqual(len(glob.glob('%s/*-boxplots.pdf' % output_dir)),
                         2)
        self.assertEqual(len(glob.glob('%s/*-boxplots.png' % output_dir)),
                         2)

        # Read the report once through a context manager; the previous
        # form opened the file four times and never closed a handle.
        with open(index_fp) as fh:
            html = fh.read()
        self.assertIn('ANOSIM results', html)
        self.assertIn('<td>42</td>', html)
        self.assertNotIn('Warning', html)
        self.assertIn('Pairwise anosim', html)
def test_inconsistent_sequence_length_paired(self):
    """summarize handles paired reads of differing lengths.

    Forward and reverse reads have lengths 1, 3, 5 and 7. For each
    subsample size n in 1..5 the data.jsonp payload is parsed and its
    sequence count, minimum lengths and effective n are checked.
    """
    forward = [('@s1/1 abc/1', 'G', '+', 'Y'),
               ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
               ('@s3/1 abc/1', 'AAAAA', '+', 'PPPPP'),
               ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP')]
    reverse = [('@s1/1 abc/1', 'AAAAAAA', '+', 'YYYYYYY'),
               ('@s2/1 abc/1', 'TTTTT', '+', 'PPPPP'),
               ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
               ('@s4/1 abc/1', 'C', '+', 'P')]
    bpsi = BarcodePairedSequenceFastqIterator(self.barcodes, forward,
                                              reverse)

    barcode_series = pd.Series(
        ['AAAA', 'AACC'], name='bc',
        index=pd.Index(['sample1', 'sample2'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_series)

    demux_data = emp_paired(bpsi, barcode_map)

    lengths = [1, 3, 5, 7]
    for n in range(1, 6):
        with tempfile.TemporaryDirectory() as output_dir:
            # Minimum lengths accepted for this n: a shrinking prefix of
            # ``lengths`` while n < 4, otherwise only the shortest read.
            if n < 4:
                lengths_ = lengths[0:5 - n]
            else:
                lengths_ = [1]

            # TODO: Remove _PlotQualView wrapper
            qual_view = _PlotQualView(demux_data, paired=True)
            summarize(output_dir, qual_view, n=n)

            plot_fp = os.path.join(output_dir, 'data.jsonp')
            with open(plot_fp, 'r') as fh:
                jsonp = fh.read()

            # Turn the 'app.init(...);' wrapper into a JSON array so it
            # can be parsed; the payload is the first argument.
            json_ = jsonp.replace('app.init(', '[')
            json_ = json_.replace(');', ']')
            payload = json.loads(json_)[0]

            self.assertEqual(payload["totalSeqCount"], 4)
            self.assertIn(payload["minSeqLen"]["forward"], lengths_)
            self.assertIn(payload["minSeqLen"]["reverse"], lengths_)
            self.assertEqual(payload["n"], min(n, 4))
def test_confusion_matrix_vmax_too_low(self):
    """confusion_matrix raises ValueError when vmax=.5 is too low."""
    b = qiime2.CategoricalMetadataColumn(self.a)

    # The message must say that vmax has to be greater than a value,
    # then mention 0.5 (preceded by two whitespace characters) and 1.0.
    expected_message = (r'vmax must be greater than.*'
                        r'\s\s0\.5.*less.*1\.0')

    with self.assertRaisesRegex(ValueError, expected_message):
        confusion_matrix(self.tmpd, self.a, b, vmin=None, vmax=.5)