def test_duplicate_barcodes(self):
    # Two samples that share the same barcode ('AACC') make the barcode map
    # ambiguous; emp_paired is expected to reject it with a ValueError.
    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    duplicated = pd.Series(['AACC', 'AACC'], name='bc', index=sample_ids)
    barcode_column = qiime2.CategoricalMetadataColumn(duplicated)

    with self.assertRaises(ValueError):
        emp_paired(self.bpsi, barcode_column)
def test_sequence_length_uses_subsample_paired(self):
    # The minimum sequence lengths written to data.jsonp are expected to be
    # those of the subsampled reads (n=2), not of the full set of reads.
    # Seed first so the subsample is reproducible.
    # Will select s1 and s2 which aren't the shortest pairs
    random.seed(6)

    fwd_records = [
        ('@s1/1 abc/1', 'GGG', '+', 'YYY'),
        ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'A', '+', 'P'),
        ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
    ]
    rev_records = [
        ('@s1/1 abc/1', 'AAAAA', '+', 'YYYYY'),
        ('@s2/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
        ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s4/1 abc/1', 'C', '+', 'P'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        self.barcodes, fwd_records, rev_records)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                               index=sample_ids)
    barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

    demuxed = emp_paired(paired_iter, barcode_column)

    with tempfile.TemporaryDirectory() as output_dir:
        summarize(output_dir, _PlotQualView(demuxed, paired=True), n=2)

        jsonp_path = os.path.join(output_dir, 'data.jsonp')
        with open(plot_fp := jsonp_path, 'r') as fh:
            raw = fh.read()

        # Turn the `app.init(...);` wrapper into a JSON array so the
        # arguments can be parsed; the first argument is the payload.
        as_array = raw.replace('app.init(', '[')
        as_array = as_array.replace(');', ']')
        payload = json.loads(as_array)[0]

        min_lengths = payload["minSeqLen"]
        self.assertEqual(min_lengths["forward"], 3)
        self.assertEqual(min_lengths["reverse"], 5)
def test_paired_end(self):
    # Summarizing paired-end demultiplexed data should return None and write
    # a quality-plot.html page that has both a forward and a reverse panel.
    barcodes = self.barcodes[:3]
    forward = self.sequences[:3]
    reverse = [
        ('@s1/1 abc/1', 'CCC', '+', 'YYY'),
        ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s3/1 abc/1', 'TTT', '+', 'PPP'),
    ]
    bpsi = BarcodePairedSequenceFastqIterator(barcodes, forward, reverse)

    barcode_map = pd.Series(
        ['AAAA', 'AACC', 'TTAA'], name='bc',
        index=pd.Index(['sample1', 'sample2', 'sample3'], name='id'))
    barcode_map = qiime2.CategoricalMetadataColumn(barcode_map)

    demux_data = emp_paired(bpsi, barcode_map)

    with tempfile.TemporaryDirectory() as output_dir:
        result = summarize(output_dir,
                           _PlotQualView(demux_data, paired=True), n=2)
        # assertIsNone reports the offending value on failure, unlike
        # assertTrue(result is None) which only says "False is not true".
        self.assertIsNone(result)

        plot_fp = os.path.join(output_dir, 'quality-plot.html')
        with open(plot_fp, 'r') as fh:
            html = fh.read()

        self.assertIn('<h5 class="text-center">Forward Reads</h5>', html)
        self.assertIn('<h5 class="text-center">Reverse Reads</h5>', html)
def test_inconsistent_sequence_length_paired(self):
    # Reads of lengths 1, 3, 5 and 7 (forward) and 7, 5, 3 and 1 (reverse)
    # are summarized with subsample sizes n = 1..5; for each n the reported
    # minimum lengths must fall inside the set of values allowed for that n.
    fwd_records = [
        ('@s1/1 abc/1', 'G', '+', 'Y'),
        ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
        ('@s3/1 abc/1', 'AAAAA', '+', 'PPPPP'),
        ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
    ]
    rev_records = [
        ('@s1/1 abc/1', 'AAAAAAA', '+', 'YYYYYYY'),
        ('@s2/1 abc/1', 'TTTTT', '+', 'PPPPP'),
        ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
        ('@s4/1 abc/1', 'C', '+', 'P'),
    ]
    paired_iter = BarcodePairedSequenceFastqIterator(
        self.barcodes, fwd_records, rev_records)

    sample_ids = pd.Index(['sample1', 'sample2'], name='id')
    barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                               index=sample_ids)
    barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

    demuxed = emp_paired(paired_iter, barcode_column)

    all_lengths = [1, 3, 5, 7]
    total_reads = 4
    for n in range(1, 6):
        # Allowed minima per n: [1, 3, 5, 7], [1, 3, 5], [1, 3], then [1]
        # once n reaches the number of reads.
        # NOTE(review): presumably reflects subsampling n distinct reads,
        # so the minimum cannot be one of the n - 1 longest -- confirm
        # against summarize's sampling.
        if n < 4:
            allowed_minima = all_lengths[0:5 - n]
        else:
            allowed_minima = [1]

        with tempfile.TemporaryDirectory() as output_dir:
            # TODO: Remove _PlotQualView wrapper
            summarize(output_dir, _PlotQualView(demuxed, paired=True),
                      n=n)

            jsonp_path = os.path.join(output_dir, 'data.jsonp')
            with open(jsonp_path, 'r') as fh:
                raw = fh.read()

            # Rewrite the `app.init(...);` call into a JSON array; its
            # first element is the payload under test.
            as_array = raw.replace('app.init(', '[')
            as_array = as_array.replace(');', ']')
            payload = json.loads(as_array)[0]

            self.assertEqual(payload["totalSeqCount"], total_reads)
            min_lengths = payload["minSeqLen"]
            self.assertIn(min_lengths["forward"], allowed_minima)
            self.assertIn(min_lengths["reverse"], allowed_minima)
            # The effective subsample size is capped at the read count.
            self.assertEqual(payload["n"], min(n, total_reads))
def check_valid(self, *args, **kwargs):
    # Run emp_paired with the given arguments and check its output against
    # the fixture: five forward and five reverse per-sample fastq files with
    # the expected records, the expected manifest, and the expected metadata.
    demuxed = emp_paired(*args, **kwargs)

    # five forward sample files
    fwd_views = []
    for path, view in demuxed.sequences.iter_views(FastqGzFormat):
        if 'R1_001.fastq' in path.name:
            fwd_views.append(view)
    self.assertEqual(len(fwd_views), 5)

    # five reverse sample files
    rev_views = []
    for path, view in demuxed.sequences.iter_views(FastqGzFormat):
        if 'R2_001.fastq' in path.name:
            rev_views.append(view)
    self.assertEqual(len(rev_views), 5)

    # Indices into self.forward / self.reverse of the records expected in
    # each per-sample file, listed in the order the views are returned.
    expected_indices = (
        (0, 5),      # sequences in sample1 are correct
        (2, 4),      # sequences in sample2 are correct
        (1, 3),      # sequences in sample3 are correct
        (7, 10),     # sequences in sample4 are correct
        (6, 8, 9),   # sequences in sample5 are correct
    )

    # FORWARD:
    for view, indices in zip(fwd_views, expected_indices):
        # A fresh list per call, so the helper never sees a shared object.
        self._validate_sample_fastq(view.open(), self.forward,
                                    list(indices))

    # REVERSE:
    for view, indices in zip(rev_views, expected_indices):
        self._validate_sample_fastq(view.open(), self.reverse,
                                    list(indices))

    # manifest is correct
    manifest_view = demuxed.manifest.view(FastqManifestFormat)
    act_manifest = list(manifest_view.open())
    exp_manifest = [
        'sample-id,filename,direction\n',
        'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
        'sample1,sample1_1_L001_R2_001.fastq.gz,reverse\n',
        'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
        'sample3,sample3_2_L001_R2_001.fastq.gz,reverse\n',
        'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
        'sample2,sample2_3_L001_R2_001.fastq.gz,reverse\n',
        'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
        'sample5,sample5_4_L001_R2_001.fastq.gz,reverse\n',
        'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n',
        'sample4,sample4_5_L001_R2_001.fastq.gz,reverse\n',
    ]
    self._compare_manifests(act_manifest, exp_manifest)

    # metadata is correct
    metadata_view = demuxed.metadata.view(YamlFormat)
    act_metadata = list(metadata_view.open())
    exp_metadata = ["{phred-offset: 33}\n"]
    self.assertEqual(act_metadata, exp_metadata)
def test_no_matched_barcodes(self):
    # A barcode map whose barcodes ('CCCC', 'GGCC') are expected to match
    # none of the fixture's reads must make emp_paired raise ValueError.
    #
    # The column is built as a CategoricalMetadataColumn from a named
    # Series with an index named 'id', the same way the other paired-end
    # tests in this file build theirs, instead of the legacy
    # qiime2.MetadataCategory wrapper.
    barcodes = pd.Series(['CCCC', 'GGCC'], name='bc',
                         index=pd.Index(['sample1', 'sample2'], name='id'))
    barcodes = qiime2.CategoricalMetadataColumn(barcodes)

    with self.assertRaises(ValueError):
        emp_paired(self.bpsi, barcodes)
def test_variable_length_barcodes(self):
    # Barcodes of differing lengths ('AAA' has 3 characters, 'AACC' has 4)
    # must make emp_paired raise ValueError.
    #
    # The column is built as a CategoricalMetadataColumn from a named
    # Series with an index named 'id', the same way the other paired-end
    # tests in this file build theirs, instead of the legacy
    # qiime2.MetadataCategory wrapper.
    barcodes = pd.Series(['AAA', 'AACC'], name='bc',
                         index=pd.Index(['sample1', 'sample2'], name='id'))
    barcodes = qiime2.CategoricalMetadataColumn(barcodes)

    with self.assertRaises(ValueError):
        emp_paired(self.bpsi, barcodes)