Exemplo n.º 1
0
 def test_duplicate_barcodes(self):
     """Expect ValueError when two samples share the same barcode."""
     sample_ids = pd.Index(['sample1', 'sample2'], name='id')
     duplicated = pd.Series(['AACC', 'AACC'], name='bc', index=sample_ids)
     column = qiime2.CategoricalMetadataColumn(duplicated)
     # Both sample ids carry 'AACC', so the call below is expected to fail.
     with self.assertRaises(ValueError):
         emp_paired(self.bpsi, column)
Exemplo n.º 2
0
    def test_sequence_length_uses_subsample_paired(self):
        """Check that minSeqLen reflects the subsampled pairs only.

        With ``n=2`` the summary is built from a random subsample; the
        expected minimum lengths (3 forward, 5 reverse) belong to s1, which
        is not the shortest record in either direction.
        """
        # Will select s1 and s2 which aren't the shortest pairs
        random.seed(6)

        fwd_records = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
                       ('@s2/1 abc/1', 'CCCCC', '+', 'PPPPP'),
                       ('@s3/1 abc/1', 'A', '+', 'P'),
                       ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP')]
        rev_records = [('@s1/1 abc/1', 'AAAAA', '+', 'YYYYY'),
                       ('@s2/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP'),
                       ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
                       ('@s4/1 abc/1', 'C', '+', 'P')]
        paired_iter = BarcodePairedSequenceFastqIterator(
            self.barcodes, fwd_records, rev_records)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_paired(paired_iter, barcode_column)
        with tempfile.TemporaryDirectory() as output_dir:
            summarize(output_dir, _PlotQualView(demuxed, paired=True), n=2)
            with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
                raw = fh.read()

        # Turn the ``app.init(...);`` JSONP wrapper into a JSON array so the
        # standard parser can read it; the payload is the first element.
        as_json_array = raw.replace('app.init(', '[').replace(');', ']')
        payload = json.loads(as_json_array)[0]
        min_lengths = payload["minSeqLen"]
        self.assertEqual(min_lengths["forward"], 3)
        self.assertEqual(min_lengths["reverse"], 5)
Exemplo n.º 3
0
    def test_paired_end(self):
        """Summarize three paired records and check both read headings.

        Verifies that ``summarize`` returns ``None`` and that the generated
        ``quality-plot.html`` contains the forward and reverse headings.
        """
        reverse = [('@s1/1 abc/1', 'CCC', '+', 'YYY'),
                   ('@s2/1 abc/1', 'GGG', '+', 'PPP'),
                   ('@s3/1 abc/1', 'TTT', '+', 'PPP')]
        # Only the first three barcode/forward fixtures pair with ``reverse``.
        paired_iter = BarcodePairedSequenceFastqIterator(
            self.barcodes[:3], self.sequences[:3], reverse)

        sample_ids = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC', 'TTAA'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_paired(paired_iter, barcode_column)
        with tempfile.TemporaryDirectory() as output_dir:
            view = _PlotQualView(demuxed, paired=True)
            outcome = summarize(output_dir, view, n=2)
            self.assertIsNone(outcome)

            report_fp = os.path.join(output_dir, 'quality-plot.html')
            with open(report_fp, 'r') as fh:
                html = fh.read()

        for heading in ('<h5 class="text-center">Forward Reads</h5>',
                        '<h5 class="text-center">Reverse Reads</h5>'):
            self.assertIn(heading, html)
Exemplo n.º 4
0
    def test_inconsistent_sequence_length_paired(self):
        """Check summary payloads for reads of differing lengths.

        Runs ``summarize`` with subsample sizes 1 through 5 over four read
        pairs of lengths 1, 3, 5 and 7, and checks the reported total count,
        the effective ``n`` and that each minimum length is an allowed value.
        """
        fwd_records = [('@s1/1 abc/1', 'G', '+', 'Y'),
                       ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
                       ('@s3/1 abc/1', 'AAAAA', '+', 'PPPPP'),
                       ('@s4/1 abc/1', 'TTTTTTT', '+', 'PPPPPPP')]
        rev_records = [('@s1/1 abc/1', 'AAAAAAA', '+', 'YYYYYYY'),
                       ('@s2/1 abc/1', 'TTTTT', '+', 'PPPPP'),
                       ('@s3/1 abc/1', 'GGG', '+', 'PPP'),
                       ('@s4/1 abc/1', 'C', '+', 'P')]
        paired_iter = BarcodePairedSequenceFastqIterator(
            self.barcodes, fwd_records, rev_records)

        sample_ids = pd.Index(['sample1', 'sample2'], name='id')
        barcode_series = pd.Series(['AAAA', 'AACC'], name='bc',
                                   index=sample_ids)
        barcode_column = qiime2.CategoricalMetadataColumn(barcode_series)

        demuxed = emp_paired(paired_iter, barcode_column)
        all_lengths = [1, 3, 5, 7]
        total = len(all_lengths)
        for n in range(1, 6):
            # The set of acceptable minimum lengths shrinks as n grows; from
            # n=4 onward only the overall shortest length (1) is accepted.
            allowed = all_lengths[:max(5 - n, 1)]
            with tempfile.TemporaryDirectory() as output_dir:
                # TODO: Remove _PlotQualView wrapper
                view = _PlotQualView(demuxed, paired=True)
                summarize(output_dir, view, n=n)
                with open(os.path.join(output_dir, 'data.jsonp'), 'r') as fh:
                    raw = fh.read()

            # Turn the ``app.init(...);`` JSONP wrapper into a JSON array.
            as_json_array = raw.replace('app.init(', '[').replace(');', ']')
            payload = json.loads(as_json_array)[0]
            self.assertEqual(payload["totalSeqCount"], 4)
            self.assertIn(payload["minSeqLen"]["forward"], allowed)
            self.assertIn(payload["minSeqLen"]["reverse"], allowed)
            # n is expected to be capped at the number of available records.
            self.assertEqual(payload["n"], min(n, total))
Exemplo n.º 5
0
    def check_valid(self, *args, **kwargs):
        """Run ``emp_paired`` and verify its sequences, manifest and metadata.

        All positional and keyword arguments are forwarded unchanged to
        ``emp_paired``. The result must contain five forward and five
        reverse per-sample fastq files with the expected records, the
        expected manifest, and a phred-offset of 33 in the metadata.
        """
        actual = emp_paired(*args, **kwargs)

        def views_matching(token):
            # Collect the fastq views whose file name contains ``token``.
            views = actual.sequences.iter_views(FastqGzFormat)
            return [view for path, view in views if token in path.name]

        # five forward sample files
        forward_fastq = views_matching('R1_001.fastq')
        self.assertEqual(len(forward_fastq), 5)

        # five reverse sample files
        reverse_fastq = views_matching('R2_001.fastq')
        self.assertEqual(len(reverse_fastq), 5)

        # Indices into the input records expected in the files for sample1
        # through sample5, in that order; identical for both directions.
        expected_indices = ((0, 5), (2, 4), (1, 3), (7, 10), (6, 8, 9))

        # FORWARD first, then REVERSE: sequences in each sample are correct
        for sample_views, records in ((forward_fastq, self.forward),
                                      (reverse_fastq, self.reverse)):
            for view, indices in zip(sample_views, expected_indices):
                self._validate_sample_fastq(view.open(), records,
                                            list(indices))

        # manifest is correct
        manifest_view = actual.manifest.view(FastqManifestFormat)
        act_manifest = list(manifest_view.open())
        exp_manifest = [
            'sample-id,filename,direction\n',
            'sample1,sample1_1_L001_R1_001.fastq.gz,forward\n',
            'sample1,sample1_1_L001_R2_001.fastq.gz,reverse\n',
            'sample3,sample3_2_L001_R1_001.fastq.gz,forward\n',
            'sample3,sample3_2_L001_R2_001.fastq.gz,reverse\n',
            'sample2,sample2_3_L001_R1_001.fastq.gz,forward\n',
            'sample2,sample2_3_L001_R2_001.fastq.gz,reverse\n',
            'sample5,sample5_4_L001_R1_001.fastq.gz,forward\n',
            'sample5,sample5_4_L001_R2_001.fastq.gz,reverse\n',
            'sample4,sample4_5_L001_R1_001.fastq.gz,forward\n',
            'sample4,sample4_5_L001_R2_001.fastq.gz,reverse\n'
        ]
        self._compare_manifests(act_manifest, exp_manifest)

        # metadata is correct
        metadata_view = actual.metadata.view(YamlFormat)
        act_metadata = list(metadata_view.open())
        self.assertEqual(act_metadata, ["{phred-offset: 33}\n"])
0
 def test_no_matched_barcodes(self):
     """Expect ValueError for a barcode map with no usable barcodes.

     Per the test name, neither 'CCCC' nor 'GGCC' is expected to occur in
     ``self.bpsi`` (fixture defined outside this method -- confirm there).
     """
     # Built as a named series with an 'id' index and wrapped in
     # CategoricalMetadataColumn, matching the other emp_paired tests;
     # the older qiime2.MetadataCategory wrapper is no longer used.
     barcodes = pd.Series(['CCCC', 'GGCC'],
                          name='bc',
                          index=pd.Index(['sample1', 'sample2'], name='id'))
     barcodes = qiime2.CategoricalMetadataColumn(barcodes)
     with self.assertRaises(ValueError):
         emp_paired(self.bpsi, barcodes)
Exemplo n.º 7
0
 def test_variable_length_barcodes(self):
     """Expect ValueError when barcodes differ in length ('AAA' vs 'AACC')."""
     # Built as a named series with an 'id' index and wrapped in
     # CategoricalMetadataColumn, matching the other emp_paired tests;
     # the older qiime2.MetadataCategory wrapper is no longer used.
     barcodes = pd.Series(['AAA', 'AACC'],
                          name='bc',
                          index=pd.Index(['sample1', 'sample2'], name='id'))
     barcodes = qiime2.CategoricalMetadataColumn(barcodes)
     with self.assertRaises(ValueError):
         emp_paired(self.bpsi, barcodes)