Exemplo n.º 1
0
    def test_analyse_barcodes_with_bcl2fastq_dir_and_bad_samplesheet(self):
        """
        AnalyseBarcodes: raise exception for bcl2fastq directory as input using 'bad' samplesheet
        """
        # Make a mock bcl2fastq output directory
        datadir = MockIlluminaData(os.path.join(
            self.wd, "200428_M00879_0087_000000000-AGEW9"),
                                   "bcl2fastq2",
                                   unaligned_dir="bcl2fastq",
                                   paired_end=True)
        datadir.add_fastq_batch("AB", "AB1", "AB1_S1")
        datadir.add_fastq_batch("AB", "AB2", "AB2_S2")
        datadir.add_fastq_batch("CDE", "CDE3", "CDE3_S3")
        datadir.add_fastq_batch("CDE", "CDE4", "CDE4_S4")
        datadir.add_fastq_batch("", "Undetermined", "Undetermined_S0")
        datadir.create()
        # Add data to Fastq files
        self._insert_fastq_reads(
            os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9"))
        # Create "bad" sample sheet with mixture of empty and
        # non-empty indices in a lane
        sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
        with open(sample_sheet, 'w') as fp:
            fp.write("""[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
AB1,AB1,,,D701,CGTGTAGG,D501,GACCTGAA,AB,
AB2,AB2,,,D702,CGTGTAGG,D501,ATGTAACT,AB,
CDE3,CDE3,,,,,,,CDE,
CDE4,CDE4,,,,,,,CDE,
""")
        # Set up and run pipeline
        p = AnalyseBarcodes(bcl2fastq_dir=os.path.join(
            self.wd, "200428_M00879_0087_000000000-AGEW9", "bcl2fastq"))
        self.assertRaises(Exception,
                          AnalyseBarcodes.run,
                          os.path.join(self.wd, "barcode_analysis"),
                          sample_sheet=sample_sheet,
                          working_dir=self.wd,
                          poll_interval=POLL_INTERVAL)
Exemplo n.º 2
0
 def test_analyse_barcodes_with_bcl2fastq_dir_no_samplesheet(self):
     """
     AnalyseBarcodes: bcl2fastq directory as input (no samplesheet)

     Runs the pipeline on a mock paired-end bcl2fastq2 output directory
     without supplying a sample sheet, then checks the exit code, the
     per-Fastq counts files, the report files and the content of
     'barcodes.report'.
     """
     run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
     analysis_dir = os.path.join(self.wd, "barcode_analysis")
     counts_dir = os.path.join(analysis_dir, "counts")
     # Build the mock bcl2fastq output: two projects with two samples
     # each, plus the undetermined reads
     illumina_data = MockIlluminaData(run_dir,
                                      "bcl2fastq2",
                                      unaligned_dir="bcl2fastq",
                                      paired_end=True)
     for project, sample, fastq_base in (
             ("AB", "AB1", "AB1_S1"),
             ("AB", "AB2", "AB2_S2"),
             ("CDE", "CDE3", "CDE3_S3"),
             ("CDE", "CDE4", "CDE4_S4"),
             ("", "Undetermined", "Undetermined_S0"),
     ):
         illumina_data.add_fastq_batch(project, sample, fastq_base)
     illumina_data.create()
     # Add data to the Fastq files
     self._insert_fastq_reads(run_dir)
     # Configure the pipeline from the bcl2fastq directory and run it
     pipeline = AnalyseBarcodes(
         bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"))
     status = pipeline.run(analysis_dir,
                           working_dir=self.wd,
                           poll_interval=POLL_INTERVAL)
     # The run must succeed and produce the output directories
     self.assertEqual(status, 0)
     self.assertTrue(os.path.isdir(analysis_dir),
                     "Missing dir: barcode_analysis")
     self.assertTrue(os.path.isdir(counts_dir),
                     "Missing dir: barcode_analysis/counts")
     # One counts file is expected per R1 Fastq
     expected_counts = (
         "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
         "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
         "CDE.CDE3_S3_L001_R1_001.fastq.gz.counts",
         "CDE.CDE4_S4_L001_R1_001.fastq.gz.counts",
         "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
     )
     for counts_name in expected_counts:
         self.assertTrue(
             os.path.isfile(os.path.join(counts_dir, counts_name)),
             "Missing file: %s" % counts_name)
     # Report files in each of the output formats
     for report_name in ("barcodes.report",
                         "barcodes.xls",
                         "barcodes.html"):
         self.assertTrue(
             os.path.isfile(os.path.join(analysis_dir, report_name)),
             "Missing file: %s" % report_name)
     # Check that the report content is non-trivial
     with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
         report_text = fp.read()
     table_header = \
         "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)"
     self.assertTrue("Barcode analysis for lane #1" in report_text)
     self.assertTrue(table_header in report_text)
     # Expect 12 lines of content in total
     self.assertEqual(report_text.count('\n'), 12)
Exemplo n.º 3
0
    def test_analyse_barcodes_with_multi_lane_samplesheet(self):
        """
        AnalyseBarcodes: multi-lane sample sheet as input

        The pipeline is constructed from the sample sheet before the mock
        bcl2fastq directory exists; the directory is then created and
        passed to 'run'. Checks the exit code, the counts files, the
        report files and the per-lane content of 'barcodes.report'.
        """
        run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
        analysis_dir = os.path.join(self.wd, "barcode_analysis")
        counts_dir = os.path.join(analysis_dir, "counts")
        # Write the sample sheet: project AB in lane 1, project CDE in lane 2
        sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
        sheet_rows = (
            "[Data]",
            "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description",
            "1,AB1,AB1,,,D701,CGTGTAGG,D501,GACCTGAA,AB,",
            "1,AB2,AB2,,,D702,CGTGTAGG,D501,ATGTAACT,AB,",
            "2,CDE3,CDE3,,,D701,GACCTGAA,D501,CGTGTAGG,CDE,",
            "2,CDE4,CDE4,,,D702,ATGTAACT,D501,CGTGTAGG,CDE,",
        )
        with open(sample_sheet, 'w') as fp:
            fp.write("\n".join(sheet_rows) + "\n")
        # Set up pipeline before bcl2fastq directory exists
        pipeline = AnalyseBarcodes(sample_sheet=sample_sheet)
        # Create the bcl2fastq directory before running pipeline
        illumina_data = MockIlluminaData(run_dir,
                                         "bcl2fastq2",
                                         unaligned_dir="bcl2fastq",
                                         paired_end=True)
        for project, sample, fastq_base, lanes in (
                ("AB", "AB1", "AB1_S1", (1, )),
                ("AB", "AB2", "AB2_S2", (1, )),
                ("CDE", "CDE3", "CDE3_S3", (2, )),
                ("CDE", "CDE4", "CDE4_S4", (2, )),
                ("", "Undetermined", "Undetermined_S0", (1, 2)),
        ):
            illumina_data.add_fastq_batch(project,
                                          sample,
                                          fastq_base,
                                          lanes=lanes)
        illumina_data.create()
        # Add data to the Fastq files
        self._insert_fastq_reads(run_dir)
        # Run the pipeline, supplying the bcl2fastq directory at this point
        status = pipeline.run(analysis_dir,
                              bcl2fastq_dir=os.path.join(run_dir,
                                                         "bcl2fastq"),
                              working_dir=self.wd,
                              poll_interval=POLL_INTERVAL)
        # The run must succeed and produce the output directories
        self.assertEqual(status, 0)
        self.assertTrue(os.path.isdir(analysis_dir),
                        "Missing dir: barcode_analysis")
        self.assertTrue(os.path.isdir(counts_dir),
                        "Missing dir: barcode_analysis/counts")
        # Counts files for the R1 Fastqs in their respective lanes
        expected_counts = (
            "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
            "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
            "CDE.CDE3_S3_L002_R1_001.fastq.gz.counts",
            "CDE.CDE4_S4_L002_R1_001.fastq.gz.counts",
            "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
        )
        for counts_name in expected_counts:
            self.assertTrue(
                os.path.isfile(os.path.join(counts_dir, counts_name)),
                "Missing file: %s" % counts_name)
        # Report files in each of the output formats
        for report_name in ("barcodes.report",
                            "barcodes.xls",
                            "barcodes.html"):
            self.assertTrue(
                os.path.isfile(os.path.join(analysis_dir, report_name)),
                "Missing file: %s" % report_name)
        # Check that the report content is non-trivial
        with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
            report_text = fp.read()
        # Fragments that must all appear in the report: both lane
        # sections, the table header, the problem summary, the top-ranked
        # barcode and one line per underrepresented sample
        expected_fragments = (
            "Barcode analysis for lane #1",
            "Barcode analysis for lane #2",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
            "Problems detected:\n * Underrepresented samples",
            "   1\tTCCTGA\t\t1\t2\t100.0%\t(100.0%)",
            "The following samples are underrepresented:",
            "AB1\tCGTGTAGG+GACCTGAA\t\t<0.1%",
            "AB2\tCGTGTAGG+ATGTAACT\t\t<0.1%",
            "CDE3\tGACCTGAA+CGTGTAGG\t\t<0.1%",
            "CDE4\tATGTAACT+CGTGTAGG\t\t<0.1%",
        )
        for fragment in expected_fragments:
            self.assertTrue(fragment in report_text)
        # Expect at least 12 lines of content in total
        self.assertTrue(report_text.count('\n') >= 12)
Exemplo n.º 4
0
    def test_analyse_barcodes_with_samplesheet_and_10x_indices(self):
        """
        AnalyseBarcodes: sample sheet with 10xGenomics indices

        The sample sheet gives 'SI-GA-*' identifiers in the index column.
        The pipeline is constructed from the sample sheet before the mock
        bcl2fastq directory exists; the directory is then created and
        passed to 'run'. Checks the exit code, the counts files, the
        report files and the content of 'barcodes.report'.
        """
        run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
        analysis_dir = os.path.join(self.wd, "barcode_analysis")
        counts_dir = os.path.join(analysis_dir, "counts")
        # Write the sample sheet
        sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
        sheet_rows = (
            "[Data]",
            "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description",
            "AB1,AB1,,,D501,SI-GA-A2,AB,",
            "AB2,AB2,,,D501,SI-GA-B2,AB,",
            "CDE3,CDE3,,,D501,SI-GA-C2,CDE,",
            "CDE4,CDE4,,,D501,SI-GA-D2,CDE,",
        )
        with open(sample_sheet, 'w') as fp:
            fp.write("\n".join(sheet_rows) + "\n")
        # Set up pipeline before bcl2fastq directory exists
        pipeline = AnalyseBarcodes(sample_sheet=sample_sheet)
        # Create the bcl2fastq directory before running pipeline
        illumina_data = MockIlluminaData(run_dir,
                                         "bcl2fastq2",
                                         unaligned_dir="bcl2fastq",
                                         paired_end=True)
        for project, sample, fastq_base in (
                ("AB", "AB1", "AB1_S1"),
                ("AB", "AB2", "AB2_S2"),
                ("CDE", "CDE3", "CDE3_S3"),
                ("CDE", "CDE4", "CDE4_S4"),
                ("", "Undetermined", "Undetermined_S0"),
        ):
            illumina_data.add_fastq_batch(project, sample, fastq_base)
        illumina_data.create()
        # Add data to the Fastq files
        self._insert_fastq_reads(run_dir)
        # Run the pipeline, supplying the bcl2fastq directory at this point
        status = pipeline.run(analysis_dir,
                              bcl2fastq_dir=os.path.join(run_dir,
                                                         "bcl2fastq"),
                              working_dir=self.wd,
                              poll_interval=POLL_INTERVAL)
        # The run must succeed and produce the output directories
        self.assertEqual(status, 0)
        self.assertTrue(os.path.isdir(analysis_dir),
                        "Missing dir: barcode_analysis")
        self.assertTrue(os.path.isdir(counts_dir),
                        "Missing dir: barcode_analysis/counts")
        # One counts file is expected per R1 Fastq
        expected_counts = (
            "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
            "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
            "CDE.CDE3_S3_L001_R1_001.fastq.gz.counts",
            "CDE.CDE4_S4_L001_R1_001.fastq.gz.counts",
            "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
        )
        for counts_name in expected_counts:
            self.assertTrue(
                os.path.isfile(os.path.join(counts_dir, counts_name)),
                "Missing file: %s" % counts_name)
        # Report files in each of the output formats
        for report_name in ("barcodes.report",
                            "barcodes.xls",
                            "barcodes.html"):
            self.assertTrue(
                os.path.isfile(os.path.join(analysis_dir, report_name)),
                "Missing file: %s" % report_name)
        # Check that the report content is non-trivial
        with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
            report_text = fp.read()
        table_header = \
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)"
        self.assertTrue("Barcode analysis for lane #1" in report_text)
        self.assertTrue(table_header in report_text)
        # Expect 12 lines of content in total
        self.assertEqual(report_text.count('\n'), 12)
Exemplo n.º 5
0
    def test_analyse_barcodes_with_bcl2fastq_dir_and_samplesheet_empty_index(
            self):
        """
        AnalyseBarcodes: bcl2fastq directory as input (with samplesheet, empty index)

        Uses a mock bcl2fastq directory holding a single sample and a
        sample sheet whose only sample has no index sequences. Checks the
        exit code, the counts file, the report files and the content of
        'barcodes.report'.
        """
        run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
        analysis_dir = os.path.join(self.wd, "barcode_analysis")
        counts_dir = os.path.join(analysis_dir, "counts")
        # Build the mock bcl2fastq output with one sample only
        illumina_data = MockIlluminaData(run_dir,
                                         "bcl2fastq2",
                                         unaligned_dir="bcl2fastq",
                                         paired_end=True)
        illumina_data.add_fastq_batch("AB", "AB1", "AB1_S1")
        illumina_data.create()
        # Add data to the Fastq files
        self._insert_fastq_reads(run_dir)
        # Write a sample sheet with a single sample and empty index fields
        sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
        sheet_rows = (
            "[Data]",
            "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description",
            "AB1,AB1,,,,,,,AB,",
        )
        with open(sample_sheet, 'w') as fp:
            fp.write("\n".join(sheet_rows) + "\n")
        # Configure the pipeline from the bcl2fastq directory and run it
        # with the sample sheet
        pipeline = AnalyseBarcodes(
            bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"))
        status = pipeline.run(analysis_dir,
                              sample_sheet=sample_sheet,
                              working_dir=self.wd,
                              poll_interval=POLL_INTERVAL)
        # The run must succeed and produce the output directories
        self.assertEqual(status, 0)
        self.assertTrue(os.path.isdir(analysis_dir),
                        "Missing dir: barcode_analysis")
        self.assertTrue(os.path.isdir(counts_dir),
                        "Missing dir: barcode_analysis/counts")
        # Counts file for the single R1 Fastq
        counts_name = "AB.AB1_S1_L001_R1_001.fastq.gz.counts"
        self.assertTrue(
            os.path.isfile(os.path.join(counts_dir, counts_name)),
            "Missing file: %s" % counts_name)
        # Report files in each of the output formats
        for report_name in ("barcodes.report",
                            "barcodes.xls",
                            "barcodes.html"):
            self.assertTrue(
                os.path.isfile(os.path.join(analysis_dir, report_name)),
                "Missing file: %s" % report_name)
        # Check that the report content is non-trivial
        with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
            report_text = fp.read()
        # Fragments that must all appear in the report: the lane section,
        # the table header, the problem summary, the top-ranked barcode
        # and the line for the underrepresented sample
        expected_fragments = (
            "Barcode analysis for lane #1",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
            "Problems detected:\n * Underrepresented samples",
            "   1\tTCCTGA\t\t1\t1\t100.0%\t(100.0%)",
            "The following samples are underrepresented:",
            "AB1\t\t\t<0.1%",
        )
        for fragment in expected_fragments:
            self.assertTrue(fragment in report_text)
        # Expect at least 12 lines of content in total
        self.assertTrue(report_text.count('\n') >= 12)