def test_make_fastqs_specify_platform_via_metadata(self):
     """make_fastqs: implicitly specify the platform via metadata

     After setup the metadata platform is asserted to be unset; it
     is then assigned "miseq" directly on the metadata before
     ``make_fastqs`` is run with the "standard" protocol.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     # Mock sequencing run to act as the source data
     mock_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
     mock_run.create()
     # Mock bcl2fastq executable, with its directory put first on PATH
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe, platform="miseq")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis, then supply the platform via the metadata
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertIsNotNone(ap.params.sample_sheet)
     self.assertIsNone(ap.metadata.platform)
     ap.metadata["platform"] = "miseq"
     ap.make_fastqs(protocol="standard")
     # Verify that the expected directories and files were produced
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     expected_dirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs"),
         "bcl2fastq",
     )
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for dirn in expected_dirs:
         dir_path = os.path.join(analysis_dir, dirn)
         self.assertTrue(os.path.isdir(dir_path),
                         "Missing subdir: %s" % dirn)
     for name in expected_files:
         file_path = os.path.join(analysis_dir, name)
         self.assertTrue(os.path.isfile(file_path),
                         "Missing file: %s" % name)
 def test_make_fastqs_icell8_protocol(self):
     """make_fastqs: icell8 protocol

     Runs ``make_fastqs`` with the "icell8" protocol against a mock
     HiSeq run. The mock bcl2fastq is created with an
     ``assert_bases_mask`` value giving the bases mask expected by
     this test.
     """
     run_name = "171020_SN7001250_00002_AHGXXXX"
     # Mock sequencing run to act as the source data
     mock_run = MockIlluminaRun(run_name, "hiseq", top_dir=self.wd)
     mock_run.create()
     # Mock bcl2fastq executable configured with the expected bases mask
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe,
                              assert_bases_mask="y25n76,I8,I8,y101")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis and generate the Fastqs
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertIsNotNone(ap.params.sample_sheet)
     ap.make_fastqs(protocol="icell8")
     # Verify that the expected directories and files were produced
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     expected_dirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs_icell8"),
         "bcl2fastq",
     )
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for dirn in expected_dirs:
         dir_path = os.path.join(analysis_dir, dirn)
         self.assertTrue(os.path.isdir(dir_path),
                         "Missing subdir: %s" % dirn)
     for name in expected_files:
         file_path = os.path.join(analysis_dir, name)
         self.assertTrue(os.path.isfile(file_path),
                         "Missing file: %s" % name)
 def test_make_fastqs_unknown_platform(self):
     """make_fastqs: unknown platform raises exception

     Sets up an analysis for a mock run and checks the initial
     parameter values, then expects ``make_fastqs`` to raise when
     called without any platform being supplied.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     # Mock sequencing run to act as the source data
     mock_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
     mock_run.create()
     # Mock bcl2fastq executable, with its directory put first on PATH
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis and check the starting parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     self.assertIsNotNone(ap.params.sample_sheet)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertIsNone(ap.params.primary_data_dir)
     self.assertFalse(ap.params.acquired_primary_data)
     # Fastq generation is expected to fail
     with self.assertRaises(Exception):
         make_fastqs(ap, protocol="standard")
 def test_make_fastqs_handle_bcl2fastq2_failure(self):
     """make_fastqs: handle bcl2fastq2 failure

     The mock bcl2fastq is created with ``exit_code=1`` so that it
     returns a non-zero exit code. ``make_fastqs`` is expected to
     raise; the primary data, log and bcl2fastq directories must
     still exist, while none of the statistics and report files
     may be present.
     """
     run_name = "171020_M00879_00002_AHGXXXX"
     # Create mock source data
     illumina_run = MockIlluminaRun(run_name,
                                    "miseq",
                                    top_dir=self.wd)
     illumina_run.create()
     # Create mock bcl2fastq which will fail (i.e.
     # return non-zero exit code)
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              exit_code=1)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertRaises(Exception, ap.make_fastqs, protocol="standard")
     # Check outputs: directories are expected to exist
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     for subdir in (os.path.join("primary_data", run_name),
                    os.path.join("logs", "002_make_fastqs"), "bcl2fastq"):
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     # Statistics and report files must NOT exist after the failure,
     # so the failure message reports an unexpected (not missing) file
     for filen in ("statistics.info", "statistics_full.info",
                   "per_lane_statistics.info", "per_lane_sample_stats.info",
                   "projects.info", "processing_qc.html"):
         self.assertFalse(os.path.exists(os.path.join(analysis_dir, filen)),
                          "Unexpected file: %s" % filen)
 def test_make_fastqs_explicitly_specify_platform(self):
     """make_fastqs: explicitly specify the platform

     Passes ``platform="miseq"`` directly to ``make_fastqs`` and
     then checks both the updated parameters and the outputs in
     the analysis directory.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     # Mock sequencing run to act as the source data
     mock_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
     mock_run.create()
     # Mock bcl2fastq executable, with its directory put first on PATH
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe, platform="miseq")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis and check the starting parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     self.assertIsNotNone(ap.params.sample_sheet)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertIsNone(ap.params.primary_data_dir)
     self.assertFalse(ap.params.acquired_primary_data)
     # Generate the Fastqs with the platform given explicitly
     make_fastqs(ap, protocol="standard", platform="miseq")
     # Parameters after the run
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(analysis_dir, "primary_data"))
     self.assertTrue(ap.params.acquired_primary_data)
     # Verify that the expected directories and files were produced
     expected_dirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs"),
         "bcl2fastq",
         "barcode_analysis",
     )
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for dirn in expected_dirs:
         dir_path = os.path.join(analysis_dir, dirn)
         self.assertTrue(os.path.isdir(dir_path),
                         "Missing subdir: %s" % dirn)
     for name in expected_files:
         file_path = os.path.join(analysis_dir, name)
         self.assertTrue(os.path.isfile(file_path),
                         "Missing file: %s" % name)
 def test_make_fastqs_10x_chromium_sc_protocol(self):
     """make_fastqs: 10x_chromium_sc protocol

     Runs ``make_fastqs`` with the "10x_chromium_sc" protocol using
     mock bcl2fastq and cellranger executables, then checks the
     updated parameters and the outputs in the analysis directory.
     """
     run_name = "171020_SN7001250_00002_AHGXXXX"
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     # Mock sequencing run to act as the source data
     mock_run = MockIlluminaRun(run_name, "hiseq", top_dir=self.wd)
     mock_run.create()
     # Mock bcl2fastq and cellranger executables on the PATH
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
     MockCellrangerExe.create(os.path.join(self.bin, "cellranger"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis and check the starting parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     self.assertIsNotNone(ap.params.sample_sheet)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertIsNone(ap.params.primary_data_dir)
     self.assertFalse(ap.params.acquired_primary_data)
     # Generate the Fastqs
     make_fastqs(ap, protocol="10x_chromium_sc")
     # Parameters after the run
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(analysis_dir, "primary_data"))
     self.assertTrue(ap.params.acquired_primary_data)
     # Verify that the expected directories and files were produced
     expected_dirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs_10x_chromium_sc"),
         "bcl2fastq",
         "HGXXXX",
     )
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
         "cellranger_qc_summary.html",
     )
     for dirn in expected_dirs:
         dir_path = os.path.join(analysis_dir, dirn)
         self.assertTrue(os.path.isdir(dir_path),
                         "Missing subdir: %s" % dirn)
     for name in expected_files:
         file_path = os.path.join(analysis_dir, name)
         self.assertTrue(os.path.isfile(file_path),
                         "Missing file: %s" % name)
    def test_make_fastqs_samplesheet_with_invalid_characters(self):
        """make_fastqs: stop for invalid characters in sample sheet
        """
        # Create mock source data with samplesheet with backspace
        illumina_run = MockIlluminaRun(
            "171020_M00879_00002_AHGXXXX",
            "miseq",
            sample_sheet_content="""[Header],,,,,,,,,
IEMFileVersion,4
Date,11/23/2015
Workflow,GenerateFASTQ
Application,FASTQ Only
Assay,TruSeq HT
Description,
Chemistry,Amplicon

[Reads]
101
101

[Settings]
ReverseComplement,0
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT

[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
Sample1,Sample1,,,D701,CGTGTAGG,D501,GACCTGTC,,\b
Sample2,Sample2,,,D702,CGTGTAGG,D501,ATGTAACT,,
""",
            top_dir=self.wd)
        illumina_run.create()
        # Create mock bcl2fastq
        MockBcl2fastq2Exe.create(os.path.join(self.bin,
                                              "bcl2fastq"),
                                 platform="miseq")
        os.environ['PATH'] = "%s:%s" % (self.bin,
                                        os.environ['PATH'])
        # Do the test
        ap = AutoProcess(settings=self.settings)
        ap.setup(os.path.join(self.wd,
                              "171020_M00879_00002_AHGXXXX"))
        self.assertTrue(ap.params.sample_sheet is not None)
        self.assertEqual(ap.params.bases_mask,"auto")
        self.assertTrue(ap.params.primary_data_dir is None)
        self.assertFalse(ap.params.acquired_primary_data)
        self.assertRaises(Exception,
                          make_fastqs,
                          ap)
Example #8
0
    def test_run_cellranger_mkfastq_subset_of_lanes(self):
        """run_cellranger_mkfastq: check cellranger is executed for subset of lanes
        """
        # Create mock source data
        illumina_run = MockIlluminaRun("171020_SN7001250_00002_AHGXXXX",
                                       "hiseq",
                                       top_dir=self.wd)
        illumina_run.create()
        # Mock sample sheet with chromium indices
        sample_sheet_file = os.path.join(self.wd, "samplesheet.csv")
        with open(sample_sheet_file, 'w') as fp:
            fp.write("""[Header]
IEMFileVersion,4

[Reads]
76
76

[Settings]
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT

[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,smpl1,smpl1,,,A001,SI-GA-A1,10xGenomics,
2,smpl2,smpl2,,,A005,SI-GA-B1,10xGenomics,
3,smpl3,smpl3,,,A006,SI-GA-C1,10xGenomics,
4,smpl4,smpl4,,,A007,SI-GA-D1,10xGenomics,
""")
        # Create mock bcl2fastq and cellranger executables
        MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
        MockCellrangerExe.create(os.path.join(self.bin, "cellranger"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Output dir
        output_dir = "bcl2fastq"
        self.assertFalse(os.path.exists("HGXXXX_34"))
        self.assertFalse(os.path.exists("cellranger_qc_summary_34.html"))
        self.assertFalse(os.path.exists(output_dir))
        # Run 'cellranger mkfastq'
        exit_code = run_cellranger_mkfastq(sample_sheet_file,
                                           illumina_run.dirn,
                                           output_dir,
                                           lanes="3,4")
        # Check outputs
        self.assertEqual(exit_code, 0)
        self.assertTrue(os.path.isdir("HGXXXX_34"))
        self.assertTrue(os.path.isfile("cellranger_qc_summary_34.html"))
        self.assertTrue(os.path.isdir(output_dir))
 def test_make_fastqs_missing_fastqs_no_placeholders(self):
     """make_fastqs: missing fastqs, no placeholders

     The mock bcl2fastq is created with two Fastq names passed as
     ``missing_fastqs``, and ``make_fastqs`` is called with
     ``create_empty_fastqs=False``; it is expected to raise. The
     primary data, log and bcl2fastq directories must exist, none
     of the statistics and report files may be present, and a
     'missing_fastqs.log' file must exist in the log directory.
     """
     run_name = "171020_M00879_00002_AHGXXXX"
     # Create mock source data
     illumina_run = MockIlluminaRun(run_name,
                                    "miseq",
                                    top_dir=self.wd)
     illumina_run.create()
     # Create mock bcl2fastq
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              missing_fastqs=(
                                  "Sample1_S1_L001_R1_001.fastq.gz",
                                  "Sample1_S1_L001_R2_001.fastq.gz",
                              ))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertRaises(Exception,
                       ap.make_fastqs,
                       protocol="standard",
                       create_empty_fastqs=False)
     # Check outputs: directories are expected to exist
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     for subdir in (os.path.join("primary_data", run_name),
                    os.path.join("logs", "002_make_fastqs"), "bcl2fastq"):
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     # Statistics and report files must NOT exist after the failure,
     # so the failure message reports an unexpected (not missing) file
     for filen in ("statistics.info", "statistics_full.info",
                   "per_lane_statistics.info", "per_lane_sample_stats.info",
                   "projects.info", "processing_qc.html"):
         self.assertFalse(os.path.exists(os.path.join(analysis_dir, filen)),
                          "Unexpected file: %s" % filen)
     # The log of missing Fastqs should have been written
     self.assertTrue(
         os.path.exists(
             os.path.join(analysis_dir, "logs", "002_make_fastqs",
                          "missing_fastqs.log")))
    def test_make_fastqs_icell8_protocol_no_demultiplexing(self):
        """make_fastqs: icell8 protocol with no demultiplexing
        """
        # Sample sheet with no barcodes
        samplesheet_no_demultiplexing = """[Header]
IEMFileVersion,4
Assay,Nextera XT

[Reads]
76
76

[Settings]
ReverseComplement,0
Adapter,CTGTCTCTTATACACATCT

[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
AB1,AB1,,,,,icell8,
"""
        sample_sheet = os.path.join(self.wd,"SampleSheet.csv")
        with open(sample_sheet,'w') as fp:
            fp.write(samplesheet_no_demultiplexing)
        # Create mock source data
        illumina_run = MockIlluminaRun(
            "171020_NB500968_00002_AHGXXXX",
            "nextseq",
            top_dir=self.wd)
        illumina_run.create()
        # Create mock bcl2fastq
        # Check that bases mask is as expected
        MockBcl2fastq2Exe.create(os.path.join(self.bin,
                                              "bcl2fastq"),
                                 assert_bases_mask="y25n51,nnnnnn,y76")
        os.environ['PATH'] = "%s:%s" % (self.bin,
                                        os.environ['PATH'])
        # Do the test
        ap = AutoProcess(settings=self.settings)
        ap.setup(os.path.join(self.wd,
                              "171020_NB500968_00002_AHGXXXX"),
                 sample_sheet=sample_sheet)
        self.assertTrue(ap.params.sample_sheet is not None)
        make_fastqs(ap,protocol="icell8")
        # Check outputs
        analysis_dir = os.path.join(
            self.wd,
            "171020_NB500968_00002_AHGXXXX_analysis")
        for subdir in (os.path.join("primary_data",
                                    "171020_NB500968_00002_AHGXXXX"),
                       os.path.join("logs",
                                    "002_make_fastqs_icell8"),
                       "bcl2fastq",
                       "barcode_analysis",):
            self.assertTrue(os.path.isdir(
                os.path.join(analysis_dir,subdir)),
                            "Missing subdir: %s" % subdir)
        for filen in ("statistics.info",
                      "statistics_full.info",
                      "per_lane_statistics.info",
                      "per_lane_sample_stats.info",
                      "projects.info",
                      "processing_qc.html"):
            self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,filen)),
                            "Missing file: %s" % filen)
    def test_make_fastqs_10x_chromium_sc_atac_protocol(self):
        """make_fastqs: 10x_chromium_sc_atac protocol
        """
        # Sample sheet with 10xGenomics Chromium SC ATAC-seq indices
        samplesheet_chromium_sc_atac_indices = """[Header]
IEMFileVersion,4
Assay,Nextera XT

[Reads]
76
76

[Settings]
ReverseComplement,0
Adapter,CTGTCTCTTATACACATCT

[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,smpl1,smpl1,,,A001,SI-NA-A1,10xGenomics,
2,smpl2,smpl2,,,A005,SI-NA-B1,10xGenomics,
"""
        sample_sheet = os.path.join(self.wd,"SampleSheet.csv")
        with open(sample_sheet,'w') as fp:
            fp.write(samplesheet_chromium_sc_atac_indices)
        # Create mock source data
        illumina_run = MockIlluminaRun(
            "171020_SN7001250_00002_AHGXXXX",
            "hiseq",
            top_dir=self.wd)
        illumina_run.create()
        # Create mock bcl2fastq and cellranger-atac
        MockBcl2fastq2Exe.create(os.path.join(self.bin,
                                              "bcl2fastq"))
        MockCellrangerExe.create(os.path.join(self.bin,
                                              "cellranger-atac"),
                                 reads=('R1','R2','R3','I1',))
        os.environ['PATH'] = "%s:%s" % (self.bin,
                                        os.environ['PATH'])
        # Do the test
        ap = AutoProcess(settings=self.settings)
        ap.setup(os.path.join(self.wd,
                              "171020_SN7001250_00002_AHGXXXX"),
                 sample_sheet=sample_sheet)
        self.assertTrue(ap.params.sample_sheet is not None)
        self.assertEqual(ap.params.bases_mask,"auto")
        self.assertTrue(ap.params.primary_data_dir is None)
        self.assertFalse(ap.params.acquired_primary_data)
        make_fastqs(ap,protocol="10x_chromium_sc_atac")
        # Check parameters
        self.assertEqual(ap.params.bases_mask,"auto")
        self.assertEqual(ap.params.primary_data_dir,
                         os.path.join(self.wd,
                                      "171020_SN7001250_00002_AHGXXXX_analysis",
                                      "primary_data"))
        self.assertTrue(ap.params.acquired_primary_data)
        # Check outputs
        analysis_dir = os.path.join(
            self.wd,
            "171020_SN7001250_00002_AHGXXXX_analysis")
        for subdir in (os.path.join("primary_data",
                                    "171020_SN7001250_00002_AHGXXXX"),
                       os.path.join("logs",
                                    "002_make_fastqs_10x_chromium_sc_atac"),
                       "bcl2fastq",):
            self.assertTrue(os.path.isdir(
                os.path.join(analysis_dir,subdir)),
                            "Missing subdir: %s" % subdir)
        for filen in ("statistics.info",
                      "statistics_full.info",
                      "per_lane_statistics.info",
                      "per_lane_sample_stats.info",
                      "projects.info",
                      "processing_qc.html"):
            self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,filen)),
                            "Missing file: %s" % filen)