def test_make_fastqs_specify_platform_via_metadata(self):
    """make_fastqs: implicitly specify the platform via metadata

    Sets up an analysis from a mock run named
    "171020_UNKNOWN_00002_AHGXXXX", checks that the "platform"
    metadata item is None after setup, then assigns "miseq" to it
    before running 'make_fastqs' with the "standard" protocol.
    Afterwards the stored parameters and the expected output
    directories and files are verified.
    """
    run_name = "171020_UNKNOWN_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             platform="miseq")
    # Prepend the directory with the mock executable to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertIsNotNone(ap.params.sample_sheet)
    # No platform is stored in the metadata straight after setup
    self.assertIsNone(ap.metadata.platform)
    ap.metadata["platform"] = "miseq"
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    self.assertFalse(ap.params.acquired_primary_data)
    make_fastqs(ap, protocol="standard")
    # Check parameters
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
def test_make_fastqs_icell8_protocol(self):
    """make_fastqs: icell8 protocol

    Runs 'make_fastqs' with the "icell8" protocol on a mock "hiseq"
    run. The mock bcl2fastq is created with
    'assert_bases_mask="y25n76,I8,I8,y101"', which is the bases mask
    this test expects to be passed to it. Afterwards the stored
    parameters and the expected output directories and files are
    verified.
    """
    run_name = "171020_SN7001250_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "hiseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    # Check that bases mask is as expected
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             assert_bases_mask="y25n76,I8,I8,y101")
    # Prepend the directory with the mock executable to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    self.assertFalse(ap.params.acquired_primary_data)
    make_fastqs(ap, protocol="icell8")
    # Check parameters
    # (the stored bases mask is expected to still be "auto")
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_icell8"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
def test_make_fastqs_10x_chromium_sc_protocol(self):
    """make_fastqs: 10x_chromium_sc protocol

    Runs 'make_fastqs' with the "10x_chromium_sc" protocol on a mock
    "hiseq" run, with mock 'bcl2fastq' and 'cellranger' executables
    on the PATH. Afterwards the primary data parameters and the
    expected output directories and files (including
    "cellranger_qc_summary.html") are verified.
    """
    run_name = "171020_SN7001250_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "hiseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq and cellranger executables
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
    MockCellrangerExe.create(os.path.join(self.bin, "cellranger"))
    # Prepend the directory with the mock executables to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    self.assertFalse(ap.params.acquired_primary_data)
    make_fastqs(ap, protocol="10x_chromium_sc")
    # Check parameters
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_10x_chromium_sc"),
        "bcl2fastq",
        "HGXXXX",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
        "cellranger_qc_summary.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
def test_make_fastqs_standard_protocol_stores_bases_mask(self):
    """make_fastqs: standard protocol stores supplied bases mask

    Runs 'make_fastqs' with the "standard" protocol and an explicit
    'bases_mask' of "y101,I8,I8,y101" on a mock "miseq" run, then
    checks that this value replaces the initial "auto" setting in
    the stored parameters and that the expected output directories
    and files exist.
    """
    run_name = "171020_M00879_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
    # Prepend the directory with the mock executable to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    make_fastqs(ap, protocol="standard", bases_mask="y101,I8,I8,y101")
    # Check parameters
    self.assertEqual(ap.params.bases_mask, "y101,I8,I8,y101")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
def test_make_fastqs_missing_fastqs_with_placeholders(self):
    """make_fastqs: missing fastqs with placeholders

    Runs 'make_fastqs' with the "standard" protocol and
    'create_empty_fastqs=True' on a mock "miseq" run. The mock
    bcl2fastq is created with a 'missing_fastqs' argument naming the
    R1 and R2 Fastqs for "Sample1" (presumably so that the mock does
    not produce them). Afterwards the stored parameters, the expected
    output directories and files, and the presence of
    "missing_fastqs.log" in the log directory are verified.
    """
    run_name = "171020_M00879_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             missing_fastqs=(
                                 "Sample1_S1_L001_R1_001.fastq.gz",
                                 "Sample1_S1_L001_R2_001.fastq.gz",))
    # Prepend the directory with the mock executable to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    self.assertFalse(ap.params.acquired_primary_data)
    make_fastqs(ap,
                protocol="standard",
                create_empty_fastqs=True)
    # Check parameters
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    # NB existence (rather than regular file) checks are used here
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.exists(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
    # Check that the log of missing Fastqs was written
    self.assertTrue(
        os.path.exists(
            os.path.join(analysis_dir,
                         "logs",
                         "002_make_fastqs",
                         "missing_fastqs.log")))
def test_make_fastqs_icell8_protocol_no_demultiplexing(self):
    """make_fastqs: icell8 protocol with no demultiplexing

    Writes a sample sheet with a single sample and an empty index
    field, sets up a mock "nextseq" run using that sample sheet and
    runs 'make_fastqs' with the "icell8" protocol. The mock bcl2fastq
    is created with 'assert_bases_mask="y25n51,nnnnnn,y76"', which is
    the bases mask this test expects to be passed to it. Afterwards
    the expected output directories and files are verified.
    """
    run_name = "171020_NB500968_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Sample sheet with no barcodes
    samplesheet_no_demultiplexing = """[Header]
IEMFileVersion,4
Assay,Nextera XT

[Reads]
76
76

[Settings]
ReverseComplement,0
Adapter,CTGTCTCTTATACACATCT

[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
AB1,AB1,,,,,icell8,
"""
    sample_sheet = os.path.join(self.wd, "SampleSheet.csv")
    with open(sample_sheet, 'w') as fp:
        fp.write(samplesheet_no_demultiplexing)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "nextseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    # Check that bases mask is as expected
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             assert_bases_mask="y25n51,nnnnnn,y76")
    # Prepend the directory with the mock executable to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name),
             sample_sheet=sample_sheet)
    self.assertIsNotNone(ap.params.sample_sheet)
    make_fastqs(ap, protocol="icell8")
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_icell8"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)
def test_make_fastqs_10x_chromium_sc_atac_protocol(self):
    """make_fastqs: 10x_chromium_sc_atac protocol

    Writes a sample sheet with two samples (one per lane) whose index
    values are "SI-NA-A1" and "SI-NA-B1", sets up a mock "hiseq" run
    using that sample sheet and runs 'make_fastqs' with the
    "10x_chromium_sc_atac" protocol. Mock 'bcl2fastq' and
    'cellranger-atac' executables are placed on the PATH. Afterwards
    the stored parameters and the expected output directories and
    files are verified.
    """
    run_name = "171020_SN7001250_00002_AHGXXXX"
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    # Sample sheet with 10xGenomics Chromium SC ATAC-seq indices
    samplesheet_chromium_sc_atac_indices = """[Header]
IEMFileVersion,4
Assay,Nextera XT

[Reads]
76
76

[Settings]
ReverseComplement,0
Adapter,CTGTCTCTTATACACATCT

[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
1,smpl1,smpl1,,,A001,SI-NA-A1,10xGenomics,
2,smpl2,smpl2,,,A005,SI-NA-B1,10xGenomics,
"""
    sample_sheet = os.path.join(self.wd, "SampleSheet.csv")
    with open(sample_sheet, 'w') as fp:
        fp.write(samplesheet_chromium_sc_atac_indices)
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "hiseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq and cellranger-atac
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
    MockCellrangerExe.create(os.path.join(self.bin, "cellranger-atac"),
                             reads=('R1', 'R2', 'R3', 'I1',))
    # Prepend the directory with the mock executables to the PATH
    os.environ['PATH'] = "%s%s%s" % (self.bin,
                                     os.pathsep,
                                     os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name),
             sample_sheet=sample_sheet)
    self.assertIsNotNone(ap.params.sample_sheet)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertIsNone(ap.params.primary_data_dir)
    self.assertFalse(ap.params.acquired_primary_data)
    make_fastqs(ap, protocol="10x_chromium_sc_atac")
    # Check parameters
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Check outputs
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_10x_chromium_sc_atac"),
        "bcl2fastq",
    )
    for subdir in expected_subdirs:
        self.assertTrue(
            os.path.isdir(os.path.join(analysis_dir, subdir)),
            "Missing subdir: %s" % subdir)
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(
            os.path.isfile(os.path.join(analysis_dir, filen)),
            "Missing file: %s" % filen)