def test_publish_qc_with_projects_with_multiple_fastq_sets(self):
    """publish_qc: projects with multiple Fastq sets

    The first project is given a second Fastq set ("fastqs.extra")
    with QC outputs in "qc.extra"; after publishing, the reports for
    both the default and the extra Fastq set must be present in the
    publication area.
    """
    run_name = '160621_K00879_0087_000000000-AGEW9'
    # Build the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621",
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Populate the processing report and per-project QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Give the first project a second Fastq set with its own QC dir
    multi_fastqs_project = ap.get_analysis_projects()[0]
    extra_fastqs = ("Alt1.r1.fastq.gz", "Alt2.r1.fastq.gz")
    UpdateAnalysisProject(multi_fastqs_project).add_fastq_set(
        "fastqs.extra", extra_fastqs)
    UpdateAnalysisProject(multi_fastqs_project).add_qc_outputs(
        fastq_set="fastqs.extra",
        qc_dir="qc.extra")
    # Create the mock publication area and publish into it
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    publish_qc(ap, location=publication_dir)
    # Assemble the list of expected outputs
    analysis_name = os.path.basename(ap.analysis_dir)
    expected = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC report, its ZIP archive and the MultiQC report
        report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
        expected.extend([
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir, "qc_report.html"),
            os.path.join(report_dir, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    # Reports for the extra Fastq set of the first project
    extra_report_dir = "qc.extra_report.%s.%s" % (
        multi_fastqs_project.name, analysis_name)
    expected.extend([
        extra_report_dir,
        "%s.zip" % extra_report_dir,
        os.path.join(extra_report_dir, "qc.extra_report.html"),
        os.path.join(extra_report_dir, "qc.extra"),
        "multiqc.extra_report.%s.html" % multi_fastqs_project.name,
    ])
    # Every expected item must exist under the published run directory
    published_run = os.path.join(publication_dir,
                                 "%s_analysis" % run_name)
    for item in expected:
        path = os.path.join(published_run, item)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_publish_qc_with_icell8_outputs(self):
    """publish_qc: project with ICell8 QC outputs

    One project additionally receives ICell8 processing outputs; the
    published area must contain the standard QC reports for every
    project plus the ICell8 report directory and ZIP for that project.
    """
    run_name = '160621_K00879_0087_000000000-AGEW9'
    # Build the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621",
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Populate the processing report and per-project QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    projects = ap.get_analysis_projects()
    for project in projects:
        UpdateAnalysisProject(project).add_qc_outputs()
    # The first project also gets an ICell8 report
    icell8_project = projects[0]
    UpdateAnalysisProject(icell8_project).add_icell8_outputs()
    # Create the mock publication area and publish into it
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    publish_qc(ap, location=publication_dir)
    # Assemble the list of expected outputs
    analysis_name = os.path.basename(ap.analysis_dir)
    expected = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC report, its ZIP archive and the MultiQC report
        report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
        expected.extend([
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir, "qc_report.html"),
            os.path.join(report_dir, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    # ICell8 outputs for the selected project
    icell8_dir = "icell8_processing.%s.%s" % (icell8_project.name,
                                              analysis_name)
    expected.extend([
        icell8_dir,
        "%s.zip" % icell8_dir,
        os.path.join(icell8_dir, "icell8_processing_data"),
        os.path.join(icell8_dir, "icell8_processing.html"),
        os.path.join(icell8_dir, "stats"),
    ])
    # Every expected item must exist under the published run directory
    published_run = os.path.join(publication_dir,
                                 "%s_analysis" % run_name)
    for item in expected:
        path = os.path.join(published_run, item)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_check_illumina_qc_outputs_singlecell_some_missing(self):
    """check_illumina_qc_outputs: some illumina_qc.sh outputs missing (singlecell)

    Two QC artefacts belonging to the R2 Fastq are deleted; the check
    is then expected to return exactly that Fastq.
    """
    # Create the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",)
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Add QC artefacts (no fastq_strand or MultiQC outputs)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs(
        include_fastq_strand=False,
        include_multiqc=False)
    # Delete a couple of the R2 outputs
    removed = ("PJB1_S1_R2_001_fastqc.html",
               "PJB1_S1_R2_001_model_organisms_screen.txt",)
    for name in removed:
        os.remove(os.path.join(project.qc_dir, name))
    # The R2 Fastq should be the only one returned
    flagged = check_illumina_qc_outputs(project,
                                        qc_dir="qc",
                                        qc_protocol="singlecell")
    expected = [os.path.join(project.fastq_dir,
                             "PJB1_S1_R2_001.fastq.gz")]
    self.assertEqual(flagged, expected)
def test_cellrangercount_cellranger_atac_120(self):
    """CellrangerCount: check outputs from cellranger-atac count (v1.2.0)

    Writes a "_cmdline" file into the mock count directory and checks
    the attributes that CellrangerCount exposes for that directory.
    """
    # Create mock cellranger-atac count outputs in the project
    UpdateAnalysisProject(self.project).add_cellranger_count_outputs(
        cellranger='cellranger-atac')
    count_dir = os.path.join(self.project.qc_dir,
                             "cellranger_count",
                             "PJB1")
    # Command line recorded alongside the outputs
    cmdline = " ".join((
        "/path/to/cellranger-atac-cs/1.2.0/bin/count",
        "--id PJB1",
        "--fastqs /path/to/PJB/fastqs",
        "--sample PJB1",
        "--reference /data/refdata-cellranger-atac-GRCh38-1.2.0",
        "--jobmode=local",
        "--localcores=16",
        "--localmem=128",
        "--maxjobs=48",
        "--jobinterval=100",
    ))
    cmdline_file = os.path.join(count_dir, "_cmdline")
    with open(cmdline_file, 'wt') as fp:
        fp.write(cmdline + "\n")
    # Wrap the directory and check the reported properties
    cellranger_count = CellrangerCount(count_dir)
    outs_dir = os.path.join(count_dir, "outs")
    self.assertEqual(cellranger_count.dir, count_dir)
    self.assertEqual(cellranger_count.sample_name, "PJB1")
    self.assertEqual(cellranger_count.metrics_csv,
                     os.path.join(outs_dir, "summary.csv"))
    self.assertEqual(cellranger_count.web_summary,
                     os.path.join(outs_dir, "web_summary.html"))
    self.assertEqual(cellranger_count.cmdline_file, cmdline_file)
    self.assertEqual(cellranger_count.cmdline, cmdline)
    self.assertEqual(cellranger_count.version, None)
    self.assertEqual(cellranger_count.reference_data,
                     "/data/refdata-cellranger-atac-GRCh38-1.2.0")
    self.assertEqual(cellranger_count.cellranger_exe,
                     "/path/to/cellranger-atac-cs/1.2.0/bin/count")
    self.assertEqual(cellranger_count.pipeline_name, "cellranger-atac")
def test_cellrangercount_501(self):
    """CellrangerCount: check outputs from cellranger count (v5.0.1)

    Writes a "_cmdline" file into the mock count directory and checks
    the attributes that CellrangerCount exposes for that directory.
    """
    # Create mock cellranger count outputs in the project
    UpdateAnalysisProject(self.project).add_cellranger_count_outputs()
    count_dir = os.path.join(self.project.qc_dir,
                             "cellranger_count",
                             "PJB1")
    # Command line recorded alongside the outputs
    cmdline = " ".join((
        "/path/to/cellranger",
        "count",
        "--id PJB1",
        "--fastqs /path/to/PJB/fastqs",
        "--sample PJB1",
        "--transcriptome /data/refdata-gex-GRCh38-2020-A",
        "--chemistry auto",
        "--r1-length=26",
        "--jobmode=local",
        "--localcores=16",
        "--localmem=48",
        "--maxjobs=1",
        "--jobinterval=100",
    ))
    cmdline_file = os.path.join(count_dir, "_cmdline")
    with open(cmdline_file, 'wt') as fp:
        fp.write(cmdline + "\n")
    # Wrap the directory and check the reported properties
    cellranger_count = CellrangerCount(count_dir)
    outs_dir = os.path.join(count_dir, "outs")
    self.assertEqual(cellranger_count.dir, count_dir)
    self.assertEqual(cellranger_count.sample_name, "PJB1")
    self.assertEqual(cellranger_count.metrics_csv,
                     os.path.join(outs_dir, "metrics_summary.csv"))
    self.assertEqual(cellranger_count.web_summary,
                     os.path.join(outs_dir, "web_summary.html"))
    self.assertEqual(cellranger_count.cmdline_file, cmdline_file)
    self.assertEqual(cellranger_count.cmdline, cmdline)
    self.assertEqual(cellranger_count.version, None)
    self.assertEqual(cellranger_count.reference_data,
                     "/data/refdata-gex-GRCh38-2020-A")
    self.assertEqual(cellranger_count.cellranger_exe,
                     "/path/to/cellranger")
    self.assertEqual(cellranger_count.pipeline_name, "cellranger")
def test_publish_qc_subset_of_projects(self):
    """publish_qc: only publish subset of projects

    QC outputs are generated for every project, but publish_qc is
    called with projects="AB*"; only the first project is expected in
    the publication area and the remaining ones must be absent.
    """
    run_name = '160621_K00879_0087_000000000-AGEW9'
    # Build the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621",
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Add the processing report
    UpdateAnalysisDir(ap).add_processing_report()
    # Split the projects into those expected to be published (the
    # first one) and those expected to be left out (the rest)
    all_projects = ap.get_analysis_projects()
    published_projects = all_projects[0:1]
    unpublished_projects = all_projects[1:]
    # QC outputs are added to *all* projects, so that exclusion is
    # down to the 'projects' pattern and not to missing QC
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Create the mock publication area and publish the subset
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    publish_qc(ap,
               location=publication_dir,
               projects="AB*")
    # Assemble the list of expected outputs
    analysis_name = os.path.basename(ap.analysis_dir)
    published_run = os.path.join(publication_dir,
                                 "%s_analysis" % run_name)
    expected = ["index.html", "processing_qc.html"]
    for project in published_projects:
        # Standard QC report, its ZIP archive and the MultiQC report
        report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
        expected.extend([
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir, "qc_report.html"),
            os.path.join(report_dir, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    for item in expected:
        path = os.path.join(published_run, item)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
    # Projects outside the pattern must not have been copied
    for project in unpublished_projects:
        report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
        self.assertFalse(
            os.path.exists(os.path.join(published_run, report_dir)),
            "%s exists in final dir, but shouldn't" % project.name)
def test_verify_qc_all_outputs(self):
    """verify_qc: project with all QC outputs present

    Builds a mock project with two paired-end samples, adds the mock
    QC outputs, and expects verify_qc to return a true value.
    """
    # Create the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Add the QC outputs
    project_dir = os.path.join(self.wd, "PJB")
    project = AnalysisProject("PJB", project_dir)
    UpdateAnalysisProject(project).add_qc_outputs()
    # Verification should succeed
    verified = verify_qc(project)
    self.assertTrue(verified)
def test_import_project_with_qc(self):
    """import_project: check project with QC outputs is imported

    A mock ``multiqc`` executable is placed at the front of PATH, QC
    outputs (without a MultiQC report) are added to the external
    project, and the project is imported into a mock analysis
    directory. The test then checks that the project is listed by both
    the importing AutoProcess instance and a freshly created one, and
    that the QC report, MultiQC report and report ZIP file exist in the
    imported project directory.
    """
    # Make mock multiqc and put it first on the PATH; the previous
    # PATH is restored on cleanup so the change cannot leak into
    # other tests
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    original_path = os.environ['PATH']
    self.addCleanup(os.environ.__setitem__, 'PATH', original_path)
    os.environ['PATH'] = os.pathsep.join((self.bin, original_path))
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    # Add QC outputs to the project to be imported
    UpdateAnalysisProject(
        AnalysisProject('NewProj', self.new_project_dir)
    ).add_qc_outputs(include_multiqc=False)
    # Diagnostic listing of the project directory before import
    print(os.listdir(os.path.join(self.dirn, 'NewProj')))
    # Check that the project is not currently present
    ap = AutoProcess(mockdir.dirn)
    self.assertNotIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects()])
    self.assertNotIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertFalse(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Import the project
    import_project(ap, self.new_project_dir)
    self.assertIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects()])
    self.assertIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Verify via fresh AutoProcess object
    ap2 = AutoProcess(mockdir.dirn)
    self.assertIn(
        'NewProj',
        [p.name for p in ap2.get_analysis_projects()])
    self.assertIn(
        'NewProj',
        [p.name for p in ap2.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
    # Check for QC report and ZIP file
    # Diagnostic listing of the imported project directory
    print(os.listdir(os.path.join(ap2.analysis_dir, 'NewProj')))
    expected_files = (
        "qc_report.html",
        "multiqc_report.html",
        "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
    )
    for name in expected_files:
        path = os.path.join(ap2.analysis_dir, 'NewProj', name)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_check_illumina_qc_outputs_standardSE_all_present(self):
    """check_illumina_qc_outputs: all illumina_qc.sh outputs present (standardSE)

    With a single R1 Fastq and all mock QC artefacts in place, the
    check is expected to return an empty list.
    """
    # Create the mock analysis project on disk
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz",),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Add QC artefacts (no fastq_strand or MultiQC outputs)
    project_dir = os.path.join(self.wd, "PJB")
    project = AnalysisProject("PJB", project_dir)
    UpdateAnalysisProject(project).add_qc_outputs(
        include_fastq_strand=False,
        include_multiqc=False)
    # Nothing should be returned by the check
    flagged = check_illumina_qc_outputs(project,
                                        qc_dir="qc",
                                        qc_protocol="standardSE")
    self.assertEqual(flagged, [])
def test_publish_qc_use_hierarchy(self):
    """publish_qc: publish using YEAR/PLATFORM hierarchy

    With use_hierarchy=True the run is expected under
    "<publication_dir>/2016/hiseq" rather than directly under the
    publication directory.
    """
    run_name = '160621_K00879_0087_000000000-AGEW9'
    # Build the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621",
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Populate the processing report and per-project QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Create the mock publication area and publish with the hierarchy
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    publish_qc(ap,
               location=publication_dir,
               use_hierarchy=True)
    # The YEAR/PLATFORM directory must have been created
    final_dir = os.path.join(publication_dir, "2016", "hiseq")
    self.assertTrue(os.path.exists(final_dir))
    # Assemble the list of expected outputs
    analysis_name = os.path.basename(ap.analysis_dir)
    expected = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC report, its ZIP archive and the MultiQC report
        report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
        expected.extend([
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir, "qc_report.html"),
            os.path.join(report_dir, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    # Every expected item must exist under the published run directory
    published_run = os.path.join(final_dir, "%s_analysis" % run_name)
    for item in expected:
        path = os.path.join(published_run, item)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_report_qc_all_outputs(self):
    """report_qc: project with all QC outputs present

    report_qc is expected to return 0, and the QC report, its ZIP
    archive and the MultiQC report must exist in the project directory.
    """
    # Create the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Add the QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs()
    # Run the reporting
    status = report_qc(project)
    self.assertEqual(status, 0)
    # Check that the reports were produced
    report_files = ("qc_report.html",
                    "qc_report.PJB.zip",
                    "multiqc_report.html")
    for name in report_files:
        report_path = os.path.join(self.wd, "PJB", name)
        self.assertTrue(os.path.exists(report_path),
                        "Missing %s" % name)
def test_check_fastq_strand_outputs_standardSE_present(self):
    """check_fastq_strand_outputs: fastq_strand.py output present (standardSE)

    With the mock fastq_strand outputs in place for a single R1 Fastq,
    the check is expected to return an empty list.
    """
    # Create the mock analysis project on disk
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz",),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Add QC outputs including fastq_strand (but not MultiQC)
    project_dir = os.path.join(self.wd, "PJB")
    project = AnalysisProject("PJB", project_dir)
    UpdateAnalysisProject(project).add_qc_outputs(
        protocol="standardSE",
        include_fastq_strand=True,
        include_multiqc=False)
    # Path to the fastq_strand conf file passed to the check
    fastq_strand_conf = os.path.join(project.dirn, "fastq_strand.conf")
    # Nothing should be returned by the check
    flagged = check_fastq_strand_outputs(project,
                                         "qc",
                                         fastq_strand_conf,
                                         qc_protocol="standardSE")
    self.assertEqual(flagged, [])
def test_cellrangermulti(self):
    """CellrangerMulti: check outputs from cellranger multi

    Writes a multiplexing config CSV and a "_cmdline" file, then checks
    the per-sample paths, metrics objects and command-line-derived
    attributes that CellrangerMulti exposes for the output directory.
    """
    # Write the config.csv file
    # NOTE(review): the line breaks inside this literal were
    # reconstructed from a whitespace-collapsed view of the file (one
    # record per line, no blank lines between sections) -- confirm
    # against the original before merging
    config_csv = os.path.join(self.project.dirn, "10x_multi_config.csv")
    with open(config_csv, 'wt') as fp:
        fp.write("""[gene-expression]
reference,/data/refdata-cellranger-gex-GRCh38-2020-A
[libraries]
fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate
PJB1_GEX,/data/runs/fastqs_gex,any,PJB1,gene expression,
PJB2_MC,/data/runs/fastqs_mc,any,PJB2,Multiplexing Capture,
[samples]
sample_id,cmo_ids,description
PBA,CMO301,PBA
PBB,CMO302,PBB
""")
    # Create mock cellranger multi outputs in the project
    UpdateAnalysisProject(self.project).add_cellranger_multi_outputs(
        config_csv)
    multi_dir = os.path.join(self.project.qc_dir, "cellranger_multi")
    # Command line recorded alongside the outputs
    cmdline = " ".join((
        "/path/to/cellranger",
        "count",
        "--id PJB",
        "--csv %s" % config_csv,
        "--jobmode=local",
        "--localcores=16",
        "--localmem=48",
        "--maxjobs=1",
        "--jobinterval=100",
    ))
    cmdline_file = os.path.join(multi_dir, "_cmdline")
    with open(cmdline_file, 'wt') as fp:
        fp.write(cmdline + "\n")
    # Wrap the directory and check the reported properties
    cellranger_multi = CellrangerMulti(multi_dir)
    per_sample_outs = os.path.join(multi_dir, "outs", "per_sample_outs")
    self.assertEqual(cellranger_multi.dir, multi_dir)
    self.assertEqual(cellranger_multi.sample_names, ["PBA", "PBB"])
    # Per-sample metrics CSV paths
    self.assertEqual(cellranger_multi.metrics_csv('PBA'),
                     os.path.join(per_sample_outs,
                                  "PBA",
                                  "metrics_summary.csv"))
    self.assertEqual(cellranger_multi.metrics_csv('PBB'),
                     os.path.join(per_sample_outs,
                                  "PBB",
                                  "metrics_summary.csv"))
    # Per-sample metrics objects
    self.assertTrue(isinstance(cellranger_multi.metrics('PBA'),
                               MultiplexSummary))
    self.assertTrue(isinstance(cellranger_multi.metrics('PBB'),
                               MultiplexSummary))
    # Per-sample web summaries
    self.assertEqual(cellranger_multi.web_summary('PBA'),
                     os.path.join(per_sample_outs,
                                  "PBA",
                                  "web_summary.html"))
    self.assertEqual(cellranger_multi.web_summary('PBB'),
                     os.path.join(per_sample_outs,
                                  "PBB",
                                  "web_summary.html"))
    # Attributes derived from the recorded command line
    self.assertEqual(cellranger_multi.cmdline_file, cmdline_file)
    self.assertEqual(cellranger_multi.cmdline, cmdline)
    self.assertEqual(cellranger_multi.version, None)
    self.assertEqual(cellranger_multi.reference_data,
                     "/data/refdata-cellranger-gex-GRCh38-2020-A")
    self.assertEqual(cellranger_multi.cellranger_exe,
                     "/path/to/cellranger")
    self.assertEqual(cellranger_multi.pipeline_name, "cellranger")
def test_archive_to_final_multiple_fastq_sets_read_only_fastqs(self):
    """archive: test copying multiple fastq sets to final archive dir (read-only Fastqs)

    The first project is given an extra Fastq set ("fastqs.extra");
    after archiving with read_only_fastqs=True and final=True, the
    Fastqs in both the default and the extra set must be readable but
    not writable.
    """
    def assert_fastqs_read_only(fq_dir):
        # Directory must exist, hold at least one file, and every
        # file must be readable but not writable
        self.assertTrue(os.path.exists(fq_dir))
        names = os.listdir(fq_dir)
        self.assertTrue(len(names) > 0)
        for name in names:
            fq_path = os.path.join(fq_dir, name)
            self.assertTrue(os.access(fq_path, os.R_OK))
            self.assertFalse(os.access(fq_path, os.W_OK))

    run_name = '170901_M00879_0087_000000000-AGEW9'
    # Build the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'miseq',
        metadata={"instrument_datestamp": "170901"},
        top_dir=self.dirn)
    mockdir.create()
    # Build an empty mock archive area
    archive_dir = os.path.join(self.dirn, "archive")
    final_dir = os.path.join(archive_dir, "2017", "miseq")
    os.makedirs(final_dir)
    self.assertTrue(os.path.isdir(final_dir))
    self.assertEqual(len(os.listdir(final_dir)), 0)
    # Create the autoprocess instance and set the required metadata
    ap = AutoProcess(analysis_dir=mockdir.dirn,
                     settings=self.settings)
    ap.set_metadata("source", "testing")
    ap.set_metadata("run_number", "87")
    # Give the first project an additional Fastq set
    multi_fastqs_project = ap.get_analysis_projects()[0]
    UpdateAnalysisProject(multi_fastqs_project).add_fastq_set(
        "fastqs.extra",
        ("Alt1.r1.fastq.gz", "Alt2.r1.fastq.gz"))
    # Run the archiving operation
    status = archive(ap,
                     archive_dir=archive_dir,
                     year='2017',
                     platform='miseq',
                     read_only_fastqs=True,
                     final=True)
    self.assertEqual(status, 0)
    # The final archive dir must be the only entry in final_dir
    final_archive_dir = os.path.join(final_dir,
                                     "%s_analysis" % run_name)
    self.assertTrue(os.path.exists(final_archive_dir))
    self.assertEqual(len(os.listdir(final_dir)), 1)
    # Check the expected subdirectories
    expected_dirs = ("AB", "CDE", "logs", "undetermined")
    for name in expected_dirs:
        self.assertTrue(
            os.path.exists(os.path.join(final_archive_dir, name)))
    # Check the expected top-level files
    expected_files = ("auto_process.info",
                      "custom_SampleSheet.csv",
                      "metadata.info",
                      "projects.info",
                      "SampleSheet.orig.csv")
    for name in expected_files:
        self.assertTrue(
            os.path.exists(os.path.join(final_archive_dir, name)))
    # Fastqs in the default Fastq set are not writable
    for project_name in ("AB", "CDE", "undetermined"):
        assert_fastqs_read_only(
            os.path.join(final_archive_dir, project_name, "fastqs"))
    # Fastqs in the additional Fastq set are not writable
    assert_fastqs_read_only(
        os.path.join(final_archive_dir,
                     multi_fastqs_project.name,
                     "fastqs.extra"))