def test_make_fastqs_unknown_platform(self):
     """make_fastqs: unknown platform raises exception

     Sets up an analysis from a mock run named
     '171020_UNKNOWN_00002_AHGXXXX', verifies the initial parameter
     values, and then expects 'make_fastqs' with the "standard"
     protocol to raise an exception.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     # Mock sequencer output to act as the source data
     MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
     # Mock bcl2fastq executable, put at the front of PATH
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis from the mock run
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     # Parameter values straight after setup
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     self.assertFalse(ap.params.acquired_primary_data)
     # Fastq generation is expected to fail
     with self.assertRaises(Exception):
         make_fastqs(ap, protocol="standard")
 def test_publish_qc_with_projects_with_multiple_fastq_sets(self):
     """publish_qc: projects with multiple Fastq sets

     Every project gets the standard QC outputs; the first project
     additionally gets a second Fastq set ("fastqs.extra") with QC in
     "qc.extra". After publishing, the standard outputs for all
     projects and the extra outputs for the first project must all
     exist under the publication directory.
     """
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=dict(run_number=87,
                       source="local",
                       instrument_datestamp="160621"),
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn)
     # Processing report plus standard QC for each project
     UpdateAnalysisDir(ap).add_processing_report()
     for proj in ap.get_analysis_projects():
         UpdateAnalysisProject(proj).add_qc_outputs()
     # Second Fastq set (with its own QC) for the first project
     multi_fastqs_project = ap.get_analysis_projects()[0]
     UpdateAnalysisProject(multi_fastqs_project).add_fastq_set(
         "fastqs.extra",
         ("Alt1.r1.fastq.gz", "Alt2.r1.fastq.gz"))
     UpdateAnalysisProject(multi_fastqs_project).add_qc_outputs(
         fastq_set="fastqs.extra",
         qc_dir="qc.extra")
     # Mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Assemble the list of expected outputs
     analysis_name = os.path.basename(ap.analysis_dir)
     expected = ["index.html", "processing_qc.html"]
     for proj in ap.get_analysis_projects():
         # Standard QC outputs followed by the MultiQC report
         qc_report = "qc_report.%s.%s" % (proj.name, analysis_name)
         expected.extend([
             qc_report,
             "%s.zip" % qc_report,
             os.path.join(qc_report, "qc_report.html"),
             os.path.join(qc_report, "qc"),
             "multiqc_report.%s.html" % proj.name,
         ])
     # Outputs for the additional Fastq set in the first project
     extra_report = "qc.extra_report.%s.%s" % (multi_fastqs_project.name,
                                               analysis_name)
     expected.extend([
         extra_report,
         "%s.zip" % extra_report,
         os.path.join(extra_report, "qc.extra_report.html"),
         os.path.join(extra_report, "qc.extra"),
         "multiqc.extra_report.%s.html" % multi_fastqs_project.name,
     ])
     # Everything must be present in the published location
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in expected:
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
 def test_make_fastqs_handle_bcl2fastq2_failure(self):
     """make_fastqs: handle bcl2fastq2 failure

     Runs 'make_fastqs' with a mock bcl2fastq that returns a non-zero
     exit code and checks that:

     - an exception is raised;
     - the primary data, log and bcl2fastq subdirectories exist;
     - none of the statistics, projects or processing report files
       exist.
     """
     # Create mock source data
     illumina_run = MockIlluminaRun("171020_M00879_00002_AHGXXXX",
                                    "miseq",
                                    top_dir=self.wd)
     illumina_run.create()
     # Create mock bcl2fastq which will fail (i.e.
     # return non-zero exit code)
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              exit_code=1)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, "171020_M00879_00002_AHGXXXX"))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertRaises(Exception, ap.make_fastqs, protocol="standard")
     # Check outputs
     analysis_dir = os.path.join(self.wd,
                                 "171020_M00879_00002_AHGXXXX_analysis")
     # These directories should exist even though bcl2fastq failed
     for subdir in (os.path.join("primary_data",
                                 "171020_M00879_00002_AHGXXXX"),
                    os.path.join("logs", "002_make_fastqs"), "bcl2fastq"):
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     # These files should be absent, so the failure message must
     # report an unexpected file rather than a missing one
     for filen in ("statistics.info", "statistics_full.info",
                   "per_lane_statistics.info", "per_lane_sample_stats.info",
                   "projects.info", "processing_qc.html"):
         self.assertFalse(os.path.exists(os.path.join(analysis_dir, filen)),
                          "Found file which should be absent: %s" % filen)
 def test_import_project(self):
     """Check AutoProcess.import_project imports a project

     Confirms that 'NewProj' is absent from the analysis directory
     (project lists and filesystem), imports it from
     'self.new_project_dir' via the 'import_project' method, and then
     confirms it is present both in the same AutoProcess instance and
     in a freshly loaded one.
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9',
         'miseq',
         top_dir=self.dirn)
     mockdir.create()
     # Check that the project is not currently present
     # (assertIn/assertNotIn report the list contents on failure)
     ap = AutoProcess(mockdir.dirn)
     self.assertNotIn('NewProj',
                      [p.name for p in ap.get_analysis_projects()])
     self.assertNotIn('NewProj',
                      [p.name
                       for p in ap.get_analysis_projects_from_dirs()])
     self.assertFalse(os.path.exists(os.path.join(ap.analysis_dir,'NewProj')))
     # Import the project
     ap.import_project(self.new_project_dir)
     self.assertIn('NewProj',
                   [p.name for p in ap.get_analysis_projects()])
     self.assertIn('NewProj',
                   [p.name
                    for p in ap.get_analysis_projects_from_dirs()])
     self.assertTrue(os.path.exists(os.path.join(ap.analysis_dir,'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertIn('NewProj',
                   [p.name for p in ap2.get_analysis_projects()])
     self.assertIn('NewProj',
                   [p.name
                    for p in ap2.get_analysis_projects_from_dirs()])
     self.assertTrue(os.path.exists(os.path.join(ap2.analysis_dir,'NewProj')))
 def test_import_project(self):
     """import_project: check project is imported

     Confirms that 'NewProj' is absent from the analysis directory
     (project lists and filesystem), imports it from
     'self.new_project_dir' via the 'import_project' function, and
     then confirms it is present both in the same AutoProcess instance
     and in a freshly loaded one.
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Check that the project is not currently present
     # (assertIn/assertNotIn report the list contents on failure)
     ap = AutoProcess(mockdir.dirn)
     self.assertNotIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertNotIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertFalse(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Import the project
     import_project(ap, self.new_project_dir)
     self.assertIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertIn(
         'NewProj', [p.name for p in ap2.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap2.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
Пример #6
0
 def test_publish_qc_with_project_missing_qc(self):
     """publish_qc: raises exception if project has missing QC

     QC outputs are added to every project except the first one, so
     publishing to an existing location is expected to raise.
     """
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621",
     }
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Processing report for the run
     UpdateAnalysisDir(ap).add_processing_report()
     # QC outputs for all projects bar the first
     for project in ap.get_analysis_projects()[1:]:
         UpdateAnalysisProject(project).add_qc_outputs()
     # Mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publishing should fail
     with self.assertRaises(Exception):
         publish_qc(ap, location=publication_dir)
Пример #7
0
 def test_analysis_dir_path(self):
     """AutoProcess: analysis dir path is absolute and normalized

     An AutoProcess created without a directory must report None for
     'analysis_dir'; one created from an absolute, relative or
     unnormalised ('..'-containing) path must always report the same
     absolute path.
     """
     run_name = '160621_M00879_0087_000000000-AGEW9'
     # Mock Illumina run directory
     MockIlluminaRun(run_name, 'miseq', top_dir=self.dirn).create()
     # No analysis directory supplied
     ap = AutoProcess()
     self.assertEqual(ap.analysis_dir, None)
     # Mock analysis directory
     MockAnalysisDirFactory.bcl2fastq2(
         run_name, 'miseq', top_dir=self.dirn).create()
     # Different spellings of the same location
     # NOTE(review): the relative forms presumably rely on the working
     # directory being self.dirn (set outside this method) - confirm
     rel_path = "160621_M00879_0087_000000000-AGEW9_analysis"
     abs_path = os.path.join(self.dirn, rel_path)
     rel_unnormalised = os.path.join("..",
                                     os.path.basename(self.dirn),
                                     rel_path)
     abs_unnormalised = os.path.join(self.dirn, rel_unnormalised)
     # Each form must resolve to the plain absolute path
     for path in (abs_path,
                  rel_path,
                  abs_unnormalised,
                  rel_unnormalised):
         ap = AutoProcess(analysis_dir=path)
         self.assertEqual(ap.analysis_dir, abs_path)
Пример #8
0
 def test_publish_qc_missing_destination(self):
     """publish_qc: raise exception if destination doesn't exist

     All projects have QC outputs, but the publication directory is
     never created: publishing must raise and must not create the
     directory.
     """
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621",
     }
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Processing report plus QC outputs for every project
     UpdateAnalysisDir(ap).add_processing_report()
     for proj in ap.get_analysis_projects():
         UpdateAnalysisProject(proj).add_qc_outputs()
     # Publication area which deliberately doesn't exist
     publication_dir = os.path.join(self.dirn, 'QC')
     self.assertFalse(os.path.exists(publication_dir))
     # Publishing should fail...
     with self.assertRaises(Exception):
         publish_qc(ap, location=publication_dir)
     # ...and leave the destination absent
     self.assertFalse(os.path.exists(publication_dir))
Пример #9
0
    def test_casava_new_output_dir(self):
        """
        AutoProcess.merge_fastq_dirs: casava/bcl2fastq v1.8.* output, new output dir
        """
        analysis_dir = self._setup_casava()
        # Merge the unaligned dirs
        self.ap = AutoProcess(analysis_dir)
        self.ap.merge_fastq_dirs("bcl2fastq.lanes1-2", output_dir="bcl2fastq")
        # Check outputs
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes1-2'))
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes3-4'))
        self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes1-2'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes3-4'))
        for f in (
                'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R1_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R2_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R1_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R2_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R1_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R2_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R1_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R2_001.fastq.gz'
        ):
            self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq',
                                                  f))
        # Check projects.info files
        self._assert_file_exists(
            os.path.join(analysis_dir, 'save.projects.info'))
        self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
        projects_info = open(os.path.join(analysis_dir, 'projects.info'),
                             'r').read()
        expected = """#Project	Samples	User	Library	Organism	PI	Comments
AB	AB1,AB2	.	.	.	.	.
CDE	CDE3,CDE4	.	.	.	.	.
"""
        self.assertEqual(projects_info, expected)
 def test_publish_qc_subset_of_projects(self):
     """publish_qc: only publish subset of projects

     QC outputs are added to every project, but publishing is
     restricted with projects="AB*". The outputs for the first project
     must be published, and the QC report directories for the
     remaining projects must not be.
     """
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=dict(run_number=87,
                       source="local",
                       instrument_datestamp="160621"),
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn)
     # Processing report for the run
     UpdateAnalysisDir(ap).add_processing_report()
     # First project is the one expected to be published; the
     # rest are expected to be left out
     all_projects = ap.get_analysis_projects()
     projects = all_projects[0:1]
     missing_projects = all_projects[1:]
     # QC outputs are added to all the projects
     for proj in ap.get_analysis_projects():
         UpdateAnalysisProject(proj).add_qc_outputs()
     # Mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish only the projects matching the pattern
     publish_qc(ap, location=publication_dir, projects="AB*")
     # Assemble the list of expected outputs
     analysis_name = os.path.basename(ap.analysis_dir)
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     expected = ["index.html", "processing_qc.html"]
     for proj in projects:
         # Standard QC outputs followed by the MultiQC report
         qc_report = "qc_report.%s.%s" % (proj.name, analysis_name)
         expected.extend([
             qc_report,
             "%s.zip" % qc_report,
             os.path.join(qc_report, "qc_report.html"),
             os.path.join(qc_report, "qc"),
             "multiqc_report.%s.html" % proj.name,
         ])
     for item in expected:
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
     # Projects outside the selection must not have been copied
     for proj in missing_projects:
         unwanted = os.path.join(
             published_dir,
             "qc_report.%s.%s" % (proj.name,
                                  os.path.basename(ap.analysis_dir)))
         self.assertFalse(os.path.exists(unwanted),
                          "%s exists in final dir, but shouldn't" %
                          proj.name)
 def test_make_fastqs_specify_platform_via_metadata(self):
     """make_fastqs: implicitly specify the platform via metadata

     After setup from a mock run named '171020_UNKNOWN_00002_AHGXXXX'
     the 'platform' metadata item is None; the test sets it to "miseq"
     and then checks that 'make_fastqs' completes, updates the primary
     data parameters and produces the expected directories and files.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     analysis_dir = os.path.join(
         self.wd,
         "171020_UNKNOWN_00002_AHGXXXX_analysis")
     # Mock sequencer output to act as the source data
     MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
     # Mock bcl2fastq executable, put at the front of PATH
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              platform="miseq")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis from the mock run
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     # Platform is unset after setup, so supply it via the metadata
     self.assertTrue(ap.metadata.platform is None)
     ap.metadata["platform"] = "miseq"
     # Parameter values before Fastq generation
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     self.assertFalse(ap.params.acquired_primary_data)
     # Generate the Fastqs
     make_fastqs(ap, protocol="standard")
     # Parameter values after Fastq generation
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(analysis_dir, "primary_data"))
     self.assertTrue(ap.params.acquired_primary_data)
     # Directories which should have been produced
     expected_subdirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs"),
         "bcl2fastq",
         "barcode_analysis",
     )
     for subdir in expected_subdirs:
         self.assertTrue(
             os.path.isdir(os.path.join(analysis_dir, subdir)),
             "Missing subdir: %s" % subdir)
     # Files which should have been produced
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for filen in expected_files:
         self.assertTrue(
             os.path.isfile(os.path.join(analysis_dir, filen)),
             "Missing file: %s" % filen)
Пример #12
0
 def test_clone_analysis_dir_copy_fastqs(self):
     """
     clone: copies an analysis directory

     Clones a mock analysis directory with 'copy_fastqs=True' and
     checks the clone's subdirectories, top-level files, 'bcl2fastq'
     directory, primary data link, project directories and the paths
     stored in its 'auto_process.info' parameter file.
     """
     # Source analysis directory
     source_dir = MockAnalysisDirFactory.bcl2fastq2(
         "190116_M01234_0002_AXYZ123",
         platform="miseq",
         paired_end=True,
         no_lane_splitting=False,
         include_stats_files=True,
         top_dir=self.dirn)
     source_dir.create()
     ap = AutoProcess(source_dir.dirn)
     UpdateAnalysisDir(ap).add_processing_report()
     ap.add_directory("primary_data/190116_M01234_0002_AXYZ123")
     # Clone it to a location which doesn't exist yet
     clone_dir = os.path.join(self.dirn, "190116_M01234_0002_AXYZ123_copy")
     self.assertFalse(os.path.exists(clone_dir))
     clone(ap, clone_dir, copy_fastqs=True)
     self.assertTrue(os.path.isdir(clone_dir))
     # Subdirectories of the clone
     for subdir in ('logs', 'ScriptCode'):
         self.assertTrue(os.path.isdir(os.path.join(clone_dir, subdir)),
                         "Missing '%s'" % subdir)
     # Top-level files of the clone
     expected_files = (
         'SampleSheet.orig.csv',
         'custom_SampleSheet.csv',
         'auto_process.info',
         'metadata.info',
         'statistics.info',
         'statistics_full.info',
         'per_lane_statistics.info',
         'per_lane_sample_stats.info',
         'processing_qc.html',
     )
     for filen in expected_files:
         self.assertTrue(os.path.isfile(os.path.join(clone_dir, filen)),
                         "Missing '%s'" % filen)
     # Unaligned (bcl2fastq output) directory
     self.assertTrue(os.path.isdir(os.path.join(clone_dir, 'bcl2fastq')))
     # Primary data must be a symbolic link
     self.assertTrue(
         os.path.islink(os.path.join(clone_dir,
                                     'primary_data',
                                     '190116_M01234_0002_AXYZ123')))
     # Project directories
     for proj in ('AB', 'CDE', 'undetermined'):
         self.assertTrue(os.path.isdir(os.path.join(clone_dir, proj)),
                         "Missing '%s'" % proj)
     # Stored parameters must point into the clone
     params = AnalysisDirParameters(
         filen=os.path.join(clone_dir, 'auto_process.info'))
     self.assertEqual(params.sample_sheet,
                      os.path.join(clone_dir, "custom_SampleSheet.csv"))
     self.assertEqual(params.primary_data_dir,
                      os.path.join(clone_dir, "primary_data"))
 def test_publish_qc_with_icell8_outputs(self):
     """publish_qc: project with ICell8 QC outputs

     Every project gets the standard QC outputs and the first project
     additionally gets ICell8 outputs. After publishing, the standard
     outputs for all projects and the ICell8 outputs for the first
     project must exist under the publication directory.
     """
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=dict(run_number=87,
                       source="local",
                       instrument_datestamp="160621"),
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn)
     # Processing report plus standard QC for each project
     UpdateAnalysisDir(ap).add_processing_report()
     projects = ap.get_analysis_projects()
     for proj in projects:
         UpdateAnalysisProject(proj).add_qc_outputs()
     # ICell8 report for the first project only
     icell8_project = projects[0]
     UpdateAnalysisProject(icell8_project).add_icell8_outputs()
     # Mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Assemble the list of expected outputs
     analysis_name = os.path.basename(ap.analysis_dir)
     expected = ["index.html", "processing_qc.html"]
     for proj in ap.get_analysis_projects():
         # Standard QC outputs followed by the MultiQC report
         qc_report = "qc_report.%s.%s" % (proj.name, analysis_name)
         expected.extend([
             qc_report,
             "%s.zip" % qc_report,
             os.path.join(qc_report, "qc_report.html"),
             os.path.join(qc_report, "qc"),
             "multiqc_report.%s.html" % proj.name,
         ])
     # ICell8 outputs
     icell8_dir = "icell8_processing.%s.%s" % (icell8_project.name,
                                               analysis_name)
     expected.extend([
         icell8_dir,
         "%s.zip" % icell8_dir,
         os.path.join(icell8_dir, "icell8_processing_data"),
         os.path.join(icell8_dir, "icell8_processing.html"),
         os.path.join(icell8_dir, "stats"),
     ])
     # Everything must be present in the published location
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in expected:
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
Пример #14
0
    def test_bcl2fastq2_new_output_dir(self):
        """
        AutoProcess.merge_fastq_dirs: bcl2fastq v2 output, new output dir
        """
        analysis_dir = self._setup_bcl2fastq2()
        # Merge the unaligned dirs
        self.ap = AutoProcess(analysis_dir)
        self.ap.merge_fastq_dirs("bcl2fastq.lanes1-2", output_dir="bcl2fastq")
        # Check outputs
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes1-2'))
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes3-4'))
        self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes1-2'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes3-4'))
        for f in ('AB/AB1_S1_L001_R1_001.fastq.gz',
                  'AB/AB1_S1_L001_R2_001.fastq.gz',
                  'AB/AB2_S2_L001_R1_001.fastq.gz',
                  'AB/AB2_S2_L001_R2_001.fastq.gz',
                  'AB/AB1_S1_L002_R1_001.fastq.gz',
                  'AB/AB1_S1_L002_R2_001.fastq.gz',
                  'AB/AB2_S2_L002_R1_001.fastq.gz',
                  'AB/AB2_S2_L002_R2_001.fastq.gz',
                  'CDE/CDE3_S3_L003_R1_001.fastq.gz',
                  'CDE/CDE3_S3_L003_R2_001.fastq.gz',
                  'CDE/CDE4_S4_L003_R1_001.fastq.gz',
                  'CDE/CDE4_S4_L003_R2_001.fastq.gz',
                  'CDE/CDE3_S3_L004_R1_001.fastq.gz',
                  'CDE/CDE3_S3_L004_R2_001.fastq.gz',
                  'CDE/CDE4_S4_L004_R1_001.fastq.gz',
                  'CDE/CDE4_S4_L004_R2_001.fastq.gz',
                  'Undetermined_S0_L001_R1_001.fastq.gz',
                  'Undetermined_S0_L001_R2_001.fastq.gz',
                  'Undetermined_S0_L002_R1_001.fastq.gz',
                  'Undetermined_S0_L002_R2_001.fastq.gz',
                  'Undetermined_S0_L003_R1_001.fastq.gz',
                  'Undetermined_S0_L003_R2_001.fastq.gz',
                  'Undetermined_S0_L004_R1_001.fastq.gz',
                  'Undetermined_S0_L004_R2_001.fastq.gz'):
            self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq',
                                                  f))
        # Check projects.info files
        self._assert_file_exists(
            os.path.join(analysis_dir, 'save.projects.info'))
        self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
        projects_info = open(os.path.join(analysis_dir, 'projects.info'),
                             'r').read()
        expected = """#Project	Samples	User	Library	Organism	PI	Comments
AB	AB1,AB2	.	.	.	.	.
CDE	CDE3,CDE4	.	.	.	.	.
"""
        self.assertEqual(projects_info, expected)
 def test_make_fastqs_icell8_protocol(self):
     """make_fastqs: icell8 protocol

     Runs 'make_fastqs' with protocol="icell8" against a mock
     bcl2fastq configured with
     assert_bases_mask="y25n76,I8,I8,y101", then checks the primary
     data parameters and the directories and files produced.
     """
     run_name = "171020_SN7001250_00002_AHGXXXX"
     analysis_dir = os.path.join(
         self.wd,
         "171020_SN7001250_00002_AHGXXXX_analysis")
     # Mock sequencer output to act as the source data
     MockIlluminaRun(run_name, "hiseq", top_dir=self.wd).create()
     # Mock bcl2fastq executable, put at the front of PATH; it is
     # given the bases mask that is expected to be used
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              assert_bases_mask="y25n76,I8,I8,y101")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis from the mock run
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     # Parameter values before Fastq generation
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     self.assertFalse(ap.params.acquired_primary_data)
     # Generate the Fastqs
     make_fastqs(ap, protocol="icell8")
     # Parameter values after Fastq generation
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(analysis_dir, "primary_data"))
     self.assertTrue(ap.params.acquired_primary_data)
     # Directories which should have been produced
     expected_subdirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs_icell8"),
         "bcl2fastq",
         "barcode_analysis",
     )
     for subdir in expected_subdirs:
         self.assertTrue(
             os.path.isdir(os.path.join(analysis_dir, subdir)),
             "Missing subdir: %s" % subdir)
     # Files which should have been produced
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for filen in expected_files:
         self.assertTrue(
             os.path.isfile(os.path.join(analysis_dir, filen)),
             "Missing file: %s" % filen)
Пример #16
0
    def test_bcl2fastq2_no_lane_splitting_new_output_dir(self):
        """
        AutoProcess.merge_fastq_dirs: bcl2fastq v2 output with --no-lane-splitting, new output dir

        Calls merge_fastq_dirs on 'bcl2fastq.AB' with output_dir set to
        'bcl2fastq', then checks the resulting directories, Fastq files,
        merged undetermined reads and the projects.info contents.
        """
        analysis_dir = self._setup_bcl2fastq2_no_lane_splitting()
        # Merge the unaligned dirs
        self.ap = AutoProcess(analysis_dir)
        self.ap.merge_fastq_dirs("bcl2fastq.AB", output_dir="bcl2fastq")
        # Check outputs: 'save.*' copies and the merged dir must exist...
        for dirn in ('save.bcl2fastq.AB', 'save.bcl2fastq.CDE', 'bcl2fastq'):
            self._assert_dir_exists(os.path.join(analysis_dir, dirn))
        # ...while the original input dirs must no longer be present
        for dirn in ('bcl2fastq.AB', 'bcl2fastq.CDE'):
            self._assert_dir_doesnt_exist(os.path.join(analysis_dir, dirn))
        for f in (
                'AB/AB1_S1_R1_001.fastq.gz',
                'AB/AB1_S1_R2_001.fastq.gz',
                'AB/AB2_S2_R1_001.fastq.gz',
                'AB/AB2_S2_R2_001.fastq.gz',
                'CDE/CDE3_S3_R1_001.fastq.gz',
                'CDE/CDE3_S3_R2_001.fastq.gz',
                'CDE/CDE4_S4_R1_001.fastq.gz',
                'CDE/CDE4_S4_R2_001.fastq.gz',
                'Undetermined_S0_R1_001.fastq.gz',
                'Undetermined_S0_R2_001.fastq.gz',
        ):
            self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq',
                                                  f))
        # Check merge of undetermined fastqs
        # The files are read inside 'with' blocks so the handles are
        # closed even when an assertion below fails.
        # NOTE(review): mode 'rb' yields bytes under Python 3 whereas the
        # expected values are built with str.join - confirm the type of
        # fastq_reads_r1/fastq_reads_r2 if this is run on Python 3.
        with gzip.GzipFile(
                os.path.join(analysis_dir, 'bcl2fastq',
                             'Undetermined_S0_R1_001.fastq.gz'), 'rb') as fp:
            undetermined_r1 = fp.read()
        expected_r1 = '\n'.join(fastq_reads_r1[:8]) + '\n'
        self.assertEqual(undetermined_r1, expected_r1)
        with gzip.GzipFile(
                os.path.join(analysis_dir, 'bcl2fastq',
                             'Undetermined_S0_R2_001.fastq.gz'), 'rb') as fp:
            undetermined_r2 = fp.read()
        expected_r2 = '\n'.join(fastq_reads_r2[:8]) + '\n'
        self.assertEqual(undetermined_r2, expected_r2)
        # Check projects.info files
        self._assert_file_exists(
            os.path.join(analysis_dir, 'save.projects.info'))
        self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
        with open(os.path.join(analysis_dir, 'projects.info'), 'r') as fp:
            projects_info = fp.read()
        # Fields are tab-delimited; tabs are written as explicit escapes
        # so they cannot be silently converted to spaces by an editor
        expected = (
            "#Project\tSamples\tUser\tLibrary\tOrganism\tPI\tComments\n"
            "AB\tAB1,AB2\t.\t.\t.\t.\t.\n"
            "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\n")
        self.assertEqual(projects_info, expected)
Пример #17
0
 def test_publish_qc_with_projects_no_reports(self):
     """publish_qc: projects with all QC outputs but no reports

     The QC report files are deleted from every project before
     publishing; the test then checks that the published area holds
     the index, the processing report and the per-project QC items.
     """
     # Build and load the mock auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621",
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Populate the processing report and the per-project QC outputs
     UpdateAnalysisDir(ap).add_processing_report()
     for project in ap.get_analysis_projects():
         UpdateAnalysisProject(project).add_qc_outputs()
     # Delete the report files again so only the raw QC outputs remain
     for project in ap.get_analysis_projects():
         report_zip = "qc_report.%s.%s.zip" % (
             project.name, os.path.basename(ap.analysis_dir))
         for report in (report_zip,
                        "qc_report.html",
                        "multiqc_report.html"):
             os.remove(os.path.join(project.dirn, report))
     # Put a mock multiqc executable at the front of the PATH
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Create the area to publish into
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Assemble the list of items expected in the published run dir
     expected_items = ["index.html", "processing_qc.html"]
     for project in ap.get_analysis_projects():
         # Standard QC outputs
         report_dir = "qc_report.%s.%s" % (
             project.name, os.path.basename(ap.analysis_dir))
         expected_items.extend((
             report_dir,
             "%s.zip" % report_dir,
             os.path.join(report_dir, "qc_report.html"),
             os.path.join(report_dir, "qc"),
         ))
     published_run_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in expected_items:
         path = os.path.join(published_run_dir, item)
         self.assertTrue(os.path.exists(path), "Missing %s" % path)
 def test_make_fastqs_10x_chromium_sc_protocol(self):
     """make_fastqs: 10x_chromium_sc protocol

     Runs make_fastqs with protocol "10x_chromium_sc" on a mock HiSeq
     run, using mock bcl2fastq and cellranger executables, and checks
     the stored parameters plus the expected output dirs and files.
     """
     # Mock source data
     illumina_run = MockIlluminaRun(
         "171020_SN7001250_00002_AHGXXXX",
         "hiseq",
         top_dir=self.wd)
     illumina_run.create()
     # Mock bcl2fastq and cellranger executables, placed first on PATH
     for exe_class, exe_name in ((MockBcl2fastq2Exe, "bcl2fastq"),
                                 (MockCellrangerExe, "cellranger")):
         exe_class.create(os.path.join(self.bin, exe_name))
     os.environ['PATH'] = "%s:%s" % (self.bin,
                                     os.environ['PATH'])
     # Set up the analysis and verify the initial parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd,
                           "171020_SN7001250_00002_AHGXXXX"))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     self.assertFalse(ap.params.acquired_primary_data)
     # Generate the Fastqs
     make_fastqs(ap, protocol="10x_chromium_sc")
     # Parameters should now point at the acquired primary data
     analysis_dir = os.path.join(
         self.wd,
         "171020_SN7001250_00002_AHGXXXX_analysis")
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(self.wd,
                                   "171020_SN7001250_00002_AHGXXXX_analysis",
                                   "primary_data"))
     self.assertTrue(ap.params.acquired_primary_data)
     # Expected subdirectories of the analysis dir
     expected_subdirs = (
         os.path.join("primary_data",
                      "171020_SN7001250_00002_AHGXXXX"),
         os.path.join("logs",
                      "002_make_fastqs_10x_chromium_sc"),
         "bcl2fastq",
         "HGXXXX",
     )
     for subdir in expected_subdirs:
         self.assertTrue(
             os.path.isdir(os.path.join(analysis_dir, subdir)),
             "Missing subdir: %s" % subdir)
     # Expected files in the analysis dir
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
         "cellranger_qc_summary.html",
     )
     for filen in expected_files:
         self.assertTrue(
             os.path.isfile(os.path.join(analysis_dir, filen)),
             "Missing file: %s" % filen)
 def test_import_project_with_qc(self):
     """import_project: check project with QC outputs is imported

     QC outputs (without MultiQC) are added to the project at
     self.new_project_dir, which is then imported into a mock analysis
     directory. The test checks that 'NewProj' is absent before the
     import, present afterwards (also via a freshly loaded AutoProcess)
     and that the QC report, MultiQC report and ZIP file exist.
     """
     def listed_names(autoprocess):
         # Names of the projects returned by get_analysis_projects()
         return [p.name for p in autoprocess.get_analysis_projects()]

     def names_from_dirs(autoprocess):
         # Names of the projects found by get_analysis_projects_from_dirs()
         return [p.name
                 for p in autoprocess.get_analysis_projects_from_dirs()]

     # Make mock multiqc
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Add QC outputs to the project to be imported
     UpdateAnalysisProject(AnalysisProject(
         'NewProj',
         self.new_project_dir)).add_qc_outputs(include_multiqc=False)
     # Check that the project is not currently present
     ap = AutoProcess(mockdir.dirn)
     self.assertNotIn('NewProj', listed_names(ap))
     self.assertNotIn('NewProj', names_from_dirs(ap))
     self.assertFalse(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Import the project
     import_project(ap, self.new_project_dir)
     self.assertIn('NewProj', listed_names(ap))
     self.assertIn('NewProj', names_from_dirs(ap))
     self.assertTrue(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertIn('NewProj', listed_names(ap2))
     self.assertIn('NewProj', names_from_dirs(ap2))
     self.assertTrue(
         os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
     # Check for QC report and ZIP file
     for f in (
             "qc_report.html",
             "multiqc_report.html",
             "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
     ):
         f = os.path.join(ap2.analysis_dir, 'NewProj', f)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
 def test_make_fastqs_standard_protocol_stores_bases_mask(self):
     """make_fastqs: standard protocol stores supplied bases mask

     Runs make_fastqs with an explicit bases mask on a mock MiSeq run
     and checks that the mask replaces the initial "auto" value in the
     parameters, and that the expected output dirs and files exist.
     """
     # Mock source data
     illumina_run = MockIlluminaRun(
         "171020_M00879_00002_AHGXXXX",
         "miseq",
         top_dir=self.wd)
     illumina_run.create()
     # Mock bcl2fastq executable, placed first on PATH
     mock_bcl2fastq = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(mock_bcl2fastq)
     os.environ['PATH'] = "%s:%s" % (self.bin,
                                     os.environ['PATH'])
     # Set up the analysis and verify the initial parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd,
                           "171020_M00879_00002_AHGXXXX"))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     # Generate the Fastqs with an explicit bases mask
     make_fastqs(ap, protocol="standard", bases_mask="y101,I8,I8,y101")
     # The supplied mask and the primary data location should be stored
     self.assertEqual(ap.params.bases_mask, "y101,I8,I8,y101")
     self.assertEqual(ap.params.primary_data_dir,
                      os.path.join(self.wd,
                                   "171020_M00879_00002_AHGXXXX_analysis",
                                   "primary_data"))
     # Expected subdirectories of the analysis dir
     analysis_dir = os.path.join(
         self.wd,
         "171020_M00879_00002_AHGXXXX_analysis")
     expected_subdirs = (
         os.path.join("primary_data",
                      "171020_M00879_00002_AHGXXXX"),
         os.path.join("logs",
                      "002_make_fastqs"),
         "bcl2fastq",
         "barcode_analysis",
     )
     for subdir in expected_subdirs:
         self.assertTrue(
             os.path.isdir(os.path.join(analysis_dir, subdir)),
             "Missing subdir: %s" % subdir)
     # Expected files in the analysis dir
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for filen in expected_files:
         self.assertTrue(
             os.path.isfile(os.path.join(analysis_dir, filen)),
             "Missing file: %s" % filen)
    def test_make_fastqs_samplesheet_with_invalid_characters(self):
        """make_fastqs: stop for invalid characters in sample sheet
        """
        # Create mock source data with samplesheet with backspace
        illumina_run = MockIlluminaRun(
            "171020_M00879_00002_AHGXXXX",
            "miseq",
            sample_sheet_content="""[Header],,,,,,,,,
IEMFileVersion,4
Date,11/23/2015
Workflow,GenerateFASTQ
Application,FASTQ Only
Assay,TruSeq HT
Description,
Chemistry,Amplicon

[Reads]
101
101

[Settings]
ReverseComplement,0
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT

[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
Sample1,Sample1,,,D701,CGTGTAGG,D501,GACCTGTC,,\b
Sample2,Sample2,,,D702,CGTGTAGG,D501,ATGTAACT,,
""",
            top_dir=self.wd)
        illumina_run.create()
        # Create mock bcl2fastq
        MockBcl2fastq2Exe.create(os.path.join(self.bin,
                                              "bcl2fastq"),
                                 platform="miseq")
        os.environ['PATH'] = "%s:%s" % (self.bin,
                                        os.environ['PATH'])
        # Do the test
        ap = AutoProcess(settings=self.settings)
        ap.setup(os.path.join(self.wd,
                              "171020_M00879_00002_AHGXXXX"))
        self.assertTrue(ap.params.sample_sheet is not None)
        self.assertEqual(ap.params.bases_mask,"auto")
        self.assertTrue(ap.params.primary_data_dir is None)
        self.assertFalse(ap.params.acquired_primary_data)
        self.assertRaises(Exception,
                          make_fastqs,
                          ap)
Пример #22
0
 def test_autoprocess_setup_existing_target_dir(self):
     """AutoProcess.setup works when target dir exists

     A mock analysis directory for the run is created before setup is
     called on the mock run directory.
     """
     run_name = '160621_M00879_0087_000000000-AGEW9'
     # Mock Illumina run directory used as the data source
     mock_illumina_run = MockIlluminaRun(
         run_name, 'miseq', top_dir=self.dirn)
     mock_illumina_run.create()
     # Mock auto-process directory that exists ahead of setup
     existing_analysis = MockAnalysisDirFactory.bcl2fastq2(
         run_name, 'miseq', top_dir=self.dirn)
     existing_analysis.create()
     # Do setup into existing analysis dir
     ap = AutoProcess()
     ap.setup(mock_illumina_run.dirn)
     # NOTE(review): this path is relative to the current working
     # directory and is the run name (no '_analysis' suffix) - confirm
     # this is the directory the test means to check.
     self.assertTrue(os.path.isdir(run_name))
Пример #23
0
 def test_with_project_dirs_no_projects_dot_info_no_unaligned(self):
     """AutoProcess.get_analysis_projects: project dirs exist (no projects.info, no unaligned)

     NOTE(review): the name says "no unaligned" but only projects.info
     is removed here - confirm whether the bcl2fastq output dir should
     be removed as well.
     """
     # Build the mock auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     # Drop projects.info so the projects have to come from elsewhere
     projects_info = os.path.join(mockdir.dirn, "projects.info")
     os.remove(projects_info)
     # List the projects
     projects = AutoProcess(mockdir.dirn).get_analysis_projects()
     expected = ('AB', 'CDE', 'undetermined')
     self.assertEqual(len(projects), len(expected))
     # Every returned item is an AnalysisProject with an expected name
     for project in projects:
         self.assertTrue(isinstance(project, AnalysisProject))
         self.assertTrue(project.name in expected)
     # Every expected name is matched by exactly one returned project
     for name in expected:
         n_matches = sum(1 for project in projects
                         if project.name == name)
         self.assertEqual(n_matches, 1)
Пример #24
0
 def test_bcl2fastq2_can_be_loaded_after_rsync(self):
     """
     merge_fastq_dirs: rsynced bcl2fastq v2 output can be loaded

     The merged 'bcl2fastq' directory is loaded with IlluminaData, then
     copied with rsync (empty directories pruned) and loaded again from
     the copy; a failure to load either one fails the test.
     """
     analysis_dir = self._setup_bcl2fastq2()
     # Merge the unaligned dirs
     self.ap = AutoProcess(analysis_dir, settings=self.settings)
     merge_fastq_dirs(self.ap, "bcl2fastq.lanes1-2", output_dir="bcl2fastq")
     # The merged directory must exist and be loadable
     merged_dir = os.path.join(analysis_dir, 'bcl2fastq')
     self._assert_dir_exists(merged_dir)
     try:
         merged_data = IlluminaData(analysis_dir,
                                    unaligned_dir='bcl2fastq')
     except Exception as ex:
         self.fail("exception loading merged directory: %s" % ex)
     # Rsync (with empty directories pruned)
     target_dir = os.path.join(self.dirn, "rsynced")
     os.mkdir(target_dir)
     rsync_log = os.path.join(self.dirn, "rsync.log")
     rsync_cmd = applications.general.rsync(
         "%s/bcl2fastq" % self.ap.analysis_dir,
         target_dir,
         prune_empty_dirs=True)
     rsync_cmd.run_subprocess(log=rsync_log)
     # The rsynced copy must exist and be loadable too
     self._assert_dir_exists(os.path.join(target_dir, 'bcl2fastq'))
     try:
         rsynced_data = IlluminaData(target_dir, unaligned_dir='bcl2fastq')
     except Exception as ex:
         self.fail("exception loading rsynced directory: %s" % ex)
Пример #25
0
    def test_ignore_commented_projects(self):
        """AutoProcess.get_analysis_projects: ignore commented projects
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create()
        # Update the projects.info file
        projects_info = os.path.join(mockdir.dirn, "projects.info")
        with open(projects_info, "w") as fp:
            fp.write(
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
#AB\tAB1,AB2\tAlan Brown\tRNA-seq\t.\tHuman\tAudrey Benson\t1% PhiX
CDE\tCDE3,CDE4\tClive David Edwards\tChIP-seq\t.\tMouse\tClaudia Divine Eccleston\t1% PhiX
""")
        # List the projects
        projects = AutoProcess(mockdir.dirn).get_analysis_projects()
        expected = ('CDE', 'undetermined')
        self.assertEqual(len(projects), len(expected))
        for p in projects:
            self.assertTrue(isinstance(p, AnalysisProject))
            self.assertTrue(p.name in expected)
        for p in expected:
            matched_projects = [x for x in projects if x.name == p]
            self.assertEqual(len(matched_projects), 1)
Пример #26
0
    def test_update_project_metadata_file_uncomment_existing_project(self):
        """
        AutoProcess.update_project_metadata_file: existing project is uncommented
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Create projects.info file with one project already listed
        with open(os.path.join(mockdir.dirn, "projects.info"), 'wt') as fp:
            fp.write(
                "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n#CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me"
            )
        # Update the projects.info file
        AutoProcess(mockdir.dirn).update_project_metadata_file()
        # Check output - missing project kept but commented out
        with open(os.path.join(mockdir.dirn, "projects.info"), 'rt') as fp:
            self.assertEqual(
                fp.read(),
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
AB\tAB1,AB2\t.\t.\t.\t.\t.\t.
CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me
""")
Пример #27
0
    def test_update_project_metadata_file_missing_from_bcl2fastq_output(self):
        """
        AutoProcess.update_project_metadata_file: make missing file and populate from bcl2fastq output
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Remove projects.info file
        os.remove(os.path.join(mockdir.dirn, "projects.info"))
        # Update the projects.info file
        AutoProcess(mockdir.dirn).update_project_metadata_file()
        # Check output
        with open(os.path.join(mockdir.dirn, "projects.info"), 'rt') as fp:
            self.assertEqual(
                fp.read(),
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
AB\tAB1,AB2\t.\t.\t.\t.\t.\t.
CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\t.
""")
Пример #28
0
    def test_make_project_metadata_file_no_bcl2fastq_output(self):
        """
        AutoProcess.make_project_metadata_file: new 'projects.info' (no bcl2fastq output)
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Remove the projects.info file and the bcl2fastq output dir
        os.remove(os.path.join(mockdir.dirn, "projects.info"))
        shutil.rmtree(os.path.join(mockdir.dirn, "bcl2fastq"))
        # Create a new projects.info file
        AutoProcess(mockdir.dirn).make_project_metadata_file()
        # Check outputs
        self.assertTrue(
            os.path.exists(os.path.join(mockdir.dirn, "projects.info")))
        with open(os.path.join(mockdir.dirn, "projects.info"), 'rt') as fp:
            self.assertEqual(
                fp.read(),
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
""")
 def test_update_fastq_stats(self):
     """update_fastq_stats: generates statistics files

     Checks that none of the statistics files exist in the mock
     analysis directory beforehand and that all of them exist after
     update_fastq_stats has been called.
     """
     # Build the mock auto-process directory without project dirs
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '190104_M00879_0087_000000000-AGEW9',
         'miseq',
         metadata={"instrument_datestamp": "190104"},
         top_dir=self.wd)
     mockdir.create(no_project_dirs=True)
     # Names of the statistics files to look for
     stats_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
     )
     # None of the stats files should be present yet
     for filen in stats_files:
         stats_path = os.path.join(mockdir.dirn, filen)
         self.assertFalse(os.path.exists(stats_path),
                          "%s: file exists, but shouldn't" %
                          filen)
     # Update (i.e. generate) stats
     ap = AutoProcess(mockdir.dirn)
     update_fastq_stats(ap)
     # All of the stats files should be present now
     for filen in stats_files:
         stats_path = os.path.join(mockdir.dirn, filen)
         self.assertTrue(os.path.exists(stats_path),
                         "%s: missing" % filen)
Пример #30
0
 def test_publish_qc_processing_qc(self):
     """publish_qc: processing QC report only

     Only the processing report is added to the mock analysis
     directory (created without project dirs) before publishing.
     """
     # Build and load the mock auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621",
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create(no_project_dirs=True)
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing report
     UpdateAnalysisDir(ap).add_processing_report()
     # Create the area to publish into
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish QC
     publish_qc(ap, location=publication_dir)
     # The index and the processing report must have been published
     published_run_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in ("index.html", "processing_qc.html"):
         path = os.path.join(published_run_dir, item)
         self.assertTrue(os.path.exists(path), "Missing %s" % path)
Пример #31
0
 def test_publish_qc_with_cellranger_qc_multiple_lanes_subsets(self):
     """publish_qc: publish cellranger QC output (multiple subsets of lanes)

     Cellranger QC output is added twice, for lanes "45" and "78", and
     a summary HTML file is expected for each after publishing.
     """
     # Build and load the mock auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621",
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create(no_project_dirs=True)
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing and cellranger QC reports
     UpdateAnalysisDir(ap).add_processing_report()
     for lanes in ("45", "78"):
         UpdateAnalysisDir(ap).add_cellranger_qc_output(lanes=lanes)
     # Create the area to publish into
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Items expected in the published run directory
     expected_items = [
         "index.html",
         "processing_qc.html",
         "cellranger_qc_summary_45.html",
         "cellranger_qc_summary_78.html",
     ]
     published_run_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     # Do checks
     for item in expected_items:
         path = os.path.join(published_run_dir, item)
         self.assertTrue(os.path.exists(path), "Missing %s" % path)
 def test_autoprocess_setup_existing_target_dir(self):
     """AutoProcess.setup works when target dir exists

     Both a mock run directory and a mock analysis directory for the
     same run are created before setup is invoked on the run directory.
     """
     run_name = '160621_M00879_0087_000000000-AGEW9'
     platform = 'miseq'
     # Mock Illumina run directory used as the data source
     mock_illumina_run = MockIlluminaRun(run_name,
                                         platform,
                                         top_dir=self.dirn)
     mock_illumina_run.create()
     # Mock auto-process directory that exists ahead of setup
     mockdir = MockAnalysisDirFactory.bcl2fastq2(run_name,
                                                 platform,
                                                 top_dir=self.dirn)
     mockdir.create()
     # Do setup into existing analysis dir
     ap = AutoProcess()
     ap.setup(mock_illumina_run.dirn)
     # NOTE(review): the checked path is relative to the current
     # working directory and has no '_analysis' suffix - confirm it is
     # the intended directory.
     self.assertTrue(os.path.isdir(run_name))
 def test_autoprocess_setup(self):
     """AutoProcess.setup works for mock MISeq run

     Checks the parameters stored after setup, then deletes the
     AutoProcess object and checks the files and subdirectories in the
     analysis directory.
     """
     # Create mock Illumina run directory
     mock_illumina_run = MockIlluminaRun(
         '151125_M00879_0001_000000000-ABCDE1',
         'miseq',
         top_dir=self.dirn)
     mock_illumina_run.create()
     # Set up autoprocessor
     ap = AutoProcess()
     ap.setup(mock_illumina_run.dirn)
     analysis_dirn = "%s_analysis" % mock_illumina_run.name
     expected_analysis_dir = os.path.join(self.dirn, analysis_dirn)
     # Check parameters
     self.assertEqual(ap.analysis_dir, expected_analysis_dir)
     self.assertEqual(ap.params.data_dir, mock_illumina_run.dirn)
     self.assertEqual(ap.params.sample_sheet,
                      os.path.join(self.dirn, analysis_dirn,
                                   'custom_SampleSheet.csv'))
     self.assertEqual(ap.params.bases_mask,
                      'y101,I8,I8,y101')
     # Delete to force write of data to disk
     del ap
     # Check directory exists (path is relative to the working dir)
     self.assertTrue(os.path.isdir(analysis_dirn))
     # Check files exist
     expected_files = (
         'SampleSheet.orig.csv',
         'custom_SampleSheet.csv',
         'auto_process.info',
         'metadata.info',
     )
     for filen in expected_files:
         file_path = os.path.join(analysis_dirn, filen)
         self.assertTrue(os.path.exists(file_path),
                         "Missing file: %s" % filen)
     # Check subdirs have been created
     expected_subdirs = (
         'ScriptCode',
         'logs',
     )
     for subdirn in expected_subdirs:
         subdir_path = os.path.join(analysis_dirn, subdirn)
         self.assertTrue(os.path.isdir(subdir_path),
                         "Missing subdir: %s" % subdirn)
                     key = key_value[:i]
                     value = key_value[i+1:].strip("'").strip('"')
                     print "Setting '%s' to '%s'" % (key,value)
                     __settings.set(key,value)
                 except ValueError:
                     logging.error("Can't process '%s'" % options.key_value)
         # Save the updated settings to file
         __settings.save()
     else:
         # Report the current configuration settings
         __settings.report_settings()
 elif cmd == 'setup':
     if len(args) != 1:
         sys.stderr.write("Need to supply a data source location\n")
         sys.exit(1)
     d = AutoProcess()
     if options.fastq_dir is None:
         d.setup(args[0],
                 analysis_dir=options.analysis_dir,
                 sample_sheet=options.sample_sheet)
     else:
         d.setup_from_fastq_dir(args[0],options.fastq_dir)
 elif cmd == 'clone':
     if len(args) != 2:
         sys.stderr.write("Need to supply an existing analysis dir and "
                          "directory for the copy\n")
         sys.exit(1)
     d = AutoProcess(args[0],allow_save_params=False)
     d.clone(args[1],copy_fastqs=options.copy_fastqs)
 elif cmd == 'import_project':
     if len(args) == 0 or len(args) > 2: