Пример #1
0
    def test_get_sequencer_platform_from_instrument_name_and_settings(self):
        """
        get_sequencer_platform: use instrument name and settings
        """
        settings_ini = os.path.join(self.dirn,"settings.ini")
        with open(settings_ini,'w') as s:
            s.write("""[sequencers]
SN7001251 = hiseq
M00880 = miseq
FS10000171 = iseq
""")
        settings = Settings(settings_ini)
        print settings.sequencers
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/120919_BLAH_0035_BC133VACXX",
            instrument="SN7001251",
            settings=settings),"hiseq")
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/121210_BLAH_0001_000000000-A2Y1L",
            instrument="M00880",
            settings=settings),"miseq")
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/20180829_BLAH_3_BNT40323-1530",
            instrument="FS10000171",
            settings=settings),"iseq")
Пример #2
0
def verify_qc(project,
              qc_dir=None,
              fastq_dir=None,
              qc_protocol=None,
              runner=None,
              log_dir=None):
    """
    Verify the QC run for a project

    Builds a 'reportqc.py --verify' command line for the
    project directory, runs it as a SchedulerJob and waits
    for the job to finish.

    Arguments:
      project (AnalysisProject): analysis project
        to verify the QC for
      qc_dir (str): optional, specify the subdir with
        the QC outputs being verified
      fastq_dir (str): optional, specify a non-default
        directory with Fastq files being verified
      qc_protocol (str): optional, QC protocol to
        verify against
      runner (JobRunner): optional, job runner to use
        for running the verification (defaults to the
        'default_runner' from the 'general' settings)
      log_dir (str): optional, specify a directory to
        write logs to

    Returns:
      Boolean: True if QC passes verification, otherwise
        False.

    Raises:
      KeyboardInterrupt: re-raised after terminating the
        verification job if the wait is interrupted.
    """
    # Sort out runners
    if runner is None:
        runner = Settings().general.default_runner
    # Construct command for QC verification
    verify_cmd = Command("reportqc.py", "--verify")
    if qc_protocol is not None:
        verify_cmd.add_args("--protocol", qc_protocol)
    if qc_dir is not None:
        verify_cmd.add_args("--qc_dir", qc_dir)
    if fastq_dir is not None:
        verify_cmd.add_args("--fastq_dir", fastq_dir)
    verify_cmd.add_args(project.dirn)
    # Run the command
    verify = SchedulerJob(runner,
                          verify_cmd.command_line,
                          name="verify_qc.%s" % project.name,
                          working_dir=project.dirn,
                          log_dir=log_dir)
    verify.start()
    try:
        verify.wait()
    except KeyboardInterrupt:
        logger.warning("Keyboard interrupt, terminating QC verification")
        verify.terminate()
        # Bare raise keeps the original traceback intact
        raise
    # Return boolean based on the exit code of the job
    # NOTE(review): assumes SchedulerJob exposes the finished job's
    # exit status as 'exit_code' -- confirm against its definition
    return (verify.exit_code == 0)
Пример #3
0
    def setUp(self):
        # Temporary working directory for the test
        self.dirn = tempfile.mkdtemp(suffix='TestArchiveCommand')
        # Build a Settings instance from a minimal config file; this
        # is what lets the unit tests set the polling interval
        ini_path = os.path.join(self.dirn,"settings.ini")
        with open(ini_path,'w') as fp:
            fp.write("[general]\npoll_interval = 0.5\n")
        self.settings = Settings(ini_path)
        # Remember the starting directory (so we can get back to it
        # at the end) before moving into the working directory
        self.pwd = os.getcwd()
        os.chdir(self.dirn)
Пример #4
0
    def setUp(self):
        # Temporary working directory for the test
        self.dirn = tempfile.mkdtemp(suffix='TestAutoProcessMergeFastqDirs')
        # Build a Settings instance from a minimal config file; this
        # is what lets the unit tests set the polling interval
        ini_path = os.path.join(self.dirn, "auto_process.ini")
        with open(ini_path, 'w') as fp:
            fp.write("[general]\npoll_interval = 0.5\n")
        self.settings = Settings(ini_path)
        # Remember the starting directory (so we can get back to it
        # at the end) before moving into the working directory
        self.pwd = os.getcwd()
        os.chdir(self.dirn)
        # No test object yet; this attribute is a placeholder
        self.ap = None
    def setUp(self):
        # Temporary working directory for the test
        self.wd = tempfile.mkdtemp(suffix='TestAutoProcessAnalyseBarcodes')
        # Build a Settings instance from a minimal config file; this
        # is what lets the unit tests set the polling interval
        ini_path = os.path.join(self.wd,"settings.ini")
        with open(ini_path,'w') as fp:
            fp.write("[general]\npoll_interval = 0.5\n")
        self.settings = Settings(ini_path)
        # Remember the starting directory before moving into the
        # working directory
        self.pwd = os.getcwd()
        os.chdir(self.wd)
        # No test object yet; this attribute is a placeholder
        self.ap = None
Пример #6
0
    def test_run_qc(self):
        """run_qc: standard QC run
        """
        run_name = '170901_M00879_0087_000000000-AGEW9'
        # Create the mock illumina_qc.sh and multiqc executables and
        # put their directory at the front of PATH
        for mock_exe, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                                   (MockMultiQC, "multiqc")):
            mock_exe.create(os.path.join(self.bin, exe_name))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Create the mock analysis directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            run_name,
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            top_dir=self.dirn)
        mockdir.create()
        # Settings file which sets the polling interval
        ini_path = os.path.join(self.dirn, "settings.ini")
        with open(ini_path, 'w') as fp:
            fp.write("[general]\npoll_interval = 0.5\n")
        # Autoprocess instance for the mock directory
        autoprocess = AutoProcess(analysis_dir=mockdir.dirn,
                                  settings=Settings(ini_path))
        # Run the QC and check the returned status
        status = run_qc(autoprocess, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check outputs and reports for each project
        for p in ("AB", "CDE", "undetermined"):
            project_dir = os.path.join(mockdir.dirn, p)
            zip_name = "qc_report.%s.%s_analysis.zip" % (p, run_name)
            expected = ("qc",
                        "qc_report.html",
                        zip_name,
                        "multiqc_report.html")
            for f in expected:
                self.assertTrue(
                    os.path.exists(os.path.join(project_dir, f)),
                    "Missing %s in project '%s'" % (f, p))
            # The zip archive should include the MultiQC report
            multiqc = os.path.join(
                "qc_report.%s.%s_analysis" % (p, run_name),
                "multiqc_report.html")
            with zipfile.ZipFile(os.path.join(project_dir, zip_name)) as z:
                self.assertTrue(multiqc in z.namelist())
Пример #7
0
    def test_get_sequencer_platform_unknown_instrument(self):
        """
        get_sequencer_platform: handle unknown instrument name
        """
        settings_ini = os.path.join(self.dirn,"settings.ini")
        with open(settings_ini,'w') as s:
            s.write("""[sequencers]
SN7001251 = hiseq
M00880 = miseq
FS10000171 = iseq
""")
        settings = Settings(settings_ini)
        print settings.sequencers
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/120919_BLAH_0035_BC133VACXX"),None)
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/120919_BLAH_0035_BC133VACXX",
            settings=settings),None)
        self.assertEqual(get_sequencer_platform(
            "/mnt/data/120919_BLAH_0035_BC133VACXX",
            instrument="BLEURGH",
            settings=settings),None)
Пример #8
0
def report_qc(project,
              qc_dir=None,
              fastq_dir=None,
              qc_protocol=None,
              report_html=None,
              zip_outputs=True,
              multiqc=False,
              force=False,
              runner=None,
              log_dir=None):
    """
    Generate report for the QC run for a project

    Builds a 'reportqc.py' command line for the project
    directory, runs it as a SchedulerJob and waits for the
    job to finish.

    Arguments:
      project (AnalysisProject): analysis project
        to report the QC for
      qc_dir (str): optional, specify the subdir with
        the QC outputs being reported
      fastq_dir (str): optional, specify a non-default
        directory with Fastq files being verified
      qc_protocol (str): optional, QC protocol to
        verify against
      report_html (str): optional, path to the name of
        the output QC report (a relative path is taken
        to be relative to the project directory; defaults
        to '<QC_DIR_BASENAME>_report.html')
      zip_outputs (bool): if True then also generate ZIP
        archive with the report and QC outputs
      multiqc (bool): if True then also generate MultiQC
        report
      force (bool): if True then force generation of
        QC report even if verification fails
      runner (JobRunner): optional, job runner to use
        for running the reporting (defaults to the
        'default_runner' from the 'general' settings)
      log_dir (str): optional, specify a directory to
        write logs to

    Returns:
      Integer: exit code from reporting job (zero indicates
        success, non-zero indicates a problem).

    Raises:
      KeyboardInterrupt: re-raised after terminating the
        reporting job if the wait is interrupted.
    """
    # Sort out runners
    if runner is None:
        runner = Settings().general.default_runner
    # Basename for the outputs
    if qc_dir is None:
        qc_base = os.path.basename(project.qc_dir)
    else:
        qc_base = os.path.basename(qc_dir)
    # Report HTML file name
    if report_html is None:
        out_file = '%s_report.html' % qc_base
    else:
        out_file = report_html
    if not os.path.isabs(out_file):
        out_file = os.path.join(project.dirn, out_file)
    # Report title
    if project.info.run is None:
        title = "%s" % project.name
    else:
        title = "%s/%s" % (project.info.run, project.name)
    if fastq_dir is not None:
        title = "%s (%s)" % (title, fastq_dir)
    title = "%s: QC report" % title
    # Construct command for reporting
    report_cmd = Command("reportqc.py", "--filename", out_file, "--title",
                         title)
    if qc_protocol is not None:
        # The protocol option belongs on the reporting command (there
        # is no 'verify_cmd' in this function)
        report_cmd.add_args("--protocol", qc_protocol)
    if qc_dir is not None:
        report_cmd.add_args("--qc_dir", qc_dir)
    if fastq_dir is not None:
        report_cmd.add_args("--fastq_dir", fastq_dir)
    if multiqc:
        report_cmd.add_args("--multiqc")
    if zip_outputs:
        report_cmd.add_args("--zip")
    if force:
        report_cmd.add_args("--force")
    report_cmd.add_args(project.dirn)
    # Run the command
    report = SchedulerJob(runner,
                          report_cmd.command_line,
                          name="report_qc.%s" % project.name,
                          working_dir=project.dirn,
                          log_dir=log_dir)
    report.start()
    try:
        report.wait()
    except KeyboardInterrupt:
        logger.warning("Keyboard interrupt, terminating QC reporting")
        report.terminate()
        # Bare raise keeps the original traceback intact
        raise
    # Return the exit code from the reporting job
    # NOTE(review): assumes SchedulerJob exposes the finished job's
    # exit status as 'exit_code' -- confirm against its definition
    return report.exit_code
Пример #9
0
    def test_run_qc_with_strandedness(self):
        """run_qc: standard QC run with strandedness determination
        """
        # Make mock illumina_qc.sh and multiqc
        MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
        MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
        MockMultiQC.create(os.path.join(self.bin, "multiqc"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Make mock analysis directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            project_metadata={
                "AB": {
                    "Organism": "human",
                },
                "CDE": {
                    "Organism": "mouse",
                }
            },
            top_dir=self.dirn)
        mockdir.create()
        # Settings file with fastq_strand indexes and
        # polling interval
        settings_ini = os.path.join(self.dirn, "settings.ini")
        with open(settings_ini, 'w') as s:
            s.write("""[general]
poll_interval = 0.5

[fastq_strand_indexes]
human = /data/genomeIndexes/hg38/STAR
mouse = /data/genomeIndexes/mm10/STAR
""")
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn,
                         settings=Settings(settings_ini))
        # Run the QC
        status = run_qc(ap, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check the fastq_strand_conf files were created
        for p in ("AB", "CDE"):
            self.assertTrue(
                os.path.exists(
                    os.path.join(mockdir.dirn, p, "qc", "fastq_strand.conf")))
        # Check fastq_strand outputs are present
        for p in ("AB", "CDE"):
            fastq_strand_outputs = filter(
                lambda f: f.endswith("fastq_strand.txt"),
                os.listdir(os.path.join(mockdir.dirn, p, "qc")))
            self.assertTrue(len(fastq_strand_outputs) > 0)
        # Check output and reports
        for p in ("AB", "CDE", "undetermined"):
            for f in ("qc", "qc_report.html", "qc_report.%s.%s_analysis.zip" %
                      (p, '170901_M00879_0087_000000000-AGEW9'),
                      "multiqc_report.html"):
                self.assertTrue(
                    os.path.exists(os.path.join(mockdir.dirn, p, f)),
                    "Missing %s in project '%s'" % (f, p))
            # Check zip file has MultiQC report
            zip_file = os.path.join(
                mockdir.dirn, p, "qc_report.%s.%s_analysis.zip" %
                (p, '170901_M00879_0087_000000000-AGEW9'))
            with zipfile.ZipFile(zip_file) as z:
                multiqc = os.path.join(
                    "qc_report.%s.%s_analysis" %
                    (p, '170901_M00879_0087_000000000-AGEW9'),
                    "multiqc_report.html")
                self.assertTrue(multiqc in z.namelist())