Пример #1
0
 def test_checkforrun(self):
     """Look up flowcell run directories through the samplesheet search path.

     The directory containing ``self.ss_file`` is the only configured
     samplesheet location. The lookup for the ``80HM7ABXX`` run directory
     must return a value; the lookup for the ``NOPEXX`` one must return
     ``None``.
     """
     search_config = {
         "samplesheet_directories": [os.path.dirname(self.ss_file)],
     }
     # Run directory that is expected to resolve to a samplesheet.
     found = samplesheet.run_has_samplesheet(
         "fake/101007_80HM7ABXX", search_config, False)
     assert found is not None
     # Run directory that is expected to have no samplesheet at all.
     missing = samplesheet.run_has_samplesheet(
         "fake/101007_NOPEXX", search_config, False)
     assert missing is None
Пример #2
0
 def test_checkforrun(self):
     """Check that samplesheet lookup distinguishes known and unknown runs.

     Uses the directory of ``self.ss_file`` as the sole entry in
     ``samplesheet_directories``; one run directory must yield a result and
     the other must yield ``None``.
     """
     known_run = "fake/101007_80HM7ABXX"
     unknown_run = "fake/101007_NOPEXX"
     ss_dir = os.path.dirname(self.ss_file)
     config = {"samplesheet_directories": [ss_dir]}

     hit = samplesheet.run_has_samplesheet(known_run, config, False)
     assert hit is not None

     miss = samplesheet.run_has_samplesheet(unknown_run, config, False)
     assert miss is None
Пример #3
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Handle every finished run directory that has not been reported yet.

    For each directory from ``_get_directories(config)`` that exists, is not
    in the reported list read from ``config["msg_db"]`` and passes
    ``_is_finished_dumping``:

    * the directory is recorded as reported,
    * a CSV samplesheet, if one is found, is converted to ``run_info.yaml``,
    * qseq and/or fastq files are generated when ``qseq`` / ``fastq`` are
      truthy,
    * "finished" messages are sent for processing and/or storage when
      ``process_msg`` / ``store_msg`` are truthy.
    """
    already_reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        # Skip anything that is not a directory or was handled previously.
        if not os.path.isdir(dname) or dname in already_reported:
            continue
        if not _is_finished_dumping(dname):
            continue

        log.info("The instrument has finished dumping on directory %s" % dname)
        # Recorded before the work below, so a failure later on does not
        # cause the directory to be picked up again.
        _update_reported(config["msg_db"], dname)

        sheet_path = samplesheet.run_has_samplesheet(dname, config)
        if sheet_path:
            yaml_path = os.path.join(dname, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" %
                     (sheet_path, yaml_path))
            samplesheet.csv2yaml(sheet_path, yaml_path)

        if qseq:
            log.info("Generating qseq files for %s" % dname)
            _generate_qseq(get_qseq_dir(dname), config)
        if fastq:
            log.info("Generating fastq files for %s" % dname)
            _generate_fastq(dname, config)

        store_files, process_files = _files_to_copy(dname)

        if process_msg:
            finished_message(config["msg_process_tag"], dname,
                             process_files, amqp_config)
        if store_msg:
            finished_message(config["msg_store_tag"], dname,
                             store_files, amqp_config)
Пример #4
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Find run directories that finished dumping and have not been reported.

    Each qualifying directory is marked as reported, its CSV samplesheet (if
    any) is converted to ``run_info.yaml``, qseq/fastq generation is run when
    the ``qseq`` / ``fastq`` flags are truthy, and finished messages are sent
    according to the ``process_msg`` / ``store_msg`` flags.
    """
    reported = _read_reported(config["msg_db"])
    for run_dir in _get_directories(config):
        is_unreported_dir = os.path.isdir(run_dir) and run_dir not in reported
        # The dumping check is only evaluated for new, existing directories.
        if not (is_unreported_dir and _is_finished_dumping(run_dir)):
            continue

        log.info(
            "The instrument has finished dumping on directory %s" % run_dir)
        _update_reported(config["msg_db"], run_dir)

        csv_sheet = samplesheet.run_has_samplesheet(run_dir, config)
        if csv_sheet:
            run_info = os.path.join(run_dir, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" %
                     (csv_sheet, run_info))
            samplesheet.csv2yaml(csv_sheet, run_info)

        if qseq:
            log.info("Generating qseq files for %s" % run_dir)
            _generate_qseq(get_qseq_dir(run_dir), config)
        if fastq:
            log.info("Generating fastq files for %s" % run_dir)
            _generate_fastq(run_dir, config)

        store_files, process_files = _files_to_copy(run_dir)

        # Processing notification goes out before the storage notification.
        notifications = (
            (process_msg, "msg_process_tag", process_files),
            (store_msg, "msg_store_tag", store_files),
        )
        for enabled, tag_key, files in notifications:
            if enabled:
                finished_message(config[tag_key], run_dir, files, amqp_config)
Пример #5
0
def _process_samplesheets(dname, config):
    """Convert the run's Illumina CSV samplesheet to ``run_info.yaml``.

    Does nothing when no samplesheet is found for ``dname``; otherwise the
    YAML file is written inside ``dname``.
    """
    csv_sheet = samplesheet.run_has_samplesheet(dname, config)
    if not csv_sheet:
        return
    yaml_path = os.path.join(dname, "run_info.yaml")
    log.info("CSV Samplesheet %s found, converting to %s" % (csv_sheet, yaml_path))
    samplesheet.csv2yaml(csv_sheet, yaml_path)
Пример #6
0
def _process_samplesheets(dname, config):
    """Turn an Illumina samplesheet into a YAML file for post-processing.

    When a samplesheet is found for the run directory ``dname`` it is
    converted to ``run_info.yaml`` in that directory; otherwise nothing
    happens.
    """
    sheet = samplesheet.run_has_samplesheet(dname, config)
    if not sheet:
        return
    target = os.path.join(dname, "run_info.yaml")
    message = "CSV Samplesheet %s found, converting to %s" % (sheet, target)
    logger2.info(message)
    samplesheet.csv2yaml(sheet, target)
Пример #7
0
def _generate_fastq_with_casava(fc_dir, config, r1=False):
    """Perform demultiplexing and generate fastq.gz files for the current
    flowcell using CASAVA (>1.8).

    Builds the ``configureBclToFastq.pl`` command line from ``config`` and,
    when ``r1`` is true, runs it with stdout/stderr redirected to
    ``configureBclToFastq.out`` / ``configureBclToFastq.err`` inside
    ``fc_dir``.

    Args:
        fc_dir: Flowcell run directory.
        config: Configuration mapping; ``config["program"]["casava"]`` is the
            directory holding the CASAVA scripts and ``config["algorithm"]``
            supplies ``mismatches``, ``num_cores`` and the
            ``ignore-missing-*`` switches.
        r1: When true, the configuration script is executed.

    Raises:
        subprocess.CalledProcessError: If the configuration script exits with
            a non-zero status. The failure is logged before re-raising.
    """
    basecall_dir = os.path.join(fc_dir, "Data", "Intensities", "BaseCalls")
    casava_dir = config["program"].get("casava")
    unaligned_dir = os.path.join(fc_dir, "Unaligned")
    samplesheet_file = samplesheet.run_has_samplesheet(fc_dir, config)
    num_mismatches = config["algorithm"].get("mismatches", 1)
    # NOTE(review): num_cores, casava_out and casava_err are not referenced
    # further down in this function -- confirm whether a later step that
    # consumed them is missing.
    num_cores = config["algorithm"].get("num_cores", 1)
    im_stats = config["algorithm"].get("ignore-missing-stats", False)
    im_bcl = config["algorithm"].get("ignore-missing-bcl", False)
    im_control = config["algorithm"].get("ignore-missing-control", False)

    # Write to log files
    configure_out = os.path.join(fc_dir, "configureBclToFastq.out")
    configure_err = os.path.join(fc_dir, "configureBclToFastq.err")
    casava_out = os.path.join(fc_dir, "bclToFastq_R{:d}.out".format(2 - int(r1)))
    casava_err = os.path.join(fc_dir, "bclToFastq_R{:d}.err".format(2 - int(r1)))

    cl = [os.path.join(casava_dir, "configureBclToFastq.pl")]
    cl.extend(["--input-dir", basecall_dir])
    cl.extend(["--output-dir", unaligned_dir])
    cl.extend(["--mismatches", str(num_mismatches)])
    cl.extend(["--fastq-cluster-count", "0"])
    # Only pass a samplesheet when one was actually found.
    if samplesheet_file is not None:
        cl.extend(["--sample-sheet", samplesheet_file])
    if im_stats:
        cl.append("--ignore-missing-stats")
    if im_bcl:
        cl.append("--ignore-missing-bcl")
    if im_control:
        cl.append("--ignore-missing-control")

    bm = _get_bases_mask(fc_dir)
    if bm is not None:
        cl.extend(["--use-bases-mask", bm])

    if r1:
        # Run configuration script
        logger2.info("Configuring BCL to Fastq conversion")
        logger2.debug(cl)

        try:
            # The context manager closes both log files even when the
            # configuration script fails.
            with open(configure_out, "w") as co, open(configure_err, "w") as ce:
                subprocess.check_call(cl, stdout=co, stderr=ce)
        except subprocess.CalledProcessError as e:
            logger2.error("Configuring BCL to Fastq conversion for {:s} FAILED "
                          "(exit code {}), please check log files {:s}, {:s}".format(
                              fc_dir,
                              str(e.returncode),
                              configure_out,
                              configure_err))
            # Bare raise keeps the original traceback intact.
            raise
Пример #8
0
def initial_processing(*args, **kwargs):
    """Initial processing to be performed after the first base report.

    The first two positional arguments are the run directory and the
    configuration; unpacking fails with ``ValueError`` if fewer than two are
    given. Keyword arguments are accepted but not used here.

    An ``initial_processing_started.txt`` indicator file is touched in the
    run directory, and the samplesheet, if one is found, is copied into the
    run folder. A failed copy is logged and not re-raised.
    """
    dname, config = args[0:2]
    # Touch the indicator flag that processing of read1 has been started
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_started.txt"))

    # Copy the samplesheet to the run folder
    ss_file = samplesheet.run_has_samplesheet(dname, config)
    if ss_file:
        dst = os.path.join(dname, os.path.basename(ss_file))
        try:
            copyfile(ss_file, dst)
        except IOError as e:
            # Best effort: a missing copy is reported but does not abort.
            logger2.error("Error copying samplesheet {} from {} to {}: {}"
                          "".format(os.path.basename(ss_file),
                                    os.path.dirname(ss_file),
                                    os.path.dirname(dst),
                                    e))
Пример #9
0
def _generate_fastq_with_casava(fc_dir, config, r1=False):
    """Perform demultiplexing and generate fastq.gz files for the current
    flowcell using CASAVA (>1.8).

    When ``r1`` is true the ``configureBclToFastq.pl`` script is run first.
    ``make`` is then run inside the ``Unaligned`` directory of the flowcell,
    with the ``r1`` target appended when ``r1`` is true.

    Args:
        fc_dir: Flowcell run directory.
        config: Configuration mapping; ``config["program"]["casava"]`` is the
            directory holding the CASAVA scripts and ``config["algorithm"]``
            supplies ``mismatches`` and ``num_cores`` (both default to 1).
        r1: When true, configure the conversion and build only the ``r1``
            make target.

    Raises:
        subprocess.CalledProcessError: If the configuration script or
            ``make`` exits with a non-zero status.
    """
    basecall_dir = os.path.join(fc_dir, "Data", "Intensities", "BaseCalls")
    casava_dir = config["program"].get("casava")
    unaligned_dir = os.path.join(fc_dir, "Unaligned")
    samplesheet_file = samplesheet.run_has_samplesheet(fc_dir, config)
    num_mismatches = config["algorithm"].get("mismatches", 1)
    num_cores = config["algorithm"].get("num_cores", 1)

    cl = [os.path.join(casava_dir, "configureBclToFastq.pl")]
    cl.extend(["--input-dir", basecall_dir])
    cl.extend(["--output-dir", unaligned_dir])
    # A missing samplesheet would otherwise put None into the argument list
    # and make subprocess fail with a TypeError.
    if samplesheet_file is not None:
        cl.extend(["--sample-sheet", samplesheet_file])
    cl.extend(["--mismatches", str(num_mismatches)])

    options = [
        "--fastq-cluster-count", "0",
        "--ignore-missing-stats",
        "--ignore-missing-bcl",
        "--ignore-missing-control",
    ]
    cl.extend(options)

    if r1:
        # Run configuration script
        logger2.info("Configuring BCL to Fastq conversion")
        logger2.debug(cl)
        subprocess.check_call(cl)

    # Go to <Unaligned> folder
    with utils.chdir(unaligned_dir):
        # Perform make
        cl = ["nohup", "make", "-j", str(num_cores)]
        if r1:
            cl.append("r1")

        logger2.info("Demultiplexing and converting bcl to fastq.gz")
        logger2.debug(cl)
        subprocess.check_call(cl)

    logger2.debug("Done")