Пример #1
0
def demux_run_standalone(run_id, input_run_data_path, fastq_output_dir, samplesheet_csv_path, nb_mismatch, conf):
    """Demultiplex a run by launching bcl2fastq as a local command.

    Arguments:
        run_id: the run id
        input_run_data_path: path of the run data to demultiplex
        fastq_output_dir: directory given to the command line builder as the FASTQ output location
        samplesheet_csv_path: path of the CSV samplesheet given to bcl2fastq
        nb_mismatch: mismatch setting forwarded to the command line builder
        conf: configuration dictionary

    Returns:
        True when the command exits with code 0; False when the configured
        bcl2fastq path is neither a directory nor a file, or when the command
        exits with a non-zero code (a failure mail is sent in that case).
    """

    executable = conf[BCL2FASTQ_PATH_KEY]
    tmp_dir = conf[TMP_PATH_KEY]

    run_id_msg = " for run " + run_id + ' on ' + common.get_instrument_name(run_id, conf)
    log_path = tmp_dir + "/bcl2fastq_output_" + run_id + ".err"

    # The configured value may be the executable itself or the directory holding it
    if os.path.isdir(executable):
        executable += '/bcl2fastq'
    elif not os.path.isfile(executable):
        # The same text is used for both message arguments of error()
        invalid_path_msg = "Error while setting executable command file bcl2fastq" + run_id_msg + \
                           ", invalid bcl2fastq path: " + executable
        error(invalid_path_msg, invalid_path_msg, conf)
        return False

    cmd = create_bcl2fastq_command_line(run_id, executable, input_run_data_path, fastq_output_dir,
                                        samplesheet_csv_path, tmp_dir, nb_mismatch, conf)

    common.log('INFO', 'Demultiplexing in standalone mode using the following command line: ' + str(cmd), conf)

    exit_code = os.system(cmd)

    # Nothing more to do when the command succeeded
    if exit_code == 0:
        return True

    # Shared tail of the error texts: exit code followed by the command line
    failure_details = ' (exit code: ' + str(exit_code) + ').\nCommand line:\n' + cmd

    error("Error while executing bcl2fastq " + run_id_msg,
          'Error while executing bcl2fastq' + failure_details, conf)

    msg = 'Error while executing bcl2fastq ' + run_id_msg + failure_details
    subject = '[Aozan] Failed demultiplexing ' + run_id_msg

    # Attach the bcl2fastq log to the failure mail when it has been generated
    if os.path.exists(log_path):
        msg += "\n\nPlease check the attached bcl2fastq output error file."
        common.send_msg_with_attachment(subject, msg, log_path, True, conf)
    else:
        error("Error with bcl2fastq log for run " + run_id + ".", "No bcl2fastq log available", conf)
        common.send_msg(subject, msg, True, conf)

    return False
Пример #2
0
def send_report(run_id, conf):
    """Send a mail describing a new run, with the first base report attached when available.

    The run layout (number of reads, indexes and cycles) is read from the run
    information and summarised in the message body, together with an
    estimated run type (SR, PE or undetermined).

    Arguments:
        run_id: the run id
        conf: configuration dictionary

    Returns:
        False when the run information cannot be retrieved, True once the
        message has been handed to the mail helper.
    """

    #
    # Retrieve features the current run in RunInfos.xml file
    #

    run_info = hiseq_run.get_run_info(run_id, conf)

    if run_info is None:
        return False

    # TODO ?? add check sample-sheet if demux step enable
    # add warning in report if useful

    reads = run_info.getReads()
    cycles_mismatch_between_reads = False
    reads_indexed_count = 0
    reads_not_indexed_count = 0
    cycles_count = 0
    cycles_per_read_not_indexed = 0

    for read in reads:
        read_cycles = read.getNumberCycles()
        cycles_count += read_cycles

        if read.isIndexedRead():
            reads_indexed_count += 1
            continue

        reads_not_indexed_count += 1
        if cycles_per_read_not_indexed == 0:
            cycles_per_read_not_indexed = read_cycles

        # Check same cycles count for each reads not indexed. The flag is
        # sticky: once a mismatch has been seen, a later matching read must
        # not clear it.
        if cycles_per_read_not_indexed != read_cycles:
            cycles_mismatch_between_reads = True

    # "<n> index" / "<n> indexes", shared by the run type and the description
    index_label = str(reads_indexed_count) + " index"
    if reads_indexed_count > 1:
        index_label += "es"

    # Identification type run according to data in RunInfos.xml : SR or PE
    if reads_not_indexed_count == 1:
        type_run_estimated = "SR-" + str(cycles_per_read_not_indexed) + " with " + index_label
    elif reads_not_indexed_count == 2:
        type_run_estimated = "PE-" + str(cycles_per_read_not_indexed) + " with " + index_label
    else:
        # The closing parenthesis is added after the plural suffix
        type_run_estimated = "Undetermined run type (" + str(reads_not_indexed_count) + " reads with " + \
                             index_label + ")"

    description_run = "Informations about this run:\n"
    description_run += "\t- Sequencer: " + common.get_instrument_name(run_id, conf) + ".\n"
    description_run += "\t- " + str(run_info.getFlowCellLaneCount()) + " lanes with " + str(
        run_info.alignToPhix.size()) + " aligned to Phix.\n"
    description_run += "\t- " + str(reads_not_indexed_count) + " read"
    if reads_not_indexed_count > 1:
        description_run += "s"
    description_run += " and " + index_label + ".\n"

    # A zero cycle count means no usable non-indexed read was found
    if cycles_mismatch_between_reads or cycles_per_read_not_indexed == 0:
        description_run += "\t- ERROR : cycles count per read different between reads (" + str(
            cycles_count) + " total cycles).\n"
    else:
        description_run += "\t- " + str(cycles_per_read_not_indexed) + " cycles per read (" + str(
            cycles_count) + " total cycles).\n"

    description_run += "\t- Estimated run type: " + type_run_estimated + ".\n"

    attachment_file = str(hiseq_run.find_hiseq_run_path(run_id, conf)) + '/' + run_id + '/' + common.FIRST_BASE_REPORT_FILE

    # If the First base report file exists, send it by email
    if common.is_file_readable(attachment_file):

        message = 'You will find attached to this message the first base report for the run ' + \
                  run_id + '.\n\n' + description_run
        common.send_msg_with_attachment('[Aozan] First base report for the run ' + type_run_estimated + '  ' + run_id +
                                        ' on ' + common.get_instrument_name(run_id, conf),
                                        message, attachment_file, False, conf)
    else:
        # No readable report: send the run parameters without attachment
        message = 'You will find below the parameters of the run ' + run_id + '.\n\n' + description_run
        common.send_msg('[Aozan] New run ' + type_run_estimated + ' ' + run_id + ' on ' +
                        common.get_instrument_name(run_id, conf), message,
                        False, conf)

    return True
Пример #3
0
def demux_run_with_docker(run_id, input_run_data_path, fastq_output_dir, samplesheet_csv_path, nb_mismatch, conf):
    """ Demultiplexing the run with bcl2fastq on version parameter with image Docker.

    Arguments:
        run_id: The run id
        input_run_data_path: input run data path to demultiplexing
        fastq_output_dir: fastq directory to save result on demultiplexing
        samplesheet_csv_path: samplesheet path in csv format, version used by bcl2fastq
        conf: configuration dictionary
    """

    # In docker mount with input_run_data_path
    input_docker = '/data/input'
    input_run_data_path_in_docker = input_docker
    run_id_msg = " for run " + run_id + ' on ' + common.get_instrument_name(run_id, conf)

    # In docker mount with fastq_output_dir
    output_docker = '/data/output'
    fastq_data_path_in_docker = output_docker + '/' + os.path.basename(fastq_output_dir)

    tmp = conf[TMP_PATH_KEY]
    tmp_docker = '/tmp'

    bcl2fastq_log_file = tmp + "/bcl2fastq_output_" + run_id + ".err"
    samplesheet_csv_docker = tmp_docker + '/' + os.path.basename(samplesheet_csv_path)

    cmd = create_bcl2fastq_command_line(run_id, None, input_run_data_path_in_docker, fastq_data_path_in_docker,
                                        samplesheet_csv_docker, tmp_docker, nb_mismatch, conf)

    try:
        # Set working in docker on parent demultiplexing run directory.
        # Demultiplexing run directory will create by bcl2fastq
        docker = DockerCommand(conf[Settings.DOCKER_URI_KEY], ['/bin/bash', '-c', cmd], 'bcl2fastq2', common.BCL2FASTQ2_VERSION)

        common.log("CONFIG", "Demultiplexing using docker image from " + docker.getImageDockerName() +
                   " with command line " + cmd, conf)

        common.log("CONFIG", "Bcl2fastq docker mount: " +
                   str(os.path.dirname(fastq_output_dir)) + ":" + str(output_docker) + "; " +
                   input_run_data_path + ":" + input_docker + "; " + tmp + ":" + tmp_docker, conf)

        # Mount input directory
        docker.addMountDirectory(input_run_data_path, input_docker)
        docker.addMountDirectory(os.path.dirname(fastq_output_dir), output_docker)
        docker.addMountDirectory(tmp, tmp_docker)

        docker.run()
        exit_code = docker.getExitValue()

        if exit_code != 0:
            error("Error while demultiplexing run " + run_id, 'Error while demultiplexing run (exit code: ' +
                  str(exit_code) + ').\nCommand line:\n' + cmd, conf)

            msg = 'Error while executing bcl2fastq ' + run_id_msg + ' (exit code: ' + str(
                  exit_code) + ').\nCommand line:\n' + cmd

            # Check if the log file has been generated
            if not os.path.exists(bcl2fastq_log_file):
                error("Error with bcl2fastq log for run " + run_id + ".", "No bcl2fastq log available " + bcl2fastq_log_file, conf)
                common.send_msg('[Aozan] Failed demultiplexing ' + run_id_msg, msg, True, conf)
            else:
                msg += "\n\nPlease check the attached bcl2fastq output error file."
                common.send_msg_with_attachment('[Aozan] Failed demultiplexing ' + run_id_msg, msg, bcl2fastq_log_file, True, conf)

            return False

    except Throwable, exp:
        error("Error while running Docker image", common.exception_msg(exp, conf), conf)
        return False
Пример #4
0
    sessions = [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY, Settings.QC_STEP_KEY]
    common.create_html_index_file(conf, run_id, sessions)

    df_in_bytes = common.df(qc_output_dir)
    du_in_bytes = common.du(qc_output_dir)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024)

    common.log("WARNING", "QC step: output disk free after QC: " + str(df_in_bytes), conf)
    common.log("WARNING", "QC step: space used by QC: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    msg = 'Ending quality control for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '. ' + \
          'You will find attached to this message the quality control report.\n\n' + \
          'QC files for this run ' + \
          'can be found in the following directory:\n  ' + qc_output_dir

    # Add path to report if reports.url exists
    if common.is_conf_key_exists(REPORTS_URL_KEY, conf):
        msg += '\n\nRun reports can be found at following location:\n  ' + conf[REPORTS_URL_KEY] + '/' + run_id

    msg += '\n\nFor this task %.2f MB has been used and %.2f GB still free.' % (du, df)

    common.send_msg_with_attachment('[Aozan] Ending quality control for run ' + run_id + ' on ' +
                                    common.get_instrument_name(run_id, conf), msg, html_report_file, False, conf)
    common.log('INFO', 'QC step: successful in ' + common.duration_to_human_readable(duration), conf)
    return True