示例#1
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The project's "best_practice_analysis" value is read from Charon. When
    the key is absent or its value is empty, an error is logged and
    "whole_genome_reseq" is used instead.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed through unchanged to load_engine_module.
    :param config_file_path: Accepted but not used by this function.

    :returns: The value returned by load_engine_module for the chosen analysis.

    :raises RuntimeError: If load_engine_module raises RuntimeError; the
                          message is prefixed with the project.
    """
    charon_session = CharonSession()
    try:
        best_practice_analysis = charon_session.project_get(
            project.project_id)["best_practice_analysis"]
    except KeyError:
        best_practice_analysis = None
    if not best_practice_analysis:
        # A missing key and an empty value are treated the same way:
        # log the problem and fall back to the default analysis.
        error_msg = (
            'No best practice analysis specified in Charon for '
            'project "{}". Using "whole_genome_reseq"'.format(project))
        LOG.error(error_msg)
        best_practice_analysis = "whole_genome_reseq"
    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
示例#2
0
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The value stored under "projectid" in the project's Charon entry (e.g. "P123")
    :rtype: str

    :raises ValueError: If Charon answers 404 for the project (the raised
                        exception carries status_code = 404), or if the
                        project's entry has no "projectid" value
    :raises CharonError: Re-raised unchanged for any Charon failure other than a 404
    """
    charon_session = CharonSession()

    try:
        project_record = charon_session.project_get(project_name)
    except CharonError as e:
        if e.status_code == 404:
            # Translate "not found" into the documented ValueError while
            # keeping the HTTP status available to callers that inspect it.
            new_e = ValueError('Project "{}" missing from database: {}'.format(project_name, e))
            new_e.status_code = 404
            raise new_e
        else:
            raise
    try:
        return project_record['projectid']
    except KeyError:
        raise ValueError('Couldn\'t retrieve project id for project "{}"; '
                         'this project\'s database entry has no "projectid" value.'.format(project_name))
示例#3
0
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The value stored under "projectid" in the project's Charon entry (e.g. "P123")
    :rtype: str

    :raises ValueError: If Charon answers 404 for the project (the raised
                        exception carries status_code = 404), or if the
                        project's entry has no "projectid" value
    :raises CharonError: Re-raised unchanged for any Charon failure other than a 404
    """
    charon_session = CharonSession()

    try:
        project_record = charon_session.project_get(project_name)
    except CharonError as e:
        if e.status_code == 404:
            # Translate "not found" into the documented ValueError while
            # keeping the HTTP status available to callers that inspect it.
            new_e = ValueError('Project "{}" missing from database: {}'.format(
                project_name, e))
            new_e.status_code = 404
            raise new_e
        else:
            raise
    try:
        return project_record['projectid']
    except KeyError:
        raise ValueError(
            'Couldn\'t retrieve project id for project "{}"; '
            'this project\'s database entry has no "projectid" value.'.format(
                project_name))
 def get_delivery_token_in_charon(self):
     """Return the project's delivery token as stored in Charon.

     The Charon entry for self.projectid is fetched and its 'delivery_token'
     value is returned; when that value is missing or empty the placeholder
     string 'NO-TOKEN' is returned instead.
     """
     token = CharonSession().project_get(self.projectid).get('delivery_token')
     return token if token else 'NO-TOKEN'
def analyze(analysis_object, config=None, config_file_path=None):
    """Gather the fastq files of a project that still need analysis and start one job for them.

    The project's Charon entry decides the sequencing facility (stored on
    analysis_object.sequencing_facility) and the reference genome. Samples,
    libpreps and seqruns are then walked; each level is checked against its
    Charon status through handle_sample_status / handle_libprep_status /
    handle_seqrun_status, and the non-index fastq files of every accepted
    seqrun are collected. If any files were collected, a batch job is written
    with write_batch_job, started with start_analysis and recorded with
    record_project_job.

    Nothing is done when the project has no reference genome or its reference
    is 'other'.

    :param analysis_object: Object exposing .project, .restart_running_jobs
                            and a writable .sequencing_facility
    :param config: Not used by this function
    :param config_file_path: Not used by this function

    :raises RuntimeError: If the Charon "sequencing_facility" value is neither
                          "NGI-S" nor "NGI-U"
    """
    charon_session = CharonSession()
    project = analysis_object.project
    charon_pj = charon_session.project_get(project.project_id)
    reference_genome = charon_pj.get('reference')
    sequencing_facility = charon_pj.get("sequencing_facility")
    if sequencing_facility == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif sequencing_facility == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        error_msg = "charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?"
        LOG.error(error_msg)
        # Carry the explanation in the exception too, so callers that only
        # see the exception still know what went wrong.
        raise RuntimeError(error_msg)

    if not reference_genome or reference_genome == 'other':
        return

    fastq_files = []
    for sample in project:
        try:
            charon_reported_status = charon_session.sample_get(project.project_id,
                                                               sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            if not handle_sample_status(analysis_object, sample, charon_reported_status):
                continue
        except CharonError as e:
            # The failure is only logged; the sample's libpreps are still examined below.
            LOG.error(e)

        for libprep in sample:
            charon_lp_status = charon_session.libprep_get(project.project_id, sample.name,
                                                          libprep.name).get('qc')
            if not handle_libprep_status(analysis_object, libprep, charon_lp_status):
                continue
            for seqrun in libprep:
                charon_sr_status = charon_session.seqrun_get(project.project_id, sample.name,
                                                             libprep.name,
                                                             seqrun.name).get('alignment_status')
                if not handle_seqrun_status(analysis_object, seqrun, charon_sr_status):
                    continue
                seqrun.being_analyzed = True
                sample.being_analyzed = sample.being_analyzed or True
                for fastq_file in seqrun.fastq_files:
                    # filter out index files from analysis
                    if is_index_file(fastq_file):
                        continue
                    fastq_files.append(os.path.join(project.base_path, "DATA",
                                                    project.project_id, sample.name,
                                                    libprep.name, seqrun.name, fastq_file))

    if not fastq_files:
        LOG.error("No fastq files obtained for the analysis fo project {}, please check the Charon status.".format(project.name))
        return

    if analysis_object.restart_running_jobs:
        stop_ongoing_analysis(analysis_object)
    fastq_dir = preprocess_analysis(analysis_object, fastq_files)
    sbatch_path = write_batch_job(analysis_object, reference_genome, fastq_dir)
    job_id = start_analysis(sbatch_path)
    analysis_path = os.path.join(project.base_path, "ANALYSIS", project.project_id, 'rna_ngi')
    record_project_job(project, job_id, analysis_path)
示例#6
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Load the analysis engine module named by the project's Charon entry.

    The engine name is the "best_practice_analysis" value of the project's
    Charon record; it is handed to load_engine_module together with config.

    :param NGIProject project: The project to get the engine from.

    :raises RuntimeError: If load_engine_module raises RuntimeError; the
                          message is prefixed with the project.
    """
    project_record = CharonSession().project_get(project.project_id)
    engine_name = project_record["best_practice_analysis"]
    try:
        engine_module = load_engine_module(engine_name, config)
    except RuntimeError as err:
        raise RuntimeError('Project "{}": {}'.format(project, err))
    return engine_module
示例#7
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Load the analysis engine module named by the project's Charon entry.

    The engine name is the "best_practice_analysis" value of the project's
    Charon record; it is handed to load_engine_module together with config.

    :param NGIProject project: The project to get the engine from.

    :raises RuntimeError: If load_engine_module raises RuntimeError; the
                          message is prefixed with the project.
    """
    project_record = CharonSession().project_get(project.project_id)
    engine_name = project_record["best_practice_analysis"]
    try:
        engine_module = load_engine_module(engine_name, config)
    except RuntimeError as err:
        raise RuntimeError('Project "{}": {}'.format(project, err))
    return engine_module
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     '''Updates delivery_projects in Charon at project level

     The project's 'delivery_projects' list is fetched from Charon and
     supr_name_of_delivery is appended to it unless it is already present.
     Any failure is logged and swallowed; nothing is raised to the caller.

     :param supr_name_of_delivery: Value to add to the project's delivery_projects list
     '''
     charon_session = CharonSession()
     try:
         #fetch the project
         project_charon = charon_session.project_get(self.projectid)
         delivery_projects = project_charon['delivery_projects']
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.project_update(self.projectid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for project {} updated with value {}'.format(self.projectid, supr_name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for project {} not updated with value {} because the value was already present'.format(self.projectid, supr_name_of_delivery))
     except Exception as e:
         # Deliberate best effort: a Charon problem must not abort the delivery.
         logger.error('Failed to update delivery_projects in charon while delivering {}. Error says: {}'.format(self.projectid, e))
         logger.exception(e)
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The engine name is the "best_practice_analysis" value of the project's
    Charon record; it is handed to load_engine_module together with config.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed through unchanged to load_engine_module.
    :param config_file_path: Accepted but not used by this function.

    :returns: The value returned by load_engine_module.

    :raises RuntimeError: If the Charon record has no "best_practice_analysis"
                          key, or if load_engine_module raises RuntimeError
                          (re-raised with the project prefixed to the message).
    """
    charon_session = CharonSession()
    try:
        best_practice_analysis = charon_session.project_get(project.project_id)["best_practice_analysis"]
    except KeyError:
        # No fallback is applied here, so the message must not promise one.
        error_msg = ('No best practice analysis specified in Charon for '
                     'project "{}".'.format(project))
        raise RuntimeError(error_msg)
    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
示例#10
0
 def add_dds_name_delivery_in_charon(self, name_of_delivery):
     """Updates delivery_projects in Charon at project level

     The project's 'delivery_projects' list is fetched from Charon and
     name_of_delivery is appended to it unless it is already present.
     Any failure is logged (with traceback) and swallowed; nothing is raised
     to the caller.

     :param name_of_delivery: Value to add to the project's delivery_projects list
     """
     charon_session = CharonSession()
     try:
         #fetch the project
         project_charon = charon_session.project_get(self.projectid)
         delivery_projects = project_charon['delivery_projects']
         if name_of_delivery not in delivery_projects:
             delivery_projects.append(name_of_delivery)
             charon_session.project_update(self.projectid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for project {} '
                         'updated with value {}'.format(self.projectid, name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for project {} not updated '
                            'with value {} because the value was already present'.format(self.projectid, name_of_delivery))
     except Exception:
         # Deliberate best effort: a Charon problem must not abort the delivery.
         logger.exception('Failed to update delivery_projects in charon while '
                          'delivering {}.'.format(self.projectid))
示例#11
0
class DbConnections():
    """Bundle of the two database sessions used together: StatusDB and Charon."""

    def __init__(self):
        """Open both sessions.

        The StatusDB settings are the 'statusdb' section of the YAML file
        whose path is given by the STATUS_DB_CONFIG environment variable.
        """
        with open(os.getenv('STATUS_DB_CONFIG'), 'r') as db_cred_file:
            # NOTE(review): yaml.load without an explicit Loader can construct
            # arbitrary Python objects; if this file only holds plain data,
            # yaml.safe_load would be the safer call - confirm before changing.
            db_conf = yaml.load(db_cred_file)['statusdb']
        self.statusdbSess = sdb(db_conf, db="projects")
        self.CharonSess = CharonSession()

    def add_delivery_proj_in_charon(self, delivery_proj, projectid):
        '''Updates delivery_projects in Charon at project level

        The project's 'delivery_projects' list is fetched from Charon and
        delivery_proj is appended to it unless it is already present.
        Any failure is logged and swallowed; nothing is raised to the caller.

        :param delivery_proj: Value to add to the project's delivery_projects list
        :param projectid: Identifier of the Charon project to update
        '''
        try:
            #fetch the project
            project_charon = self.CharonSess.project_get(projectid)
            delivery_projects = project_charon['delivery_projects']
            if delivery_proj not in delivery_projects:
                delivery_projects.append(delivery_proj)
                self.CharonSess.project_update(projectid, delivery_projects=delivery_projects)
                logger.info('Charon delivery_projects for project {} updated with value {}'.format(projectid, delivery_proj))
            else:
                logger.warning('Charon delivery_projects for project {} not updated with value {} because the value was already present'.format(projectid, delivery_proj))
        except Exception as e:
            # Deliberate best effort: a Charon problem is reported, not propagated.
            logger.error('Failed to update delivery_projects in charon for {}. Error says: {}'.format(projectid, e))
            logger.exception(e)
def build_setup_xml(project, sample, workflow, local_scratch_mode, config):
    """Build the command line that makes Piper's SetupFileCreator write the
    setup.xml file for a project/sample.

    The command is only assembled here, not executed. The directory that will
    hold the XML file is created, and in non-scratch mode so is the permanent
    analysis directory.

    :param NGIProject project: The project to be converted.
    :param NGISample sample: the sample object
    :param str workflow: The name of the workflow to be executed (used in the XML file name)
    :param bool local_scratch_mode: Whether the job will be run in scratch or permanent storage
    :param dict config: The (parsed) configuration file for this machine/environment.

    :returns: (setupFileCreator command line, path of the XML file it will write)
    :rtype: tuple

    :raises ValueError: If a required configuration file value is missing
    """
    LOG.info('Building Piper setup.xml file for project "{}" '
             'sample "{}"'.format(project, sample.name))

    if local_scratch_mode:
        # Scratch directories can't be created ahead of time of course
        project_top_level_dir = os.path.join("$SNIC_TMP/DATA/", project.dirname)
    else:
        project_top_level_dir = os.path.join(project.base_path, "DATA", project.dirname)
        safe_makedir(os.path.join(project.base_path, "ANALYSIS", project.dirname, "piper_ngi"))

    cl_args = {'project': project.dirname}
    try:
        charon_session = CharonSession()
        charon_project = charon_session.project_get(project.project_id)
        cl_args["sequencing_center"] = charon_project["sequencing_facility"]
    except (KeyError, CharonError) as e:
        LOG.warning('Could not determine sequencing center from Charon ({}); setting to "Unknown".'.format(e))
        cl_args["sequencing_center"] = "Unknown"
    cl_args["sequencing_tech"] = "Illumina"
    slurm_qos = config.get("slurm", {}).get("extra_params", {}).get("--qos")
    if slurm_qos:
        cl_args["qos"] = slurm_qos

    # TODO Eventually this will be loaded from e.g. Charon
    reference_genome = 'GRCh37'
    try:
        cl_args["reference_path"] = config['supported_genomes'][reference_genome]
        cl_args["uppmax_proj"] = config['environment']['project_id']
    except KeyError as e:
        # e.args[0] is the missing key; unlike e.message it also exists on Python 3.
        error_msg = ("Could not load required information from "
                     "configuration file and cannot continue with project {}: "
                     "value \"{}\" missing".format(project, e.args[0]))
        raise ValueError(error_msg)

    try:
        cl_args["sfc_binary"] = config['piper']['path_to_setupfilecreator']
    except KeyError:
        cl_args["sfc_binary"] = "setupFileCreator" # Assume setupFileCreator is on path

    # setup XML file is always stored in permanent analysis directory
    output_xml_filepath = os.path.join(project.base_path, "ANALYSIS",
                                       project.dirname, "piper_ngi", "setup_xml_files",
                                       "{}-{}-{}-setup.xml".format(project, sample, workflow))
    safe_makedir(os.path.dirname(output_xml_filepath))
    cl_args["output_xml_filepath"] = output_xml_filepath
    setupfilecreator_cl = ("{sfc_binary} "
                           "--output {output_xml_filepath} "
                           "--project_name {project} "
                           "--sequencing_platform {sequencing_tech} "
                           "--sequencing_center {sequencing_center} "
                           "--uppnex_project_id {uppmax_proj} "
                           "--reference {reference_path}").format(**cl_args)
    if "qos" in cl_args:
        setupfilecreator_cl += " --qos {qos}".format(**cl_args)

    fastq_args = []
    for samp in project:
        for libprep in samp:
            for seqrun in libprep:
                # NOTE(review): the path uses the `sample` argument's dirname, not the
                # loop variable `samp`; presumably the project holds only that sample
                # here - confirm against the callers.
                sample_run_directory = os.path.join(project_top_level_dir, sample.dirname,
                                                    libprep.dirname, seqrun.dirname)
                for fastq_file_name in seqrun.fastq_files:
                    fastq_file = os.path.join(sample_run_directory, fastq_file_name)
                    fastq_args.append(" --input_fastq {}".format(fastq_file))
    setupfilecreator_cl += "".join(fastq_args)
    return (setupfilecreator_cl, output_xml_filepath)
示例#13
0
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    For every project: an NGIAnalysis is built, Charon is updated with the
    local job status, and the project is skipped (with an error log) when its
    Charon status is not "OPEN" or when no engine can be determined for it.
    Unless no_qc is set, the "qc" engine is run on every sample; a failure for
    one sample is logged (and mailed) without stopping the others. Finally the
    project's own engine is started through analysis.engine.analyze.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool no_qc: Skip the per-sample QC analysis when true
    :param dict config: The parsed NGI configuration file; optional/has default.
                        It is also consulted (config.get("quiet")) before error mails are sent.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.

    All remaining keyword arguments are passed on unchanged to NGIAnalysis.
    """
    charon_session = CharonSession()
    for project in projects_to_analyze:
        analysis = NGIAnalysis(project=project, restart_failed_jobs=restart_failed_jobs,
                               restart_finished_jobs=restart_finished_jobs,
                               restart_running_jobs=restart_running_jobs,
                               keep_existing_data=keep_existing_data, no_qc=no_qc,
                               exec_mode=exec_mode, quiet=quiet, manual=manual,
                               config=config, config_file_path=config_file_path,
                               generate_bqsr_bam=generate_bqsr_bam, log=LOG)
        #update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if project_status != "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR", info_text=error_text)
            continue
        try:
            # Only used as a check that an engine can be resolved for the project.
            get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e: # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue

        # Reset per project so a failed load can never leave the name unbound
        # or silently reuse the module loaded for a previous project.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))

        if qc_analysis_module is not None:
            qc_engine_name = qc_analysis_module.__name__
            for sample in project:
                # Launch QC analysis
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_engine_name))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    # Report the engine that actually failed (the QC engine).
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            qc_engine_name, e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name, sample_name=sample.name,
                                      engine_name=qc_engine_name,
                                      level="ERROR", info_text=e)
        # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)
示例#14
0
def analyze(analysis_object, config=None, config_file_path=None):
    """Gather the fastq files of a project that still need analysis and start one job for them.

    The project's Charon entry decides the sequencing facility (stored on
    analysis_object.sequencing_facility) and the reference genome. Samples,
    libpreps and seqruns are then walked; each level is checked against its
    Charon status through handle_sample_status / handle_libprep_status /
    handle_seqrun_status, and the fastq files of every accepted seqrun are
    collected. If any files were collected, a batch job is written with
    write_batch_job, started with start_analysis and recorded with
    record_project_job.

    Nothing is done when the project has no reference genome or its reference
    is 'other'.

    :param analysis_object: Object exposing .project, .restart_running_jobs
                            and a writable .sequencing_facility
    :param config: Not used by this function
    :param config_file_path: Not used by this function

    :raises RuntimeError: If the Charon "sequencing_facility" value is neither
                          "NGI-S" nor "NGI-U"
    """
    charon_session = CharonSession()
    project = analysis_object.project
    charon_pj = charon_session.project_get(project.project_id)
    reference_genome = charon_pj.get('reference')
    sequencing_facility = charon_pj.get("sequencing_facility")
    if sequencing_facility == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif sequencing_facility == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        error_msg = "charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?"
        LOG.error(error_msg)
        # Carry the explanation in the exception too, so callers that only
        # see the exception still know what went wrong.
        raise RuntimeError(error_msg)

    if not reference_genome or reference_genome == 'other':
        return

    fastq_files = []
    for sample in project:
        try:
            charon_reported_status = charon_session.sample_get(
                project.project_id, sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            if not handle_sample_status(analysis_object, sample,
                                        charon_reported_status):
                continue
        except CharonError as e:
            # The failure is only logged; the sample's libpreps are still examined below.
            LOG.error(e)

        for libprep in sample:
            charon_lp_status = charon_session.libprep_get(
                project.project_id, sample.name, libprep.name).get('qc')
            if not handle_libprep_status(analysis_object, libprep,
                                         charon_lp_status):
                continue
            for seqrun in libprep:
                charon_sr_status = charon_session.seqrun_get(
                    project.project_id, sample.name, libprep.name,
                    seqrun.name).get('alignment_status')
                if not handle_seqrun_status(analysis_object, seqrun,
                                            charon_sr_status):
                    continue
                seqrun.being_analyzed = True
                sample.being_analyzed = sample.being_analyzed or True
                for fastq_file in seqrun.fastq_files:
                    fastq_files.append(
                        os.path.join(project.base_path, "DATA",
                                     project.project_id, sample.name,
                                     libprep.name, seqrun.name, fastq_file))

    if not fastq_files:
        LOG.error(
            "No fastq files obtained for the analysis fo project {}, please check the Charon status."
            .format(project.name))
        return

    if analysis_object.restart_running_jobs:
        stop_ongoing_analysis(analysis_object)
    fastq_dir = preprocess_analysis(analysis_object, fastq_files)
    sbatch_path = write_batch_job(analysis_object, reference_genome,
                                  fastq_dir)
    job_id = start_analysis(sbatch_path)
    analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                 project.project_id, 'rna_ngi')
    record_project_job(project, job_id, analysis_path)
示例#15
0
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    For every project: an NGIAnalysis is built, Charon is updated with the
    local job status, and the project is skipped (with an error log) when its
    Charon status is not "OPEN" or when no engine can be determined for it.
    Unless no_qc is set, the "qc" engine is run on every sample; a failure for
    one sample is logged (and mailed) without stopping the others. Finally the
    project's own engine is started through analysis.engine.analyze.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool no_qc: Skip the per-sample QC analysis when true
    :param dict config: The parsed NGI configuration file; optional/has default.
                        It is also consulted (config.get("quiet")) before error mails are sent.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.

    All remaining keyword arguments are passed on unchanged to NGIAnalysis.
    """
    charon_session = CharonSession()
    for project in projects_to_analyze:
        analysis = NGIAnalysis(project=project,
                               restart_failed_jobs=restart_failed_jobs,
                               restart_finished_jobs=restart_finished_jobs,
                               restart_running_jobs=restart_running_jobs,
                               keep_existing_data=keep_existing_data,
                               no_qc=no_qc,
                               exec_mode=exec_mode,
                               quiet=quiet,
                               manual=manual,
                               config=config,
                               config_file_path=config_file_path,
                               generate_bqsr_bam=generate_bqsr_bam,
                               log=LOG)
        #update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if project_status != "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            # Only used as a check that an engine can be resolved for the project.
            get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue

        # Reset per project so a failed load can never leave the name unbound
        # or silently reuse the module loaded for a previous project.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))

        if qc_analysis_module is not None:
            qc_engine_name = qc_analysis_module.__name__
            for sample in project:
                # Launch QC analysis
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_engine_name))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    # Report the engine that actually failed (the QC engine).
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(project, sample,
                                                  qc_engine_name, e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=qc_engine_name,
                                      level="ERROR",
                                      info_text=e)
        # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)
示例#16
0
def build_setup_xml(project, sample, workflow, local_scratch_mode, config):
    """Build the command line that makes Piper's SetupFileCreator write the
    setup.xml file for a project/sample.

    The command is only assembled here, not executed. The directory that will
    hold the XML file is created, and in non-scratch mode so is the permanent
    analysis directory.

    :param NGIProject project: The project to be converted.
    :param NGISample sample: the sample object
    :param str workflow: The name of the workflow to be executed (used in the XML file name)
    :param bool local_scratch_mode: Whether the job will be run in scratch or permanent storage
    :param dict config: The (parsed) configuration file for this machine/environment.

    :returns: (setupFileCreator command line, path of the XML file it will write)
    :rtype: tuple

    :raises ValueError: If a required configuration file value is missing
    """
    LOG.info('Building Piper setup.xml file for project "{}" '
             'sample "{}"'.format(project, sample.name))

    if local_scratch_mode:
        # Scratch directories can't be created ahead of time of course
        project_top_level_dir = os.path.join("$SNIC_TMP/DATA/",
                                             project.dirname)
    else:
        project_top_level_dir = os.path.join(project.base_path, "DATA",
                                             project.dirname)
        safe_makedir(
            os.path.join(project.base_path, "ANALYSIS", project.dirname,
                         "piper_ngi"))

    cl_args = {'project': project.dirname}
    try:
        charon_session = CharonSession()
        charon_project = charon_session.project_get(project.project_id)
        cl_args["sequencing_center"] = charon_project["sequencing_facility"]
    except (KeyError, CharonError) as e:
        LOG.warning(
            'Could not determine sequencing center from Charon ({}); setting to "Unknown".'
            .format(e))
        cl_args["sequencing_center"] = "Unknown"
    cl_args["sequencing_tech"] = "Illumina"
    slurm_qos = config.get("slurm", {}).get("extra_params", {}).get("--qos")
    if slurm_qos:
        cl_args["qos"] = slurm_qos

    # TODO Eventually this will be loaded from e.g. Charon
    reference_genome = 'GRCh37'
    try:
        cl_args["reference_path"] = config['supported_genomes'][
            reference_genome]
        cl_args["uppmax_proj"] = config['environment']['project_id']
    except KeyError as e:
        # e.args[0] is the missing key; unlike e.message it also exists on Python 3.
        error_msg = ("Could not load required information from "
                     "configuration file and cannot continue with project {}: "
                     "value \"{}\" missing".format(project, e.args[0]))
        raise ValueError(error_msg)

    try:
        cl_args["sfc_binary"] = config['piper']['path_to_setupfilecreator']
    except KeyError:
        # Assume setupFileCreator is on path
        cl_args["sfc_binary"] = "setupFileCreator"

    # setup XML file is always stored in permanent analysis directory
    output_xml_filepath = os.path.join(
        project.base_path, "ANALYSIS", project.dirname, "piper_ngi",
        "setup_xml_files", "{}-{}-{}-setup.xml".format(project, sample,
                                                       workflow))
    safe_makedir(os.path.dirname(output_xml_filepath))
    cl_args["output_xml_filepath"] = output_xml_filepath
    setupfilecreator_cl = ("{sfc_binary} "
                           "--output {output_xml_filepath} "
                           "--project_name {project} "
                           "--sequencing_platform {sequencing_tech} "
                           "--sequencing_center {sequencing_center} "
                           "--uppnex_project_id {uppmax_proj} "
                           "--reference {reference_path}").format(**cl_args)
    if "qos" in cl_args:
        setupfilecreator_cl += " --qos {qos}".format(**cl_args)

    fastq_args = []
    for samp in project:
        for libprep in samp:
            for seqrun in libprep:
                # NOTE(review): the path uses the `sample` argument's dirname, not the
                # loop variable `samp`; presumably the project holds only that sample
                # here - confirm against the callers.
                sample_run_directory = os.path.join(project_top_level_dir,
                                                    sample.dirname,
                                                    libprep.dirname,
                                                    seqrun.dirname)
                for fastq_file_name in seqrun.fastq_files:
                    fastq_file = os.path.join(sample_run_directory,
                                              fastq_file_name)
                    fastq_args.append(" --input_fastq {}".format(fastq_file))
    setupfilecreator_cl += "".join(fastq_args)
    return (setupfilecreator_cl, output_xml_filepath)
示例#17
0
def project_summarize(projects, verbosity=0):
    """Report the Charon status of projects, samples, libpreps and seqruns.

    Each entry of ``projects`` is resolved with ``locate_project``; the
    basename of the result is looked up in Charon and its samples, libpreps
    and seqruns are collected. All output goes through ``print_stderr``;
    nothing is returned.

    :param list projects: Project identifiers accepted by ``locate_project``.
        Entries that cannot be located (ValueError) or fetched from Charon
        (CharonError) are reported and skipped.
    :param int verbosity: 0 prints item counts per status, 1 also lists the
        IDs under each status, 2 or higher prints every record in full.
        Any value that is not an int >= 0 is reported and replaced by 0.
    """
    # Exact type check: bools and other int subclasses count as invalid.
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()

    # Phase 1: collect everything from Charon into plain dicts and lists
    projects_list = []
    for project in projects:
        try:
            project_name = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr('Gathering information for project "{}"...'.format(project_name))
        try:
            charon_project = charon_session.project_get(project_name)
        except CharonError as e:
            # NOTE(review): print_stderr presumably already targets stderr, so
            # the extra file=sys.stderr used here before was dropped to match
            # every other call in this function -- confirm against its definition.
            print_stderr('Project "{}" not found in Charon; skipping ({})'.format(project_name, e))
            continue
        project_id = charon_project['projectid']
        samples_list = []
        for sample in charon_session.project_get_samples(project_id).get('samples', []):
            sample_id = sample['sampleid']
            libpreps_list = []
            samples_list.append({'id': sample_id,
                                 'analysis_status': sample['analysis_status'],
                                 'coverage': sample['total_autosomal_coverage'],
                                 'libpreps': libpreps_list})
            for libprep in charon_session.sample_get_libpreps(project_id,
                                                              sample_id).get('libpreps', []):
                libprep_id = libprep['libprepid']
                seqruns_list = []
                libpreps_list.append({'id': libprep_id,
                                      'qc': libprep['qc'],
                                      'seqruns': seqruns_list})
                for seqrun in charon_session.libprep_get_seqruns(project_id,
                                                                 sample_id,
                                                                 libprep_id).get('seqruns', []):
                    seqrun_dict = {'id': seqrun['seqrunid'],
                                   'alignment_status': seqrun['alignment_status'],
                                   'coverage': seqrun['mean_autosomal_coverage']}
                    # total_reads is only reported when present and truthy
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append({'name': charon_project['name'],
                              'id': project_id,
                              'status': charon_project['status'],
                              'samples': samples_list})

    # Phase 2: report
    if verbosity in (0, 1):
        for project_dict in projects_list:
            samples_by_status = collections.defaultdict(set)
            libpreps_by_status = collections.defaultdict(set)
            seqruns_by_status = collections.defaultdict(set)
            for sample_dict in project_dict['samples']:
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict['libpreps']
                if not libpreps:
                    sample_status = "NO_LIBPREPS"
                elif not any(libprep_dict['seqruns'] for libprep_dict in libpreps):
                    sample_status = "NO_SEQRUNS"
                else:
                    # Libpreps/seqruns are only tallied for samples that
                    # have at least one seqrun
                    for libprep_dict in libpreps:
                        libpreps_by_status[libprep_dict['qc']].add(libprep_dict['id'])
                        for seqrun_dict in libprep_dict['seqruns']:
                            seqruns_by_status[seqrun_dict['alignment_status']].add(seqrun_dict['id'])
                samples_by_status[sample_status].add(sample_dict['id'])

            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format(
                "{} ({})".format(project_dict['name'], project_dict['id'])))
            # "!s" stringifies first so a None value cannot break the width spec
            print_stderr("    Status: {!s:>40}".format(project_dict['status']))
            for name, status_dict in (("Samples", samples_by_status),
                                      ("Libpreps", libpreps_by_status),
                                      ("Seqruns", seqruns_by_status)):
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(len(item_set) for item_set in status_dict.values())
                # Sort by status value; items() works on Python 2 and 3.
                # Every set holds at least one ID, so total_items is never 0 here.
                for status, item_set in sorted(status_dict.items(),
                                               key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr("    Status: {!s:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                        status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else:  # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{!s:>{rspace}}"

        def emit(offset, label, value):
            # One "label ... value" line, indented by `offset` columns; the
            # value is right-aligned in whatever remains of 80 columns.
            print_stderr(output_template.format(" " * offset, label, value,
                                                rspace=80 - offset))

        for project_dict in projects_list:
            emit(0, "Project name:", project_dict['name'])
            emit(0, "Project ID:", project_dict['id'])
            emit(0, "Project status:", project_dict['status'])
            for sample_dict in project_dict['samples']:
                print_stderr("")
                emit(4, "Sample ID:", sample_dict['id'])
                emit(4, "Sample analysis status:", sample_dict['analysis_status'])
                emit(4, "Sample coverage:", sample_dict['coverage'])
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    emit(8, "Libprep ID:", libprep_dict['id'])
                    emit(8, "Libprep qc status:", libprep_dict['qc'])
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        emit(12, "Seqrun ID:", seqrun_dict['id'])
                        emit(12, "Seqrun alignment status:", seqrun_dict['alignment_status'])
                        emit(12, "Seqrun mean auto. coverage:", seqrun_dict['coverage'])
                        if "total_reads" in seqrun_dict:
                            emit(12, "Seqrun total reads:", seqrun_dict['total_reads'])
            print_stderr("\n")
示例#18
0
def launch_analysis(level, projects_to_analyze, restart_failed_jobs=False,
                    config=None, config_file_path=None):
    """Launch sample- or seqrun-level analysis for every item of each project.

    For each project the workflow name is read from Charon ("pipeline"), the
    matching analysis engine module is looked up in ``config`` and imported,
    and ``analyze_sample`` / ``analyze_seqrun`` is called for each sample /
    seqrun whose Charon status does not rule it out.

    :param str level: "sample" or "seqrun"; any other value launches nothing.
    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: If True, items Charon reports as FAILED
        are launched again instead of being skipped.
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.

    :raises RuntimeError: If the configuration names no analysis engine for
        a project's workflow.
    :raises Exception: Whatever the engine raises while launching; it is
        logged and then re-raised unchanged.
    """
    # Update Charon with the local state of all the jobs we're running
    update_charon_with_local_jobs_status()
    charon_session = CharonSession()

    def reported_status(project, sample, libprep, seqrun):
        # Status field consulted in Charon for the requested level
        if level == "seqrun":
            return charon_session.seqrun_get(project.project_id,
                                             sample, libprep,
                                             seqrun)['alignment_status']
        return charon_session.sample_get(project.project_id,
                                         sample)['status']

    for project in projects_to_analyze:
        # Get information from Charon regarding which workflows to run
        try:
            # E.g. "NGI" for NGI DNA Samples
            workflow = charon_session.project_get(project.project_id)["pipeline"]
        except (KeyError, CharonError) as e:
            # Workflow missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        try:
            analysis_engine_module_name = config["analysis"]["workflows"][workflow]["analysis_engine"]
        except (KeyError, TypeError):
            # TypeError covers config=None or a non-mapping config section
            error_msg = ("No analysis engine for workflow \"{}\" specified "
                         "in configuration file. Skipping this workflow "
                         "for project {}".format(workflow, project))
            LOG.error(error_msg)
            raise RuntimeError(error_msg)
        # Import the adapter module specified in the config file (e.g. piper_ngi)
        try:
            analysis_module = importlib.import_module(analysis_engine_module_name)
        except ImportError as e:
            error_msg = ('Skipping project "{}" workflow "{}": couldn\'t import '
                         'module "{}": {}'.format(project, workflow, analysis_engine_module_name, e))
            LOG.error(error_msg)
            # Next project
            continue

        # One (sample, libprep, seqrun) tuple per unit of work; libprep and
        # seqrun stay None at sample level
        if level == "sample":
            work_items = [(sample, None, None) for sample in project]
        elif level == "seqrun":
            work_items = [(sample, libprep, seqrun)
                          for sample in project
                          for libprep in sample
                          for seqrun in libprep]
        else:
            work_items = []

        for sample, libprep, seqrun in work_items:
            try:
                charon_reported_status = reported_status(project, sample, libprep, seqrun)
            except (CharonError, KeyError) as e:
                LOG.warning('Unable to get required information from Charon for '
                            'sample "{}" / project "{}" -- forcing it to new: {}'.format(sample, project, e))
                # Reset the status to NEW in Charon, then read it back
                if level == "seqrun":
                    charon_session.seqrun_update(project.project_id, sample.name, libprep.name, seqrun.name, alignment_status="NEW")
                else:
                    charon_session.sample_update(project.project_id, sample.name, status="NEW")
                charon_reported_status = reported_status(project, sample, libprep, seqrun)

            # Check Charon to ensure this hasn't already been processed
            if charon_reported_status in ("RUNNING", "DONE"):
                if level == "seqrun":
                    LOG.info('Charon reports seqrun analysis for project "{}" / sample "{}" '
                             '/ libprep "{}" / seqrun "{}" does not need processing '
                             ' (already "{}")'.format(project, sample, libprep, seqrun,
                                                      charon_reported_status))
                else:  # Sample
                    LOG.info('Charon reports sample analysis for project "{}" / sample "{}" '
                             'does not need processing '
                             ' (already "{}")'.format(project, sample, charon_reported_status))
                continue
            if charon_reported_status == "FAILED" and not restart_failed_jobs:
                if level == "seqrun":
                    LOG.error('FAILED:  Project "{}" / sample "{}" / library "{}" '
                              '/ flowcell "{}": Charon reports FAILURE, manual '
                              'investigation needed!'.format(project, sample, libprep, seqrun))
                else:  # Sample
                    LOG.error('FAILED:  Project "{}" / sample "{}" Charon reports FAILURE, manual '
                              'investigation needed!'.format(project, sample))
                continue

            try:
                # The engines themselves know which sub-workflows
                # they need to execute for a given level. For example,
                # with DNA Variant Calling on the sequencing run
                # level, we need to execute basic alignment and QC.
                if level == "seqrun":
                    LOG.info('Attempting to launch seqrun analysis for '
                             'project "{}" / sample "{}" / libprep "{}" '
                             '/ seqrun "{}", workflow "{}"'.format(project,
                                                                   sample,
                                                                   libprep,
                                                                   seqrun,
                                                                   workflow))
                    analysis_module.analyze_seqrun(project=project,
                                                   sample=sample,
                                                   libprep=libprep,
                                                   seqrun=seqrun)
                else:  # sample level
                    LOG.info('Attempting to launch sample analysis for '
                             'project "{}" / sample "{}" / workflow '
                             '"{}"'.format(project, sample, workflow))
                    analysis_module.analyze_sample(project=project,
                                                   sample=sample)
            except Exception as e:
                # Record what failed, then propagate: callers still see the
                # engine's exception exactly as before.
                LOG.error('Cannot process project "{}" / sample "{}" / '
                          'libprep "{}" / seqrun "{}" / workflow '
                          '"{}" : {}'.format(project, sample, libprep,
                                             seqrun, workflow, e))
                raise
def project_summarize(projects, verbosity=0):
    """Report the Charon status of projects, samples, libpreps and seqruns.

    Each entry of ``projects`` is resolved with ``locate_project``; the
    basename of the result is looked up in Charon and its samples, libpreps
    and seqruns are collected. All output goes through ``print_stderr``;
    nothing is returned.

    :param list projects: Project identifiers accepted by ``locate_project``.
        Entries that cannot be located (ValueError) or fetched from Charon
        (CharonError) are reported and skipped.
    :param int verbosity: 0 prints item counts per status, 1 also lists the
        IDs under each status, 2 or higher prints every record in full.
        Any value that is not an int >= 0 is reported and replaced by 0.
    """
    # Exact type check: bools and other int subclasses count as invalid.
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()

    # Phase 1: collect everything from Charon into plain dicts and lists
    projects_list = []
    for project in projects:
        try:
            project_name = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr(
            'Gathering information for project "{}"...'.format(project_name))
        try:
            charon_project = charon_session.project_get(project_name)
        except CharonError as e:
            # NOTE(review): print_stderr presumably already targets stderr, so
            # the extra file=sys.stderr used here before was dropped to match
            # every other call in this function -- confirm against its definition.
            print_stderr(
                'Project "{}" not found in Charon; skipping ({})'.format(
                    project_name, e))
            continue
        project_id = charon_project['projectid']
        samples_list = []
        for sample in charon_session.project_get_samples(project_id).get(
                'samples', []):
            sample_id = sample['sampleid']
            libpreps_list = []
            samples_list.append({
                'id': sample_id,
                'analysis_status': sample['analysis_status'],
                'coverage': sample['total_autosomal_coverage'],
                'libpreps': libpreps_list,
            })
            for libprep in charon_session.sample_get_libpreps(
                    project_id, sample_id).get('libpreps', []):
                libprep_id = libprep['libprepid']
                seqruns_list = []
                libpreps_list.append({
                    'id': libprep_id,
                    'qc': libprep['qc'],
                    'seqruns': seqruns_list,
                })
                for seqrun in charon_session.libprep_get_seqruns(
                        project_id, sample_id,
                        libprep_id).get('seqruns', []):
                    seqrun_dict = {
                        'id': seqrun['seqrunid'],
                        'alignment_status': seqrun['alignment_status'],
                        'coverage': seqrun['mean_autosomal_coverage'],
                    }
                    # total_reads is only reported when present and truthy
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append({
            'name': charon_project['name'],
            'id': project_id,
            'status': charon_project['status'],
            'samples': samples_list,
        })

    # Phase 2: report
    if verbosity in (0, 1):
        for project_dict in projects_list:
            samples_by_status = collections.defaultdict(set)
            libpreps_by_status = collections.defaultdict(set)
            seqruns_by_status = collections.defaultdict(set)
            for sample_dict in project_dict['samples']:
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict['libpreps']
                if not libpreps:
                    sample_status = "NO_LIBPREPS"
                elif not any(libprep_dict['seqruns']
                             for libprep_dict in libpreps):
                    sample_status = "NO_SEQRUNS"
                else:
                    # Libpreps/seqruns are only tallied for samples that
                    # have at least one seqrun
                    for libprep_dict in libpreps:
                        libpreps_by_status[libprep_dict['qc']].add(
                            libprep_dict['id'])
                        for seqrun_dict in libprep_dict['seqruns']:
                            seqruns_by_status[
                                seqrun_dict['alignment_status']].add(
                                    seqrun_dict['id'])
                samples_by_status[sample_status].add(sample_dict['id'])

            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format("{} ({})".format(
                project_dict['name'], project_dict['id'])))
            # "!s" stringifies first so a None value cannot break the width spec
            print_stderr("    Status: {!s:>40}".format(project_dict['status']))
            for name, status_dict in (
                    ("Samples", samples_by_status),
                    ("Libpreps", libpreps_by_status),
                    ("Seqruns", seqruns_by_status),
            ):
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(
                    len(item_set) for item_set in status_dict.values())
                # Sort by status value; items() works on Python 2 and 3.
                # Every set holds at least one ID, so total_items is never 0 here.
                for status, item_set in sorted(
                        status_dict.items(),
                        key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(
                        "    Status: {!s:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                            status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else:  # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{!s:>{rspace}}"

        def emit(offset, label, value):
            # One "label ... value" line, indented by `offset` columns; the
            # value is right-aligned in whatever remains of 80 columns.
            print_stderr(
                output_template.format(" " * offset,
                                       label,
                                       value,
                                       rspace=80 - offset))

        for project_dict in projects_list:
            emit(0, "Project name:", project_dict['name'])
            emit(0, "Project ID:", project_dict['id'])
            emit(0, "Project status:", project_dict['status'])
            for sample_dict in project_dict['samples']:
                print_stderr("")
                emit(4, "Sample ID:", sample_dict['id'])
                emit(4, "Sample analysis status:",
                     sample_dict['analysis_status'])
                emit(4, "Sample coverage:", sample_dict['coverage'])
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    emit(8, "Libprep ID:", libprep_dict['id'])
                    emit(8, "Libprep qc status:", libprep_dict['qc'])
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        emit(12, "Seqrun ID:", seqrun_dict['id'])
                        emit(12, "Seqrun alignment status:",
                             seqrun_dict['alignment_status'])
                        emit(12, "Seqrun mean auto. coverage:",
                             seqrun_dict['coverage'])
                        if "total_reads" in seqrun_dict:
                            emit(12, "Seqrun total reads:",
                                 seqrun_dict['total_reads'])
            print_stderr("\n")
示例#20
0
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    First, each project's engine pushes its local job status to Charon. Then,
    for every project Charon reports as OPEN, each sample is optionally sent
    through the "qc" engine and then through the project's best-practice
    engine, unless its Charon ``analysis_status`` rules it out.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Launch samples Charon reports as FAILED.
    :param bool restart_finished_jobs: Launch samples Charon reports as
        ANALYZED; also forwarded to the engine.
    :param bool restart_running_jobs: Launch samples Charon reports as
        UNDER_ANALYSIS; also forwarded to the engine.
    :param bool keep_existing_data: Forwarded to the engine's ``analyze``.
    :param bool no_qc: Skip the "qc" engine entirely.
    :param str exec_mode: Forwarded to the engine's ``analyze``.
    :param bool quiet: Do not send notification mails (also honoured when
        set as ``config["quiet"]``).
    :param bool manual: Do not send a mail for samples skipped because they
        are already ANALYZED (also honoured as ``config["manual"]``).
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Forwarded to the engine's ``analyze``.
    """
    # Flag lookups must not crash when no config dict was supplied; the
    # original `config` object is still what gets passed on to the engines.
    settings = config or {}
    suppress_mail = quiet or settings.get('quiet')
    manual_run = manual or settings.get('manual')

    for project in projects_to_analyze:  # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)

    charon_session = CharonSession()
    already_done_text = ('Charon reports seqrun analysis for project "{}" '
                         '/ sample "{}" does not need processing (already '
                         '"{}")')
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if project_status != "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not suppress_mail:
                mail_analysis(project_name=project.name, level="ERROR", info_text=error_text)
            continue
        try:
            # Same arguments as in the status-update loop above
            analysis_module = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue

        # Stays None when QC is disabled or its engine cannot be loaded, so
        # the per-sample QC step is skipped instead of failing on every sample.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))

        for sample in project:
            # Launch QC analysis; a QC failure does not block the main analysis
            if qc_analysis_module is not None:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            qc_analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not suppress_mail:
                        mail_analysis(project_name=project.name, sample_name=sample.name,
                                      engine_name=qc_analysis_module.__name__,
                                      level="ERROR", info_text=e)

            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(project.project_id,
                                                                   sample).get('analysis_status')
            except CharonError as e:
                LOG.error(e)
                continue

            # Check Charon to ensure this hasn't already been processed
            skip_text = None
            send_mail = not suppress_mail
            if charon_reported_status == "UNDER_ANALYSIS" and not restart_running_jobs:
                skip_text = already_done_text.format(project, sample, charon_reported_status)
            elif charon_reported_status == "ANALYZED" and not restart_finished_jobs:
                skip_text = already_done_text.format(project, sample, charon_reported_status)
                # Already-analyzed samples are not mailed about in manual runs
                send_mail = send_mail and not manual_run
            elif charon_reported_status == "FAILED" and not restart_failed_jobs:
                skip_text = ('FAILED:  Project "{}" / sample "{}" Charon reports '
                             'FAILURE, manual investigation needed!'.format(project, sample))
            if skip_text is not None:
                LOG.error(skip_text)
                if send_mail:
                    mail_analysis(project_name=project.name, sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=skip_text)
                continue

            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine '
                         '"{}"'.format(project, sample, analysis_module.__name__))
                # actual analysis launch
                analysis_module.analyze(project=project,
                                        sample=sample,
                                        restart_finished_jobs=restart_finished_jobs,
                                        restart_running_jobs=restart_running_jobs,
                                        keep_existing_data=keep_existing_data,
                                        exec_mode=exec_mode,
                                        config=config,
                                        generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                # Deliberate catch-all: one failing sample must not stop the rest
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__,
                                                        e))
                LOG.error(error_text)
                if not suppress_mail:
                    mail_analysis(project_name=project.name, sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
                continue
示例#21
0
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each sample of each project.

    First, every project's engine is asked to sync Charon with the status of
    its locally tracked jobs. Then, for every project that Charon reports as
    OPEN, each sample optionally goes through the QC engine and is handed to
    the project's best-practice engine unless its Charon analysis status
    forbids a (re)launch. Failures are logged (and mailed unless quiet) and
    never abort the processing of the remaining samples/projects.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Relaunch samples Charon reports as FAILED
    :param bool restart_finished_jobs: Relaunch samples Charon reports as ANALYZED
        (also forwarded to the engine's analyze())
    :param bool restart_running_jobs: Relaunch samples Charon reports as UNDER_ANALYSIS
        (also forwarded to the engine's analyze())
    :param bool keep_existing_data: Forwarded to the engine's analyze()
    :param bool no_qc: Skip the per-sample QC analysis
    :param str exec_mode: Forwarded to the engine's analyze()
    :param bool quiet: Do not send notification mails (the "quiet" key of config
        has the same effect)
    :param bool manual: Do not send a mail for samples skipped because they are
        already ANALYZED (the "manual" key of config has the same effect)
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Forwarded to the engine's analyze()
    """
    # `config` itself is forwarded untouched to the engines; `settings` only
    # makes the local lookups safe when config is left at its default (None).
    settings = config or {}
    # Honour the explicit arguments as well as the configuration keys.
    quiet = bool(quiet or settings.get('quiet'))
    manual = bool(manual or settings.get('manual'))

    # Get information from Charon regarding which best practice analyses to
    # run, and let each engine update Charon with its local job status first.
    for project in projects_to_analyze:
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)

    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if project_status != "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not quiet:
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            # Same arguments as in the status-update pass above.
            analysis_module = get_engine_for_bp(project, config,
                                                config_file_path)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue

        # Reset per project so that a failed load can neither leave the name
        # unbound nor reuse the module loaded for a previous project.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))

        for sample in project:
            # Launch QC analysis (skipped if disabled or engine unavailable)
            if qc_analysis_module is not None:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    # Report the QC engine, which is the one that failed.
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(
                            project, sample,
                            qc_analysis_module.__name__, e))
                    LOG.error(error_text)
                    if not quiet:
                        mail_analysis(
                            project_name=project.name,
                            sample_name=sample.name,
                            engine_name=qc_analysis_module.__name__,
                            level="ERROR",
                            info_text=e)
                    # A QC failure does not prevent the main analysis.

            # Check Charon to ensure this hasn't already been processed
            try:
                charon_reported_status = charon_session.sample_get(
                    project.project_id, sample).get('analysis_status')
                if charon_reported_status == "UNDER_ANALYSIS":
                    may_launch = restart_running_jobs
                    send_mail = not quiet
                elif charon_reported_status == "ANALYZED":
                    may_launch = restart_finished_jobs
                    # Manual runs are not mailed about finished samples.
                    send_mail = not quiet and not manual
                elif charon_reported_status == "FAILED":
                    may_launch = restart_failed_jobs
                    send_mail = not quiet
                else:
                    # Any other (or missing) status never blocks a launch.
                    may_launch = True
                    send_mail = False
                if not may_launch:
                    if charon_reported_status == "FAILED":
                        error_text = (
                            'FAILED:  Project "{}" / sample "{}" Charon reports '
                            'FAILURE, manual investigation needed!'.format(
                                project, sample))
                    else:
                        error_text = (
                            'Charon reports seqrun analysis for project "{}" '
                            '/ sample "{}" does not need processing (already '
                            '"{}")'.format(project, sample,
                                           charon_reported_status))
                    LOG.error(error_text)
                    if send_mail:
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR",
                                      info_text=error_text)
                    continue
            except CharonError as e:
                LOG.error(e)
                continue

            # Launch actual best-practice analysis
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample,
                                       analysis_module.__name__))
                analysis_module.analyze(
                    project=project,
                    sample=sample,
                    restart_finished_jobs=restart_finished_jobs,
                    restart_running_jobs=restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    exec_mode=exec_mode,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                # Broad on purpose: one sample's failure must not stop the
                # remaining samples from being launched.
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(
                                  project, sample, analysis_module.__name__,
                                  e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project.name,
                                  sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR",
                                  info_text=e)
                continue