Example #1
def find_projects_from_samples(sample_list):
    """Given a list of samples, attempts to determine
    which projects they belong to using Charon records.

    :param list sample_list: A list of the samples for which to find projects

    :returns: a dict of {project_id: set(samples)}
    :rtype: dict of sets

    :raises ValueError: If the input is not a list.
    """
    STHLM_SAMPLE_RE = re.compile(r'(P\d{4})_')
    projects_dict = collections.defaultdict(set)
    samples_by_project_id = {}
    no_owners_found = set()
    multiple_owners_found = set()
    charon_session = CharonSession()
    if not isinstance(sample_list, list):
        raise ValueError("Input should be a list.")

    for sample_name in sample_list:
        # First see if we can just parse out the project id from the sample name
        m = STHLM_SAMPLE_RE.match(sample_name)
        if m:
            project_id = m.groups()[0]
            try:
                # Ensure that we guessed right
                charon_session.sample_get(project_id, sample_name)
            except CharonError as e:
                LOG.debug('Project for sample "{}" appears to be "{}" but is not '
                          'present in Charon ({})'.format(sample_name, project_id, e))
                no_owners_found.add(sample_name)
            else:
                projects_dict[project_id].add(sample_name)
        else:
            # Otherwise check all the projects for matching samples (returns list or None)
            owner_projects_list = charon_session.sample_get_projects(sample_name)
            if not owner_projects_list:
                no_owners_found.add(sample_name)
            elif len(owner_projects_list) > 1:
                multiple_owners_found.add(sample_name)
            else:
                projects_dict[owner_projects_list[0]].add(sample_name)
    if no_owners_found:
        LOG.warn("No projects found for the following samples: {}".format(", ".join(no_owners_found)))
    if multiple_owners_found:
        LOG.warn('Multiple projects found with the following samples (owner '
                 'could not be unambiguously determined): {}'.format(", ".join(multiple_owners_found)))
    return dict(projects_dict)
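
A brief usage sketch for the function above; the sample names here are made-up illustrations, not taken from any real project:

# Hypothetical usage: group a list of sample names by project and report the counts.
samples = ["P1234_101", "P1234_102", "an_external_sample_name"]
for project_id, project_samples in find_projects_from_samples(samples).items():
    LOG.info("Project {}: {} sample(s)".format(project_id, len(project_samples)))
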
Example #2
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     '''Updates delivery_projects in Charon at sample level
     '''
     charon_session = CharonSession()
     try:
         # Fetch the sample record from Charon
         sample_charon = charon_session.sample_get(self.projectid,
                                                   self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(
                 self.projectid,
                 self.sampleid,
                 delivery_projects=delivery_projects)
             logger.info(
                 'Charon delivery_projects for sample {} updated with value {}'
                 .format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warn(
                 'Charon delivery_projects for sample {} not updated with value {} because the value was already present'
                 .format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         logger.error(
             'Failed to update delivery_projects in charon while delivering {}. Error says: {}'
             .format(self.sampleid, e))
         logger.exception(e)
def analyze(analysis_object, config=None, config_file_path=None):
    """Launch the RNA-seq (rna_ngi) analysis for the given analysis object."""
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        LOG.error("charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?")
        raise RuntimeError
    fastq_files=[]
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(analysis_object.project.project_id,
                                                                   sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample, charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                LOG.error(e)
                # Skip this sample if its Charon status cannot be determined
                continue

            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(analysis_object.project.project_id, sample.name, libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep, charon_lp_status)
                if not do_analyze:
                    continue
                else:
                    for seqrun in libprep:
                        charon_sr_status = charon_session.seqrun_get(analysis_object.project.project_id, sample.name, libprep.name, seqrun.name).get('alignment_status')
                        do_analyze = handle_seqrun_status(analysis_object, seqrun, charon_sr_status)
                        if not do_analyze:
                            continue
                        else:
                            seqrun.being_analyzed = True
                            sample.being_analyzed = sample.being_analyzed or True
                            # Filter index-read fastq files out of the analysis
                            for fastq_file in filter(lambda f: not is_index_file(f), seqrun.fastq_files):
                                fastq_path = os.path.join(analysis_object.project.base_path, "DATA", analysis_object.project.project_id, sample.name, libprep.name, seqrun.name, fastq_file)
                                fastq_files.append(fastq_path)

        if not fastq_files:
            LOG.error("No fastq files obtained for the analysis of project {}; please check the Charon status.".format(analysis_object.project.name))
        else:
            if analysis_object.restart_running_jobs:
                stop_ongoing_analysis(analysis_object)
            fastq_dir = preprocess_analysis(analysis_object, fastq_files)
            sbatch_path = write_batch_job(analysis_object, reference_genome, fastq_dir)
            job_id = start_analysis(sbatch_path)
            analysis_path = os.path.join(analysis_object.project.base_path, "ANALYSIS", analysis_object.project.project_id, 'rna_ngi')
            record_project_job(analysis_object.project, job_id, analysis_path)
Example #4
def analyze_sample(project, sample, config=None, config_file_path=None):
    """Analyze data at the sample level.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)
    """
    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    charon_session = CharonSession()
    # Determine if we can begin sample-level processing yet.
    # Conditions are that the total autosomal coverage is above 28.4X
    # If these conditions become more complex we can create a function for this
    sample_total_autosomal_coverage = charon_session.sample_get(project.project_id,
                                     sample.name).get('total_autosomal_coverage')
    if sample_total_autosomal_coverage > 28.4:
        LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
        for workflow_subtask in get_subtasks_for_level(level="sample"):
            if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                    project_id=project.project_id,
                                                    sample_id=sample.name):
                try:
                    ## Temporarily logging to a file until we get ELK set up
                    log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                         project_base_path=project.base_path,
                                                         project_name=project.name,
                                                         sample_id=sample.name)
                    rotate_log(log_file_path)
                    # Store the exit code of detached processes
                    exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                                project_base_path=project.base_path,
                                                                project_name=project.name,
                                                                sample_id=sample.name)

                    build_setup_xml(project, config, sample)
                    command_line = build_piper_cl(project, workflow_subtask, exit_code_path, config)
                    p_handle = launch_piper_job(command_line, project, log_file_path)
                    try:
                        record_process_sample(project=project, sample=sample,
                                              workflow_subtask=workflow_subtask,
                                              analysis_module_name="piper_ngi",
                                              analysis_dir=project.analysis_dir,
                                              pid=p_handle.pid)
                    except RuntimeError as e:
                        LOG.error(e)
                        continue
                except (NotImplementedError, RuntimeError) as e:
                    error_msg = ('Processing project "{}" / sample "{}" failed: '
                                 '{}'.format(project, sample, repr(e)))
                    LOG.error(error_msg)
    else:
        LOG.info('Sample "{}" in project "{}" is not yet ready for '
                 'processing.'.format(sample, project))
def update_gt_status_in_charon(sample_id, status, concordance=None):
    """Set genotype_status (and optionally genotype_concordance) for a sample in Charon."""
    project_id = sample_id.split('_')[0]
    try:
        charon_session = CharonSession()
        sample = charon_session.sample_get(project_id, sample_id)
        if concordance is None:
            if sample.get('genotype_status') != status:
                charon_session.sample_update(projectid=project_id, sampleid=sample_id, genotype_status=status)
        else:
            if sample.get('genotype_status') != status or sample.get('genotype_concordance') != concordance:
                charon_session.sample_update(projectid=project_id, sampleid=sample_id, genotype_status=status, genotype_concordance=concordance)
    except CharonError as e:
        return str(e)
def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Locate genotype files and set genotype_status to AVAILABLE in Charon for the matching samples."""
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        if not inbox:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError("No path to delivery inbox specified by argument "
                                 "or in configuration file ({}). Exiting.".format(config_file_path))
        # Cutoff is an absolute timestamp num_days in the past (in seconds since the epoch)
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        gt_files_valid = []
        for inbox in inboxes:
            inbox = os.path.abspath(inbox)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match, glob.glob(os.path.join(inbox, "*"))):
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))

    if not gt_files_valid:
        LOG.info("No genotype files found under {} newer than "
                 "{}".format(inbox, time.ctime(cutoff_age)))
    else:
        charon_session = CharonSession()
        for gt_file_path in gt_files_valid:
            project_samples_dict = \
                    find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
            for project_id, samples in project_samples_dict.iteritems():
                LOG.info("Updating project {}...".format(project_id))
                for sample in samples:
                    try:
                        genotype_status = \
                            charon_session.sample_get(projectid=project_id,
                                                      sampleid=sample).get("genotype_status")
                        if genotype_status in (None, "NOT_AVAILABLE"):
                            LOG.info('Updating sample {} genotype_status '
                                     'to "AVAILABLE"...'.format(sample))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample,
                                                         genotype_status="AVAILABLE")
                        else:
                            LOG.info('Not updating sample {} genotype_status '
                                     '(already "{}")'.format(sample, genotype_status))
                    except CharonError as e:
                        LOG.error('Could not update genotype status to "AVAILABLE" '
                                  'for project/sample "{}/{}": {}'.format(project_id,
                                                                          sample,
                                                                          e))
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     '''Updates delivery_projects in Charon at sample level
     '''
     charon_session = CharonSession()
     try:
         # Fetch the sample record from Charon
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated with value {}'.format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warn('Charon delivery_projects for sample {} not updated with value {} because the value was already present'.format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         logger.error('Failed to update delivery_projects in charon while delivering {}. Error says: {}'.format(self.sampleid, e))
         logger.exception(e)
Example #8
 def add_dds_name_delivery_in_charon(self, name_of_delivery):
     """Updates delivery_projects in Charon at project level
     """
     charon_session = CharonSession()
     try:
         # Fetch the sample record from Charon
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         if name_of_delivery not in delivery_projects:
             delivery_projects.append(name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated '
                         'with value {}'.format(self.sampleid, name_of_delivery))
         else:
             logger.warn('Charon delivery_projects for sample {} not updated '
                         'with value {} because the value was already present'.format(self.sampleid, name_of_delivery))
     except Exception as e:
         logger.exception('Failed to update delivery_projects in charon while delivering {}.'.format(self.sampleid))
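
A hedged usage sketch for the delivery helpers above, assuming they are methods of a sample-level deliverer class; the class name, ids, and delivery names below are hypothetical, not taken from the examples:

# Hypothetical usage; SampleDeliverer, the ids, and the delivery names are illustrative only.
deliverer = SampleDeliverer(projectid="P1234", sampleid="P1234_101")
deliverer.add_supr_name_delivery_in_charon("delivery012345")
deliverer.add_dds_name_delivery_in_charon("ngi_delivery_00123")
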
Example #9
def update_gt_status_in_charon(sample_id, status, concordance=None):
    """Set genotype_status (and optionally genotype_concordance) for a sample in Charon."""
    project_id = sample_id.split('_')[0]
    try:
        charon_session = CharonSession()
        sample = charon_session.sample_get(project_id, sample_id)
        if concordance is None:
            if sample.get('genotype_status') != status:
                charon_session.sample_update(projectid=project_id,
                                             sampleid=sample_id,
                                             genotype_status=status)
        else:
            if sample.get('genotype_status') != status or sample.get(
                    'genotype_concordance') != concordance:
                charon_session.sample_update(projectid=project_id,
                                             sampleid=sample_id,
                                             genotype_status=status,
                                             genotype_concordance=concordance)
    except CharonError as e:
        return str(e)
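
A brief usage sketch for update_gt_status_in_charon; the sample id, status, and concordance values are illustrative. The function returns the Charon error message as a string on failure and None on success, so the return value can be checked directly:

# Illustrative call with made-up values.
error = update_gt_status_in_charon("P1234_101", "DONE", concordance=0.98)
if error:
    LOG.error("Could not update genotype status in Charon: {}".format(error))
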
Example #10
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    for project in projects_to_analyze:  # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)
    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if not project_status == "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(project, sample,
                                                  analysis_module.__name__, e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR",
                                      info_text=e)
            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(
                    project.project_id, sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                if charon_reported_status == "UNDER_ANALYSIS":
                    if not restart_running_jobs:
                        error_text = (
                            'Charon reports seqrun analysis for project "{}" '
                            '/ sample "{}" does not need processing (already '
                            '"{}")'.format(project, sample,
                                           charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
                elif charon_reported_status == "ANALYZED":
                    if not restart_finished_jobs:
                        error_text = (
                            'Charon reports seqrun analysis for project "{}" '
                            '/ sample "{}" does not need processing (already '
                            '"{}")'.format(project, sample,
                                           charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet') and not config.get(
                                'manual'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
                elif charon_reported_status == "FAILED":
                    if not restart_failed_jobs:
                        error_text = (
                            'FAILED:  Project "{}" / sample "{}" Charon reports '
                            'FAILURE, manual investigation needed!'.format(
                                project, sample))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
            except CharonError as e:
                LOG.error(e)
                continue
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample,
                                       analysis_module.__name__))
                #actual analysis launch
                analysis_module.analyze(
                    project=project,
                    sample=sample,
                    restart_finished_jobs=restart_finished_jobs,
                    restart_running_jobs=restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    exec_mode=exec_mode,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(
                                  project, sample, analysis_module.__name__,
                                  e))
                LOG.error(error_text)
                if not config.get("quiet"):
                    mail_analysis(project_name=project.name,
                                  sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR",
                                  info_text=e)
                continue
def update_charon_with_local_jobs_status():
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()

        # Sequencing Run Analyses
        for seqrun_entry in session.query(SeqrunAnalysis).all():

            # Local names
            workflow = seqrun_entry.workflow
            project_name = seqrun_entry.project_name
            project_id = seqrun_entry.project_id
            project_base_path = seqrun_entry.project_base_path
            sample_id = seqrun_entry.sample_id
            libprep_id = seqrun_entry.libprep_id
            seqrun_id = seqrun_entry.seqrun_id
            pid = seqrun_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id,
                                      libprep_id=libprep_id,
                                      seqrun_id=seqrun_id)
            label = "project/sample/libprep/seqrun {}/{}/{}/{}".format(project_name,
                                                                       sample_id,
                                                                       libprep_id,
                                                                       seqrun_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_alignment_status = "DONE"
                    try:
                        write_to_charon_alignment_results(base_path=project_base_path,
                                                          project_name=project_name,
                                                          project_id=project_id,
                                                          sample_id=sample_id,
                                                          libprep_id=libprep_id,
                                                          seqrun_id=seqrun_id)
                    except (RuntimeError, ValueError) as e:
                        LOG.error(e)
                        set_alignment_status = "FAILED"
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_alignment_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(seqrun_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code (process no longer running)
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"FAILED", inspect manually'.format(label, pid))
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status="FAILED")
                    # Job is only deleted if the Charon update succeeds
                    LOG.debug("Deleting local entry {}".format(seqrun_entry))
                    session.delete(seqrun_entry)
                else:
                    # None -> Job still running
                    charon_status = charon_session.seqrun_get(projectid=project_id,
                                                              sampleid=sample_id,
                                                              libprepid=libprep_id,
                                                              seqrunid=seqrun_id)['alignment_status']
                    if not charon_status == "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.seqrun_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id,
                                                     seqrunid=seqrun_id,
                                                     alignment_status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))


        for sample_entry in session.query(SampleAnalysis).all():

            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            pid = sample_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id)
            label = "project/sample/libprep/seqrun {}/{}".format(project_name,
                                                                       sample_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_status = "DONE"
                    ## TODO implement sample-level analysis results parsing / reporting to Charon?
                    #try:
                    #    write_to_charon_alignment_results(base_path=project_base_path,
                    #                                      project_name=project_name,
                    #                                      project_id=project_id,
                    #                                      sample_id=sample_id,
                    #                                      libprep_id=libprep_id,
                    #                                      seqrun_id=seqrun_id)
                    #except (RuntimeError, ValueError) as e:
                    #    LOG.error(e)
                    #    set_alignment_status = "FAILED"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status=set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"COMPUTATION_FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"COMPUTATION_FAILED", inspect manually'.format(label, pid))
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status="COMPUTATION_FAILED")
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running
                    try:
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                              sampleid=sample_id)['status']
                    except (CharonError, KeyError) as e:
                        LOG.warn('Unable to get required information from Charon for '
                                 'sample "{}" / project "{}" -- forcing it to RUNNING: {}'.format(sample_id, project_id, e))
                        charon_status = "NEW"

                    if not charon_status == "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))
        session.commit()
Example #12
def analyze(project, sample,
            exec_mode="sbatch", 
            restart_finished_jobs=False,
            restart_running_jobs=False,
            keep_existing_data=False,
            level="sample",
            genotype_file=None,
            config=None, config_file_path=None,
            generate_bqsr_bam=False):
    """Analyze data at the sample level.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param str exec_mode: "sbatch" or "local" (local not implemented)
    :param bool restart_finished_jobs: Restart jobs that are already done (have a .done file)
    :param bool restart_running_jobs: Kill and restart currently-running jobs
    :param str level: The level on which to perform the analysis ("sample" or "genotype")
    :param str genotype_file: The path to the genotype file (only relevant for genotype analysis)
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)

    :raises ValueError: If exec_mode is an unsupported value
    """
    if level == "sample":
        status_field = "alignment_status"
    elif level == "genotype":
        status_field = "genotype_status"
    else:
        LOG.warn('Unknown workflow level: "{}"'.format(level))
        status_field = "alignment_status" # Or should we abort?
    try:
        check_for_preexisting_sample_runs(project, sample, restart_running_jobs,
                                          restart_finished_jobs, status_field)
    except RuntimeError as e:
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))
    if exec_mode.lower() not in ("sbatch", "local"):
        raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                         'value was "{}"'.format(exec_mode))
    if exec_mode == "local":
        modules_to_load = config.get("piper", {}).get("load_modules", [])
        load_modules(modules_to_load)
    for workflow_subtask in workflows.get_subtasks_for_level(level=level):
        if level == "genotype":
            genotype_status = None # Some records in Charon lack this field, I'm guessing
            try:
                charon_session = CharonSession()
                genotype_status = charon_session.sample_get(projectid=project.project_id,
                                                            sampleid=sample.name).get("genotype_status")
            except CharonError as e:
                LOG.error('Couldn\'t determine genotyping status for project/'
                          'sample "{}/{}"; skipping analysis.'.format(project, sample))
                continue
            if find_previous_genotype_analyses(project, sample) or genotype_status == "DONE":
                if not restart_finished_jobs:
                    LOG.info('Project/sample "{}/{}" has completed genotype '
                             'analysis previously; skipping (use flag to force '
                             'analysis)'.format(project, sample))
                    continue
        if restart_running_jobs:
            # Kill currently-running jobs if they exist
            kill_running_sample_analysis(workflow_subtask=workflow_subtask,
                                         project_id=project.project_id,
                                         sample_id=sample.name)
        # This checks the local jobs database
        if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                project_id=project.project_id,
                                                sample_id=sample.name):
            LOG.info('Launching "{}" analysis for sample "{}" in project '
                     '"{}"'.format(workflow_subtask, sample, project))
            try:
                log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                     project_base_path=project.base_path,
                                                     project_name=project.dirname,
                                                     project_id=project.project_id,
                                                     sample_id=sample.name)
                rotate_file(log_file_path)
                exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                            project_base_path=project.base_path,
                                                            project_name=project.dirname,
                                                            project_id=project.project_id,
                                                            sample_id=sample.name)
                if level == "sample":
                    if not keep_existing_data:
                        remove_previous_sample_analyses(project, sample)
                        default_files_to_copy = None
                elif level == "genotype":
                    if not keep_existing_data:
                        remove_previous_genotype_analyses(project)
                        default_files_to_copy = None

                # Update the project to keep only valid fastq files for setup.xml creation
                if level == "genotype":
                    updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(project,
                                                              sample,
                                                              restart_finished_jobs=True,
                                                              status_field="genotype_status")
                else:
                    updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(project,
                                                              sample,
                                                              restart_finished_jobs,
                                                              status_field="alignment_status")
                setup_xml_cl, setup_xml_path = build_setup_xml(project=updated_project,
                                                               sample=sample,
                                                               workflow=workflow_subtask,
                                                               local_scratch_mode=(exec_mode == "sbatch"),
                                                               config=config)
                piper_cl = build_piper_cl(project=project,
                                          workflow_name=workflow_subtask,
                                          setup_xml_path=setup_xml_path,
                                          exit_code_path=exit_code_path,
                                          config=config,
                                          exec_mode=exec_mode,
                                          generate_bqsr_bam=generate_bqsr_bam)
                if exec_mode == "sbatch":
                    process_id = None
                    slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                       workflow_subtask,
                                                       project, sample,
                                                       restart_finished_jobs=restart_finished_jobs,
                                                       files_to_copy=default_files_to_copy)
                    for x in xrange(10):
                        # Time delay to let sbatch get its act together
                        # (takes a few seconds to be visible with sacct)
                        try:
                            get_slurm_job_status(slurm_job_id)
                            break
                        except ValueError:
                            time.sleep(2)
                    else:
                        LOG.error('sbatch file for sample {}/{} did not '
                                  'queue properly! Job ID {} cannot be '
                                  'found.'.format(project, sample, slurm_job_id))
                else: # "local"
                    raise NotImplementedError('Local execution not currently implemented. '
                                              'I\'m sure Denis can help you with this.')
                    #slurm_job_id = None
                    #launch_piper_job(setup_xml_cl, project)
                    #process_handle = launch_piper_job(piper_cl, project)
                    #process_id = process_handle.pid
                try:
                    record_process_sample(project=project,
                                          sample=sample,
                                          analysis_module_name="piper_ngi",
                                          slurm_job_id=slurm_job_id,
                                          process_id=process_id,
                                          workflow_subtask=workflow_subtask)
                except RuntimeError as e:
                    LOG.error(e)
                    ## Question: should we just kill the run in this case or let it go?
                    continue
            except (NotImplementedError, RuntimeError, ValueError) as e:
                error_msg = ('Processing project "{}" / sample "{}" / workflow "{}" '
                             'failed: {}'.format(project, sample,
                                                 workflow_subtask,
                                                 e))
                LOG.error(error_msg)
Example #13
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished successfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="INFO", info_text=info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status="DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id,"piper_ngi",  "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir)
                elif piper_exit_code and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                 '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR", info_text=error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(label, set_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="ERROR", info_text=error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['analysis_status']
                        if not charon_status == "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                     'local process tracking database indicates it is running. '
                                     'Setting value in Charon to {}.'.format(label, charon_status,
                                                                             set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
        session.commit()
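
A minimal sketch of how the tracker above might be invoked periodically (for example from a cron job); the config loader name and path are illustrative assumptions, not part of the example:

# Hypothetical driver: load the parsed NGI configuration and sync local job states to Charon.
config = load_config("/path/to/ngi_config.yaml")  # illustrative helper and path
update_charon_with_local_jobs_status(config=config)
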
Example #14
def analyze(analysis_object, config=None, config_file_path=None):
    """Launch the RNA-seq (rna_ngi) analysis for the given analysis object."""
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        LOG.error(
            "Charon project is not registered with Stockholm or Uppsala; "
            "cannot determine which config file to use for the RNA pipeline.")
        raise RuntimeError("Unknown sequencing facility for project "
                           "{}".format(analysis_object.project.project_id))
    fastq_files = []
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(
                    analysis_object.project.project_id,
                    sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample,
                                                  charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                LOG.error(e)
                # Skip this sample if its Charon status cannot be determined
                continue

            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(
                    analysis_object.project.project_id, sample.name,
                    libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep,
                                                   charon_lp_status)
                if not do_analyze:
                    continue
                else:
                    for seqrun in libprep:
                        charon_sr_status = charon_session.seqrun_get(
                            analysis_object.project.project_id, sample.name,
                            libprep.name, seqrun.name).get('alignment_status')
                        do_analyze = handle_seqrun_status(
                            analysis_object, seqrun, charon_sr_status)
                        if not do_analyze:
                            continue
                        else:
                            seqrun.being_analyzed = True
                            sample.being_analyzed = sample.being_analyzed or True
                            for fastq_file in seqrun.fastq_files:
                                fastq_path = os.path.join(
                                    analysis_object.project.base_path, "DATA",
                                    analysis_object.project.project_id,
                                    sample.name, libprep.name, seqrun.name,
                                    fastq_file)
                                fastq_files.append(fastq_path)

        if not fastq_files:
            LOG.error(
                "No fastq files obtained for the analysis of project {}; "
                "please check the Charon status.".format(analysis_object.project.name))
        else:
            if analysis_object.restart_running_jobs:
                stop_ongoing_analysis(analysis_object)
            fastq_dir = preprocess_analysis(analysis_object, fastq_files)
            sbatch_path = write_batch_job(analysis_object, reference_genome,
                                          fastq_dir)
            job_id = start_analysis(sbatch_path)
            analysis_path = os.path.join(analysis_object.project.base_path,
                                         "ANALYSIS",
                                         analysis_object.project.project_id,
                                         'rna_ngi')
            record_project_job(analysis_object.project, job_id, analysis_path)
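
A side note on the path handling above: analyze() collects fastq files by joining the project base path with a fixed DATA/<project>/<sample>/<libprep>/<seqrun> layout. A minimal, self-contained sketch of that convention follows; the helper name and the identifiers in the usage comment are hypothetical, not part of the pipeline.

# Sketch only: illustrates the DATA directory layout assumed by analyze() above
# when collecting fastq paths. All identifiers in the usage comment are made up.
import os

def build_fastq_path(base_path, project_id, sample, libprep, seqrun, fastq_file):
    """Return <base_path>/DATA/<project_id>/<sample>/<libprep>/<seqrun>/<fastq_file>."""
    return os.path.join(base_path, "DATA", project_id, sample,
                        libprep, seqrun, fastq_file)

# Example (hypothetical identifiers):
# build_fastq_path("/proj/ngi", "P1234", "P1234_101", "A",
#                  "180101_ST-E00201_0123_AHXXXXCCXX",
#                  "P1234_101_S1_L001_R1_001.fastq.gz")
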
Example #15
def main(inbox=None,
         num_days=14,
         genotype_files=None,
         config=None,
         config_file_path=None):
    if genotype_files:
        gt_files_valid = [
            os.path.abspath(gt_file) for gt_file in genotype_files
        ]
    else:
        if not inbox:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError(
                    "No path to delivery inbox specified by argument "
                    "or in configuration file ({}). Exiting.".format(
                        config_file_path))
        for inbox in inboxes:
            inbox = os.path.abspath(inbox)
            # Cutoff timestamp: files modified after this are considered new
            cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox, time.ctime(cutoff_age)))
            gt_files_valid = []
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox, "*"))):
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))

    if not gt_files_valid:
        LOG.info("No genotype files found under {} newer than "
                 "{}".format(inbox, time.ctime(cutoff_age)))
    else:
        charon_session = CharonSession()
        for gt_file_path in gt_files_valid:
            project_samples_dict = \
                    find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
            for project_id, samples in project_samples_dict.iteritems():
                LOG.info("Updating project {}...".format(project_id))
                for sample in samples:
                    try:
                        genotype_status = \
                            charon_session.sample_get(projectid=project_id,
                                                      sampleid=sample).get("genotype_status")
                        if genotype_status in (None, "NOT_AVAILABLE"):
                            LOG.info('Updating sample {} genotype_status '
                                     'to "AVAILABLE"...'.format(sample))
                            charon_session.sample_update(
                                projectid=project_id,
                                sampleid=sample,
                                genotype_status="AVAILABLE")
                        else:
                            LOG.info('Not updating sample {} genotype_status '
                                     '(already "{}")'.format(
                                         sample, genotype_status))
                    except CharonError as e:
                        LOG.error(
                            'Could not update genotype status to "AVAILABLE" '
                            'for project/sample "{}/{}": {}'.format(
                                project_id, sample, e))
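
A note on the file-age filter in main() above: cutoff_age is an absolute epoch timestamp (now minus num_days worth of seconds), so a genotype file passes the filter when its modification time is greater than that cutoff. A standalone sketch of the same filter under that assumption; the helper name is ours.

# Sketch only: the modification-time filter used by main() above, as a
# self-contained helper.
import glob
import os
import time

def files_newer_than(directory, num_days):
    """Return absolute paths of files in `directory` modified within the last `num_days` days."""
    cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
    recent = []
    for path in glob.glob(os.path.join(directory, "*")):
        if os.stat(path).st_mtime > cutoff_age:
            recent.append(os.path.abspath(path))
    return recent
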
Example #16
def launch_analysis(level, projects_to_analyze, restart_failed_jobs=False,
                    config=None, config_file_path=None):
    """Launch the appropriate seqrun (flowcell-level) analysis for each fastq
    file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    # Update Charon with the local state of all the jobs we're running
    update_charon_with_local_jobs_status()
    charon_session = CharonSession()
    for project in projects_to_analyze:
        # Get information from Charon regarding which workflows to run
        try:
            # E.g. "NGI" for NGI DNA Samples
            workflow = charon_session.project_get(project.project_id)["pipeline"]
        except (KeyError, CharonError) as e:
            # Workflow missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        try:
            analysis_engine_module_name = config["analysis"]["workflows"][workflow]["analysis_engine"]
        except KeyError:
            error_msg = ("No analysis engine for workflow \"{}\" specified "
                         "in configuration file. Skipping this workflow "
                         "for project {}".format(workflow, project))
            LOG.error(error_msg)
            raise RuntimeError(error_msg)
        # Import the adapter module specified in the config file (e.g. piper_ngi)
        try:
            analysis_module = importlib.import_module(analysis_engine_module_name)
        except ImportError as e:
            error_msg = ('Skipping project "{}" workflow "{}": couldn\'t import '
                         'module "{}": {}'.format(project, workflow, analysis_engine_module_name, e))
            LOG.error(error_msg)
            # Next project
            continue

        # This is weird
        objects_to_process = []
        if level == "sample":
            for sample in project:
                objects_to_process.append({"project": project, "sample": sample})
        elif level == "seqrun":
            for sample in project:
                for libprep in sample:
                    for seqrun in libprep:
                        objects_to_process.append({"project": project,
                                                   "sample": sample,
                                                   "libprep": libprep,
                                                   "seqrun": seqrun})
        # Still weird and not so great
        for obj_dict in objects_to_process:
            project = obj_dict.get("project")
            sample = obj_dict.get("sample")
            libprep = obj_dict.get("libprep")
            seqrun = obj_dict.get("seqrun")

            try:
                if level == "seqrun":
                    charon_reported_status = charon_session.seqrun_get(project.project_id,
                                                                       sample, libprep,
                                                                       seqrun)['alignment_status']
                else: # sample-level
                    charon_reported_status = charon_session.sample_get(project.project_id,
                                                                       sample)['status']
            except (CharonError, KeyError) as e:
                LOG.warn('Unable to get required information from Charon for '
                         'sample "{}" / project "{}" -- setting its status to NEW: {}'.format(sample, project, e))
                if level == "seqrun":
                    charon_session.seqrun_update(project.project_id, sample.name, libprep.name, seqrun.name, alignment_status="NEW")
                    charon_reported_status = charon_session.seqrun_get(project.project_id,
                                                                       sample, libprep,
                                                                       seqrun)['alignment_status']
                else:
                    charon_session.sample_update(project.project_id, sample.name, status="NEW")
                    charon_reported_status = charon_session.sample_get(project.project_id,
                                                                       sample)['status']

            # Check Charon to ensure this hasn't already been processed
            if charon_reported_status in ("RUNNING", "DONE"):
                if level == "seqrun":
                    LOG.info('Charon reports seqrun analysis for project "{}" / sample "{}" '
                             '/ libprep "{}" / seqrun "{}" does not need processing '
                             '(already "{}")'.format(project, sample, libprep, seqrun,
                                                     charon_reported_status))
                else: # Sample
                    LOG.info('Charon reports sample analysis for project "{}" / sample "{}" '
                             'does not need processing '
                             '(already "{}")'.format(project, sample, charon_reported_status))
                continue
            elif charon_reported_status == "FAILED":
                if not restart_failed_jobs:
                    if level == "seqrun":
                        LOG.error('FAILED:  Project "{}" / sample "{}" / library "{}" '
                                  '/ flowcell "{}": Charon reports FAILURE, manual '
                                  'investigation needed!'.format(project, sample, libprep, seqrun))
                    else: # Sample
                        LOG.error('FAILED:  Project "{}" / sample "{}": Charon reports FAILURE, manual '
                                  'investigation needed!'.format(project, sample))
                    continue
            try:
                # The engines themselves know which sub-workflows
                # they need to execute for a given level. For example,
                # with DNA Variant Calling on the sequencing run
                # level, we need to execute basic alignment and QC.
                if level == "seqrun":
                    LOG.info('Attempting to launch seqrun analysis for '
                             'project "{}" / sample "{}" / libprep "{}" '
                             '/ seqrun "{}", workflow "{}"'.format(project,
                                                                   sample,
                                                                   libprep,
                                                                   seqrun,
                                                                   workflow))
                    analysis_module.analyze_seqrun(project=project,
                                                   sample=sample,
                                                   libprep=libprep,
                                                   seqrun=seqrun)
                else: # sample level
                    LOG.info('Attempting to launch sample analysis for '
                             'project "{}" / sample "{}" / workflow '
                             '"{}"'.format(project, sample, workflow))
                    analysis_module.analyze_sample(project=project,
                                                   sample=sample)

            except Exception as e:
                LOG.error('Cannot process project "{}" / sample "{}" / '
                          'libprep "{}" / seqrun "{}" / workflow '
                          '"{}" : {}'.format(project, sample, libprep,
                                             seqrun, workflow, e))
                set_new_seqrun_status = "FAILED"
                continue
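
The Charon status handling in launch_analysis() above reduces to a single rule: RUNNING and DONE are skipped, FAILED is skipped unless restart_failed_jobs is set, and anything else (including a freshly forced NEW) is launched. A compact sketch of that rule; the helper name is ours, not part of the pipeline.

# Sketch only: condenses the Charon status gating used in launch_analysis() above.
def should_launch(charon_reported_status, restart_failed_jobs=False):
    """Return True if analysis should be launched for this Charon status."""
    if charon_reported_status in ("RUNNING", "DONE"):
        return False                 # already running or finished
    if charon_reported_status == "FAILED":
        return restart_failed_jobs   # only relaunch failures when explicitly asked
    return True                      # NEW or anything else: launch

# e.g. should_launch("FAILED") -> False
#      should_launch("FAILED", restart_failed_jobs=True) -> True
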
    parser.add_argument("-s", "--sample", required=True)
    parser.add_argument("-c",
                        "--coverage",
                        type=int,
                        required=True,
                        dest="required_coverage")

    args = parser.parse_args()

    project = args.project
    sample = args.sample
    required_coverage = args.required_coverage

    charon_session = CharonSession()
    try:
        reported_coverage = charon_session.sample_get(
            project, sample).get("total_autosomal_coverage")
    except CharonError as e:
        try:
            project = get_project_id_from_name(project)
        except (CharonError, RuntimeError, ValueError) as e:
            print(
                ('ERROR: Could not determine coverage for project {} / sample '
                 '{}: {}'.format(project, sample, e)),
                file=sys.stderr)
            reported_coverage = 0
        else:
            reported_coverage = charon_session.sample_get(
                project, sample).get("total_autosomal_coverage")
    if int(reported_coverage) >= int(required_coverage):
        sys.exit(0)
    else:
        sys.exit(1)
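
The fragment above is meant as a shell-level gate: it exits 0 when Charon reports that the sample has reached the required autosomal coverage, and exits non-zero otherwise. A standalone sketch of the same gate with the Charon lookup left out; the function name and the usage line are hypothetical.

# Sketch only: the coverage gate from the fragment above, with the Charon
# lookup stubbed out.
import sys

def coverage_gate(reported_coverage, required_coverage):
    """Exit 0 if the reported coverage meets the requirement, 1 otherwise."""
    if int(reported_coverage or 0) >= int(required_coverage):
        sys.exit(0)
    sys.exit(1)

# Hypothetical shell usage of such a script:
#   python check_coverage.py -p P1234 -s P1234_101 -c 30 && echo "coverage OK"
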
Example #18
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    for project in projects_to_analyze: # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR", info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e: # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name, sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR", info_text=e)
            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(project.project_id,
                                                                   sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                if charon_reported_status == "UNDER_ANALYSIS":
                    if not restart_running_jobs:
                        error_text = ('Charon reports sample analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample, charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "ANALYZED":
                    if not restart_finished_jobs:
                        error_text = ('Charon reports sample analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample, charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet') and not config.get('manual'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "FAILED":
                    if not restart_failed_jobs:
                        error_text = ('FAILED:  Project "{}" / sample "{}" Charon reports '
                                      'FAILURE, manual investigation needed!'.format(project, sample))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
            except CharonError as e:
                LOG.error(e)
                continue
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample, analysis_module.__name__))
                #actual analysis launch
                analysis_module.analyze(project=project,
                                        sample=sample,
                                        restart_finished_jobs=restart_finished_jobs,
                                        restart_running_jobs=restart_running_jobs,
                                        keep_existing_data=keep_existing_data,
                                        exec_mode=exec_mode,
                                        config=config,
                                        generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__,
                                                        e))
                LOG.error(error_text)
                if not config.get("quiet"):
                    mail_analysis(project_name=project.name, sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
                continue
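
The three status branches in this launch_analysis() variant implement one decision: a sample whose Charon analysis_status is UNDER_ANALYSIS, ANALYZED or FAILED is only relaunched when the matching restart flag is given; any other status is launched. A condensed sketch of that decision; the helper name and return convention are ours.

# Sketch only: the per-sample restart-flag decision used in launch_analysis() above.
def sample_needs_launch(analysis_status, restart_running=False,
                        restart_finished=False, restart_failed=False):
    """Return True if a sample with this Charon analysis_status should be (re)launched."""
    blocked = {
        "UNDER_ANALYSIS": restart_running,   # running: relaunch only on request
        "ANALYZED": restart_finished,        # finished: relaunch only on request
        "FAILED": restart_failed,            # failed: relaunch only on request
    }
    return blocked.get(analysis_status, True)  # any other status launches
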
Example #19
def analyze(analysis_object,
            level='sample',
            config=None,
            config_file_path=None):
    """Analyze data at the sample level.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis

    :raises ValueError: If exec_mode is an unsupported value
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            charon_reported_status = charon_session.sample_get(
                analysis_object.project.project_id,
                sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze = handle_sample_status(analysis_object, sample,
                                              charon_reported_status)
            if not do_analyze:
                continue
        except CharonError as e:
            LOG.error(e)
            continue
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            LOG.warn('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status"  # Or should we abort?
        try:
            check_for_preexisting_sample_runs(
                analysis_object.project, sample,
                analysis_object.restart_running_jobs,
                analysis_object.restart_finished_jobs, status_field)
        except RuntimeError as e:
            raise RuntimeError(
                'Aborting processing of project/sample "{}/{}": '
                '{}'.format(analysis_object.project, sample, e))
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError(
                '"exec_mode" param must be one of "sbatch" or "local" '
                'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            modules_to_load = analysis_object.config.get("piper", {}).get(
                "load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None  # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(
                        projectid=analysis_object.project.project_id,
                        sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error(
                        'Couldn\'t determine genotyping status for project/'
                        'sample "{}/{}"; skipping analysis.'.format(
                            analysis_object.project, sample))
                    continue
                if find_previous_genotype_analyses(
                        analysis_object.project,
                        sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info(
                            'Project/sample "{}/{}" has completed genotype '
                            'analysis previously; skipping (use flag to force '
                            'analysis)'.format(analysis_object.project,
                                               sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample,
                                       analysis_object.project))
                try:
                    log_file_path = create_log_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(
                                analysis_object.project, sample)
                            default_files_to_copy = None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(
                                analysis_object.project)
                            default_files_to_copy = None

                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  restart_finished_jobs=True,
                                                                  status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  analysis_object.restart_finished_jobs,
                                                                  status_field="alignment_status")
                    setup_xml_cl, setup_xml_path = build_setup_xml(
                        project=updated_project,
                        sample=sample,
                        workflow=workflow_subtask,
                        local_scratch_mode=(
                            analysis_object.exec_mode == "sbatch"),
                        config=analysis_object.config)
                    piper_cl = build_piper_cl(
                        project=analysis_object.project,
                        workflow_name=workflow_subtask,
                        setup_xml_path=setup_xml_path,
                        exit_code_path=exit_code_path,
                        config=analysis_object.config,
                        exec_mode=analysis_object.exec_mode,
                        generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample(
                            [setup_xml_cl, piper_cl],
                            workflow_subtask,
                            analysis_object.project,
                            sample,
                            restart_finished_jobs=analysis_object.restart_finished_jobs,
                            files_to_copy=default_files_to_copy)
                        for x in xrange(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project,
                                                      sample, slurm_job_id))
                    else:  # "local"
                        raise NotImplementedError(
                            'Local execution not currently implemented. '
                            'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        record_process_sample(
                            project=analysis_object.project,
                            sample=sample,
                            analysis_module_name="piper_ngi",
                            slurm_job_id=slurm_job_id,
                            process_id=process_id,
                            workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    error_msg = (
                        'Processing project "{}" / sample "{}" / workflow "{}" '
                        'failed: {}'.format(analysis_object.project, sample,
                                            workflow_subtask, e))
                    LOG.error(error_msg)
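
One detail worth noting in the sbatch branch above is the for/else polling loop: sacct can lag a few seconds behind sbatch, so the job status is polled with short sleeps, and the else clause only runs if every attempt failed. A standalone sketch of that idiom; the check callable stands in for get_slurm_job_status and is assumed to raise ValueError while the job is not yet visible.

# Sketch only: the for/else polling idiom used after sbatch submission above.
import time

def wait_until_visible(check, job_id, attempts=10, delay=2):
    """Return True once check(job_id) stops raising ValueError, False if it never does."""
    for _ in range(attempts):
        try:
            check(job_id)
            break                 # job is visible in the accounting database
        except ValueError:
            time.sleep(delay)     # not visible yet; wait and retry
    else:
        # Only reached if the loop never hit `break`.
        return False
    return True
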
Example #20
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    multiqc_projects = set()
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED" # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    #add project to MultiQC
                    multiqc_projects.add((project_base_path, project_id, project_name))


                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                           project_base_path)

                        
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj, status_field=seqrun_status_field,
                                              status_value=set_status, config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            remote_sample=charon_session.sample_get(projectid=project_id, sampleid=sample_id)
                            charon_status = remote_sample.get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warning('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name, sample_name=sample_id,
                                              engine_name=engine, level="ERROR",
                                              workflow=workflow, info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
        session.commit()
    #Run Multiqc
    for pj_tuple in multiqc_projects:
        LOG.info("Running MultiQC on project {}".format(pj_tuple[1]))
        run_multiqc(pj_tuple[0], pj_tuple[1], pj_tuple[2])
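
The long if/elif chain above boils down to a small mapping from the piper exit code to the statuses written to Charon: 0 means the workflow finished, a positive integer means it failed, and None means the job is either still running or died without writing an exit code. A condensed sketch of that mapping for the merge_process_variantcall workflow; the function name and the tuple return shape are ours.

# Sketch only: the exit-code handling of update_charon_with_local_jobs_status()
# above, condensed for the merge_process_variantcall workflow.
def charon_status_for_exit_code(piper_exit_code, job_is_running):
    """Map a piper exit code to (sample analysis_status, seqrun alignment_status)."""
    if piper_exit_code == 0:
        return ("ANALYZED", "DONE")            # finished successfully
    if isinstance(piper_exit_code, int) and piper_exit_code > 0:
        return ("FAILED", "FAILED")            # workflow reported failure
    if job_is_running:
        return ("UNDER_ANALYSIS", "RUNNING")   # no exit code yet, job still alive
    return ("FAILED", "FAILED")                # no exit code and job gone: treat as failed
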
Example #21
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED" # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished successfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    #run MultiQC
                    LOG.info("Running MultiQC on project {}".format(project_name))
                    try:
                        run_multiqc(project_base_path, project_id, project_name)
                    except Exception as e:
                        LOG.error(e)


                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                           project_base_path)

                        
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj, status_field=seqrun_status_field,
                                              status_value=set_status, config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            charon_status = \
                                    charon_session.sample_get(projectid=project_id,
                                                              sampleid=sample_id).get(sample_status_field)
                            if charon_status and charon_status != set_status:
                                LOG.warn('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name, sample_name=sample_id,
                                              engine_name=engine, level="ERROR",
                                              workflow=workflow, info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
        session.commit()
from __future__ import print_function

import argparse
import sys

from ngi_pipeline.database.classes import CharonError, CharonSession
from ngi_pipeline.database.communicate import get_project_id_from_name

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--project", required=True)
    parser.add_argument("-s", "--sample", required=True)
    parser.add_argument("-c", "--coverage", type=int, required=True, dest="required_coverage")

    args = parser.parse_args()

    project = args.project
    sample = args.sample
    required_coverage = args.required_coverage

    charon_session = CharonSession()
    try:
        reported_coverage = charon_session.sample_get(project, sample).get("total_autosomal_coverage") or 0
    except CharonError:
        # The project may have been given by name rather than by id; resolve it and retry
        try:
            project = get_project_id_from_name(project)
            reported_coverage = charon_session.sample_get(project, sample).get("total_autosomal_coverage") or 0
        except (CharonError, RuntimeError, ValueError) as e:
            print(('ERROR: Could not determine coverage for project {} / sample '
                   '{}: {}'.format(project, sample, e)), file=sys.stderr)
            reported_coverage = 0
    if int(reported_coverage) >= int(required_coverage):
        sys.exit(0)
    else:
        sys.exit(1)