示例#1
0
def update_analysis(project_id, status):
    """Record the outcome of an analysis in Charon for a whole project.

    Calls ``mail_analysis`` for the project (level INFO when ``status`` is
    truthy, ERROR otherwise), then walks the project's samples in Charon:

    * samples whose ``analysis_status`` is "UNDER_ANALYSIS" are set to
      "ANALYZED" (success) or "FAILED" (failure);
    * for those samples, every seqrun with ``alignment_status`` "RUNNING"
      that belongs to a libprep whose ``qc`` is not "FAILED" is set to
      "DONE" (success) or "FAILED" (failure).

    :param project_id: The id of the project to update
    :param status: Truthy if the analysis succeeded, falsy if it failed
    """
    charon = CharonSession()
    if status:
        mail_level = 'INFO'
        new_sample_status = 'ANALYZED'
        new_seqrun_status = 'DONE'
    else:
        mail_level = 'ERROR'
        new_sample_status = 'FAILED'
        new_seqrun_status = 'FAILED'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)

    samples = charon.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        # Only samples currently flagged as being analysed are touched
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, new_sample_status))
        charon.sample_update(project_id,
                             sample_id,
                             analysis_status=new_sample_status)

        libpreps = charon.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            # Libpreps that failed QC are left untouched
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = charon.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info(
                    "Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                        project_id, sample_id, libprep_id, seqrun_id,
                        new_seqrun_status))
                charon.seqrun_update(project_id,
                                     sample_id,
                                     libprep_id,
                                     seqrun_id,
                                     alignment_status=new_seqrun_status)
示例#2
0
def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon from a workflow return code.

    The project name and sample id are extracted from ``job_id`` using
    ``STHLM_UUSNP_SAMPLE_RE``; the project name is then translated into a
    project id with ``get_project_id_from_name``.

    :param str job_id: Job identifier; must match STHLM_UUSNP_SAMPLE_RE and
                       provide the "project_name" and "sample_id" groups
    :param int return_code: The return code of the workflow process
                            (None -> "RUNNING", 0 -> "DONE",
                            anything else -> "COMPUTATION_FAILED")
    :param string run_dir: the directory where results are stored
                           (not used by this function)

    :raises RuntimeError: If the job id could not be parsed or the Charon
                          database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # .match() returns None when the job id does not fit the pattern,
        # which surfaces here as an AttributeError on .groupdict()
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(
            job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # One placeholder per argument: the previous message had three
        # placeholders for four values, so the error text was never shown.
        error_msg = ('Failed to update sample status to "{}" for '
                     'project/sample "{}/{}" in Charon database: {}'.format(
                         status, project_id, sample_id, e))
        raise RuntimeError(error_msg)
示例#3
0
 def save_delivery_token_in_charon(self, delivery_token):
     """Store ``delivery_token`` on this sample's record in Charon.

     The sample is addressed by ``self.projectid`` / ``self.sampleid``.
     """
     session = CharonSession()
     project, sample = self.projectid, self.sampleid
     session.sample_update(project, sample, delivery_token=delivery_token)
示例#4
0
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     """Append a delivery name to ``delivery_projects`` of this sample in Charon.

     The sample record (``self.projectid`` / ``self.sampleid``) is fetched
     and ``supr_name_of_delivery`` is appended to its ``delivery_projects``
     list unless it is already there. Any failure is logged and swallowed,
     so this method never raises.

     :param supr_name_of_delivery: The delivery name to record
     """
     charon_session = CharonSession()
     try:
         # Fetch the sample record
         sample_charon = charon_session.sample_get(self.projectid,
                                                   self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Check against the list of deliveries, not the keys of the
         # sample record itself.
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(
                 self.projectid,
                 self.sampleid,
                 delivery_projects=delivery_projects)
             logger.info(
                 'Charon delivery_projects for sample {} updated with value {}'
                 .format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warning(
                 'Charon delivery_projects for sample {} not updated with value {} because the value was already present'
                 .format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         # Best effort: a failed bookkeeping update must not abort delivery
         logger.error(
             'Failed to update delivery_projects in charon while delivering {}. Error says: {}'
             .format(self.sampleid, e))
         logger.exception(e)
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    If the configuration defines ``genotyping -> lower_bound_cutoff``, the
    sample's ``genotype_status`` is also set: "FAILED" when the concordance
    is below the cutoff, "PASSED" otherwise.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file
                                 (not used by this function)

    :raises CharonError: If there is some Error -- with Charon
    :raises IOError: If the path specified is missing or inaccessible
    :raises ValueError: If the specified sample has no data in the gtc file
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    # Tolerate a missing configuration: no cutoff means no status update
    gtc_lower_bound = (config or {}).get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # NOTE(review): assumes the parsed concordance and the configured
        # cutoff are both numeric and on the same scale -- confirm.
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
示例#6
0
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    If the configuration defines ``genotyping -> lower_bound_cutoff``, the
    sample's ``genotype_status`` is also set: "FAILED" when the concordance
    is below the cutoff, "PASSED" otherwise.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file
                                 (not used by this function)

    :raises CharonError: If there is some Error -- with Charon
    :raises IOError: If the path specified is missing or inaccessible
    :raises ValueError: If the specified sample has no data in the gtc file
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    # Tolerate a missing configuration: no cutoff means no status update
    gtc_lower_bound = (config or {}).get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # NOTE(review): assumes the parsed concordance and the configured
        # cutoff are both numeric and on the same scale -- confirm.
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon from a workflow return code.

    The project name and sample id are extracted from ``job_id`` using
    ``STHLM_UUSNP_SAMPLE_RE``; the project name is then translated into a
    project id with ``get_project_id_from_name``.

    :param str job_id: Job identifier; must match STHLM_UUSNP_SAMPLE_RE and
                       provide the "project_name" and "sample_id" groups
    :param int return_code: The return code of the workflow process
                            (None -> "RUNNING", 0 -> "DONE",
                            anything else -> "COMPUTATION_FAILED")
    :param string run_dir: the directory where results are stored
                           (not used by this function)

    :raises RuntimeError: If the job id could not be parsed or the Charon
                          database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # .match() returns None when the job id does not fit the pattern,
        # which surfaces here as an AttributeError on .groupdict()
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # One placeholder per argument: the previous message had three
        # placeholders for four values, so the error text was never shown.
        error_msg = ('Failed to update sample status to "{}" for '
                     'project/sample "{}/{}" in Charon database: {}'.format(
                         status, project_id, sample_id, e))
        raise RuntimeError(error_msg)
def update_sample_duplication_and_coverage(project_id,
                                           sample_id,
                                           project_base_path,
                                           config=None,
                                           config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage
    for said sample.

    The values are parsed from the piper_ngi output below
    ``<project_base_path>/ANALYSIS/<project_id>/piper_ngi``. A value that
    cannot be parsed is logged and recorded as 0, so the Charon update is
    still attempted. A failed Charon update is logged and, unless the
    configuration sets ``quiet``, reported through ``mail_analysis``; it is
    not re-raised.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file
                                 (not used by this function)
    """
    piper_dir = os.path.join(project_base_path, 'ANALYSIS', project_id,
                             'piper_ngi')
    dup_file_path = os.path.join(piper_dir, '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(
        piper_dir, '06_final_alignment_qc',
        "{}.clean.dedup.qc".format(sample_id), "genome_results.txt")

    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit the way a bare "except:" would.
        dup_pc = 0
        LOG.error(
            "Cannot find {}.metrics file for duplication rate at {}. Continuing."
            .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error(
            "Cannot find genome_results.txt file for sample coverage at {}. Continuing."
            .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info(
            'Updating sample "{}" in '
            'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
            .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # The previous message referenced an undefined name and nested the
        # format calls, so this handler itself raised instead of reporting.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id,
                          sample_name=sample_id,
                          engine_name="piper_ngi",
                          level="ERROR",
                          info_text=error_text)
def update_gt_status_in_charon(sample_id, status, concordance=None):
    """Set the genotype status (and optionally concordance) of a sample.

    The project id is taken to be the part of ``sample_id`` before the
    first underscore. Charon is only written to when a value to be stored
    differs from what the sample record already holds.

    :param str sample_id: The sample id
    :param status: The genotype_status value to store
    :param concordance: The genotype_concordance value to store; when None
                        only the status is compared and written
    :returns: The error message if Charon raised a CharonError, else None
    """
    project_id = sample_id.split('_')[0]
    try:
        session = CharonSession()
        current = session.sample_get(project_id, sample_id)
        new_fields = {'genotype_status': status}
        needs_update = current.get('genotype_status') != status
        if concordance is not None:
            new_fields['genotype_concordance'] = concordance
            if not needs_update:
                needs_update = current.get('genotype_concordance') != concordance
        if needs_update:
            session.sample_update(projectid=project_id,
                                  sampleid=sample_id,
                                  **new_fields)
    except CharonError as e:
        return str(e)
def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Mark samples found in genotype files as having genotype data available.

    Genotype files are either given explicitly or searched for in one or
    more inbox directories (files matching ``GENOTYPE_FILE_RE`` modified
    within the last ``num_days`` days). Each file is parsed with
    ``parse_samples_from_vcf``; for every sample whose Charon
    ``genotype_status`` is None or "NOT_AVAILABLE" the status is set to
    "AVAILABLE". Charon errors are logged per sample and do not stop the run.

    :param str inbox: Directory to search; when omitted the directories
                      listed in config["environment"]["flowcell_inbox"]
                      are searched instead
    :param int num_days: Maximum age, in days, of files to consider
    :param list genotype_files: Explicit genotype file paths; when given,
                                no inbox search is done
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file (only
                                 used in the error message)

    :raises ValueError: If neither genotype files, an inbox, nor a
                        configured inbox is available
    """
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        if inbox:
            inboxes = [inbox]
        else:
            try:
                # NOTE(review): assumed to be a list of directory paths -- confirm
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError("No path to delivery inbox specified by argument "
                                 "or in configuration file ({}). Exiting.".format(config_file_path))
        # Oldest acceptable modification time, as seconds since the epoch
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Collect across ALL inboxes rather than keeping only the last one
        gt_files_valid = []
        for inbox_path in inboxes:
            inbox_path = os.path.abspath(inbox_path)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match, glob.glob(os.path.join(inbox_path, "*"))):
                # cutoff_age is already an absolute timestamp
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(inboxes), time.ctime(cutoff_age)))
            return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
                find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        # .items() works on both Python 2 and Python 3
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id,
                                                                      sample,
                                                                      e))
示例#11
0
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     """Append a delivery name to ``delivery_projects`` of this sample in Charon.

     The sample record (``self.projectid`` / ``self.sampleid``) is fetched
     and ``supr_name_of_delivery`` is appended to its ``delivery_projects``
     list unless it is already there. Any failure is logged and swallowed,
     so this method never raises.

     :param supr_name_of_delivery: The delivery name to record
     """
     charon_session = CharonSession()
     try:
         # Fetch the sample record
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Check against the list of deliveries, not the keys of the
         # sample record itself.
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated with value {}'.format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for sample {} not updated with value {} because the value was already present'.format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         # Best effort: a failed bookkeeping update must not abort delivery
         logger.error('Failed to update delivery_projects in charon while delivering {}. Error says: {}'.format(self.sampleid, e))
         logger.exception(e)
示例#12
0
def main(project):
    """Reset the analysis state stored in Charon for every sample of a project.

    Each sample gets analysis_status "TO_ANALYZE", genotype_status None and
    its coverage / read totals set to "0"; each of its seqruns gets
    mean_autosomal_coverage "0" and alignment_status "NOT_RUNNING".

    :param project: The project id as known to Charon
    """
    session = CharonSession()
    sample_reset = dict(analysis_status="TO_ANALYZE",
                        genotype_status=None,
                        total_autosomal_coverage="0",
                        total_sequenced_reads="0")
    seqrun_reset = dict(mean_autosomal_coverage="0",
                        alignment_status="NOT_RUNNING")

    project_samples = session.project_get_samples(project)
    for entry in project_samples["samples"]:
        sample_id = entry["sampleid"]
        session.sample_update(project, sample_id, **sample_reset)

        libpreps = session.sample_get_libpreps(project, sample_id)['libpreps']
        for libprep in libpreps:
            libprep_id = libprep["libprepid"]
            libprep_seqruns = session.libprep_get_seqruns(
                project, sample_id, libprep_id)['seqruns']
            for run in libprep_seqruns:
                session.seqrun_update(project, sample_id, libprep_id,
                                      run["seqrunid"], **seqrun_reset)
示例#13
0
 def add_dds_name_delivery_in_charon(self, name_of_delivery):
     """Append a delivery name to ``delivery_projects`` of this sample in Charon.

     The sample record (``self.projectid`` / ``self.sampleid``) is fetched
     and ``name_of_delivery`` is appended to its ``delivery_projects`` list
     unless it is already there. Any failure is logged (with traceback) and
     swallowed, so this method never raises.

     :param name_of_delivery: The delivery name to record
     """
     charon_session = CharonSession()
     try:
         # Fetch the sample record
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Check against the list of deliveries, not the keys of the
         # sample record itself.
         if name_of_delivery not in delivery_projects:
             delivery_projects.append(name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated '
                         'with value {}'.format(self.sampleid, name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for sample {} not updated '
                            'with value {} because the value was already present'.format(self.sampleid, name_of_delivery))
     except Exception:
         # Best effort: a failed bookkeeping update must not abort delivery
         logger.exception('Failed to update delivery_projects in charon while delivering {}.'.format(self.sampleid))
示例#14
0
def update_gt_status_in_charon(sample_id, status, concordance=None):
    """Set the genotype status (and optionally concordance) of a sample.

    The project id is taken to be the part of ``sample_id`` before the
    first underscore. Charon is only written to when a value to be stored
    differs from what the sample record already holds.

    :param str sample_id: The sample id
    :param status: The genotype_status value to store
    :param concordance: The genotype_concordance value to store; when None
                        only the status is compared and written
    :returns: The error message if Charon raised a CharonError, else None
    """
    project_id = sample_id.split('_')[0]
    try:
        session = CharonSession()
        current = session.sample_get(project_id, sample_id)
        new_fields = {'genotype_status': status}
        needs_update = current.get('genotype_status') != status
        if concordance is not None:
            new_fields['genotype_concordance'] = concordance
            if not needs_update:
                needs_update = current.get('genotype_concordance') != concordance
        if needs_update:
            session.sample_update(projectid=project_id,
                                  sampleid=sample_id,
                                  **new_fields)
    except CharonError as e:
        return str(e)
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path,
                                       config=None, config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage
    for said sample.

    The values are parsed from the piper_ngi output below
    ``<project_base_path>/ANALYSIS/<project_id>/piper_ngi``. A value that
    cannot be parsed is logged and recorded as 0, so the Charon update is
    still attempted. A failed Charon update is logged and, unless the
    configuration sets ``quiet``, reported through ``mail_analysis``; it is
    not re-raised.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file
                                 (not used by this function)
    """
    piper_dir = os.path.join(project_base_path, 'ANALYSIS', project_id, 'piper_ngi')
    dup_file_path = os.path.join(piper_dir, '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(piper_dir, '06_final_alignment_qc',
                                            "{}.clean.dedup.qc".format(sample_id),
                                            "genome_results.txt")

    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit the way a bare "except:" would.
        dup_pc = 0
        LOG.error("Cannot find {}.metrics file for duplication rate at {}. Continuing.".format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error("Cannot find genome_results.txt file for sample coverage at {}. Continuing.".format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info('Updating sample "{}" in '
                 'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'.format(sample_id, dup_pc, cov))
    except CharonError as e:
        # The previous message referenced an undefined name and nested the
        # format calls, so this handler itself raised instead of reporting.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                          engine_name="piper_ngi", level="ERROR", info_text=error_text)
示例#16
0
def main(project):
    """Reset the analysis state stored in Charon for every sample of a project.

    Each sample gets analysis_status "TO_ANALYZE", genotype_status None and
    its coverage / read totals set to "0"; each of its seqruns gets
    mean_autosomal_coverage "0" and alignment_status "NOT_RUNNING".

    :param project: The project id as known to Charon
    """
    session = CharonSession()
    sample_reset = dict(analysis_status="TO_ANALYZE",
                        genotype_status=None,
                        total_autosomal_coverage="0",
                        total_sequenced_reads="0")
    seqrun_reset = dict(mean_autosomal_coverage="0",
                        alignment_status="NOT_RUNNING")

    project_samples = session.project_get_samples(project)
    for entry in project_samples["samples"]:
        sample_id = entry["sampleid"]
        session.sample_update(project, sample_id, **sample_reset)

        libpreps = session.sample_get_libpreps(project, sample_id)['libpreps']
        for libprep in libpreps:
            libprep_id = libprep["libprepid"]
            libprep_seqruns = session.libprep_get_seqruns(
                project, sample_id, libprep_id)['seqruns']
            for run in libprep_seqruns:
                session.seqrun_update(project, sample_id, libprep_id,
                                      run["seqrunid"], **seqrun_reset)
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED" # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    #run MultiQC
                    LOG.info("Running MultiQC on project {}".format(project_name))
                    try:
                        run_multiqc(project_base_path, project_id, project_name)
                    except Exception as e:
                        LOG.error(e)


                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                           project_base_path)

                        
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj, status_field=seqrun_status_field,
                                              status_value=set_status, config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            charon_status = \
                                    charon_session.sample_get(projectid=project_id,
                                                              sampleid=sample_id).get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warn('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name, sample_name=sample_id,
                                              engine_name=engine, level="ERROR",
                                              workflow=workflow, info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
        session.commit()
示例#18
0
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SampleAnalysis row in the local tracking database is inspected and
    handled according to the exit code reported by get_exit_code():

    * exit code 0: the sample is marked "ANALYZED" and its seqruns "DONE" in
      Charon, the local row is deleted, and seqrun coverage is updated from
      the "02_preliminary_alignment_qc" directory;
    * exit code > 0: the sample and its seqruns are marked "FAILED" and the
      local row is deleted;
    * anything else: the job is looked up via slurm (when a slurm job id is
      recorded) or via psutil (process id). If it is no longer running it is
      marked "FAILED" and the local row is deleted; if it is still running,
      Charon is brought back to "UNDER_ANALYSIS"/"RUNNING" when it disagrees.

    CharonError and OSError raised while processing one sample are logged
    (and mailed unless config['quiet'] is set) and do not stop the loop.

    :param dict config: Parsed configuration; only the "quiet" key is read
                        here. None is treated as an empty configuration.
    :param str config_file_path: Not used by this function.
    """
    # The signature defaults config to None; without this guard every
    # config.get('quiet') below would raise AttributeError in that case.
    config = config or {}
    quiet = config.get('quiet')
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
                continue
            try:
                # A plain truthiness test cannot be used here: an exit code
                # of 0 is falsy, which previously made the success branch
                # unreachable.
                has_exit_code = isinstance(piper_exit_code, int)
                if has_exit_code and piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not quiet:
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="INFO",
                                      info_text=info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status = "DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id, "piper_ngi",
                                                "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id,
                                                       piper_qc_dir)
                elif has_exit_code and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not quiet:
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="ERROR",
                                      info_text=error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            # Status lookup failed: treat the job as failed
                            slurm_exit_code = 1
                        # "None" indicates job is still running
                        job_failed = slurm_exit_code is not None
                    else:
                        # Job did not write an exit code; it has failed if
                        # its process is no longer running either
                        job_failed = not psutil.pid_exists(process_id)
                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(label, set_status))
                        LOG.error(error_text)
                        if not quiet:
                            mail_analysis(project_name=project_name, sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:  # Job still running
                        # .get() so that a sample without the field is reset
                        # below rather than aborting the run with a KeyError
                        charon_status = charon_session.sample_get(
                            projectid=project_id,
                            sampleid=sample_id).get('analysis_status')
                        if charon_status != "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warning('Tracking inconsistency for {}: Charon status is "{}" but '
                                        'local process tracking database indicates it is running. '
                                        'Setting value in Charon to {}.'.format(label, charon_status,
                                                                                set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
        session.commit()
示例#19
0
def create_charon_entries_from_project(project, best_practice_analysis="whole_genome_reseq",
                                       sequencing_facility="NGI-S",
                                       force_overwrite=False, delete_existing=False,
                                       retry_on_fail=True):
    """Given a project object, creates the relevant entries in Charon.

    The project is created first, then every sample, libprep and seqrun
    found by iterating the project object. An entry that already exists
    (Charon answers with status code 400) is updated when force_overwrite
    is set and otherwise left alone. Other per-entry Charon errors are
    logged and the entry (with everything below it) is skipped.

    The original author flagged this code as written in a hurry; treat it
    with care.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this project (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing (default "NGI-S")
    :param bool force_overwrite: If this is set to true, overwrite existing entries in Charon (default false)
    :param bool delete_existing: Don't just update existing entries, delete them and create new ones (default false)
    :param bool retry_on_fail: If any entry failed, run the whole procedure once more (default true)

    :raises CharonError: If creating the project fails with a status other
                         than 400, if an overwrite update fails, or if
                         entries still fail when no retry is left.
    """
    charon_session = CharonSession()
    project_id = project.project_id
    update_failed = False
    try:
        status = "OPEN"
        LOG.info('Creating project "{}" with status "{}", best practice analysis "{}", '
                 'and sequencing_facility {}'.format(project, status,
                                                     best_practice_analysis,
                                                     sequencing_facility))
        charon_session.project_create(projectid=project_id,
                                      name=project.name,
                                      status=status,
                                      best_practice_analysis=best_practice_analysis,
                                      sequencing_facility=sequencing_facility)
        LOG.info('Project "{}" created in Charon.'.format(project))
    except CharonError as e:
        # 400 is handled as "project already exists"; anything else is fatal
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warning('Overwriting data for project "{}"'.format(project))
            charon_session.project_update(projectid=project_id,
                                          name=project.name,
                                          status=status,
                                          best_practice_analysis=best_practice_analysis,
                                          sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info('Project "{}" already exists; moving to samples...'.format(project))
    for sample in project:
        if delete_existing:
            LOG.warning('Deleting existing sample "{}"'.format(sample))
            try:
                charon_session.sample_delete(projectid=project_id,
                                             sampleid=sample.name)
            except CharonError as e:
                update_failed = True
                LOG.error('Could not delete sample "{}": {}'.format(sample, e))
        try:
            analysis_status = "TO_ANALYZE"
            LOG.info('Creating sample "{}" with analysis_status "{}"'.format(sample, analysis_status))
            charon_session.sample_create(projectid=project_id,
                                         sampleid=sample.name,
                                         analysis_status=analysis_status)
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(project, sample))
        except CharonError as e:
            if e.status_code == 400:
                if force_overwrite:
                    LOG.warning('Overwriting data for project "{}" / '
                                'sample "{}"'.format(project, sample))
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample.name,
                                                 analysis_status=analysis_status)
                    LOG.info('Project/sample "{}/{}" updated in Charon.'.format(project, sample))
                else:
                    LOG.info('Project "{}" / sample "{}" already exists; moving '
                             'to libpreps'.format(project, sample))
            else:
                # Skip this sample's libpreps/seqruns; remember the failure
                update_failed = True
                LOG.error(e)
                continue
        for libprep in sample:
            if delete_existing:
                LOG.warning('Deleting existing libprep "{}"'.format(libprep))
                try:
                    charon_session.libprep_delete(projectid=project_id,
                                                  sampleid=sample.name,
                                                  libprepid=libprep.name)
                except CharonError as e:
                    # NOTE(review): unlike sample/seqrun deletion, a failed
                    # libprep deletion does not set update_failed -- confirm
                    # whether that asymmetry is intended.
                    LOG.warning('Could not delete libprep "{}": {}'.format(libprep, e))
            try:
                qc = "PASSED"
                LOG.info('Creating libprep "{}" with qc status "{}"'.format(libprep, qc))
                charon_session.libprep_create(projectid=project_id,
                                              sampleid=sample.name,
                                              libprepid=libprep.name,
                                              qc=qc)
                LOG.info(('Project/sample/libprep "{}/{}/{}" created in '
                          'Charon').format(project, sample, libprep))
            except CharonError as e:
                if e.status_code == 400:
                    if force_overwrite:
                        LOG.warning('Overwriting data for project "{}" / '
                                    'sample "{}" / libprep "{}"'.format(project, sample,
                                                                        libprep))
                        charon_session.libprep_update(projectid=project_id,
                                                      sampleid=sample.name,
                                                      libprepid=libprep.name,
                                                      qc=qc)
                        LOG.info(('Project/sample/libprep "{}/{}/{}" updated in '
                                  'Charon').format(project, sample, libprep))
                    else:
                        LOG.debug(e)
                        # The next level handled after libpreps is seqruns
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                                 'exists; moving to seqruns'.format(project, sample, libprep))
                else:
                    # Skip this libprep's seqruns; remember the failure
                    update_failed = True
                    LOG.error(e)
                    continue
            for seqrun in libprep:
                if delete_existing:
                    LOG.warning('Deleting existing seqrun "{}"'.format(seqrun))
                    try:
                        charon_session.seqrun_delete(projectid=project_id,
                                                     sampleid=sample.name,
                                                     libprepid=libprep.name,
                                                     seqrunid=seqrun.name)
                    except CharonError as e:
                        update_failed = True
                        LOG.error('Could not delete seqrun "{}": {}'.format(seqrun, e))
                try:
                    alignment_status = "NOT_RUNNING"
                    LOG.info('Creating seqrun "{}" with alignment_status "{}"'.format(seqrun, alignment_status))
                    charon_session.seqrun_create(projectid=project_id,
                                                 sampleid=sample.name,
                                                 libprepid=libprep.name,
                                                 seqrunid=seqrun.name,
                                                 alignment_status=alignment_status,
                                                 total_reads=0,
                                                 mean_autosomal_coverage=0)
                    LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                              'created in Charon').format(project, sample,
                                                          libprep, seqrun))
                except CharonError as e:
                    if e.status_code == 400:
                        if force_overwrite:
                            LOG.warning('Overwriting data for project "{}" / '
                                        'sample "{}" / libprep "{}" / '
                                        'seqrun "{}"'.format(project, sample,
                                                             libprep, seqrun))
                            charon_session.seqrun_update(projectid=project_id,
                                                         sampleid=sample.name,
                                                         libprepid=libprep.name,
                                                         seqrunid=seqrun.name,
                                                         alignment_status=alignment_status,
                                                         total_reads=0,
                                                         mean_autosomal_coverage=0)
                            LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                                      'updated in Charon').format(project, sample,
                                                                  libprep, seqrun))
                        else:
                            LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                     'seqrun "{}" already exists; next...'.format(project, sample,
                                                                                  libprep, seqrun))
                    else:
                        update_failed = True
                        LOG.error(e)

    if update_failed:
        if retry_on_fail:
            # One more full pass; the retry itself is not allowed to retry
            create_charon_entries_from_project(project, best_practice_analysis=best_practice_analysis,
                                               sequencing_facility=sequencing_facility,
                                               force_overwrite=force_overwrite,
                                               delete_existing=delete_existing,
                                               retry_on_fail=False)
        else:
            raise CharonError("A network error blocks Charon updating.")
示例#20
0
def kill_running_sample_analysis(workflow_subtask, project_id, sample_id):
    """Kill a sample analysis that is tracked in the local jobs database.

    Looks up the SampleAnalysis row matching the workflow, project and
    sample. When one exists, its slurm job is killed, the sample (and its
    seqruns, via recurse_status_for_sample) is marked "FAILED" in Charon and
    the row is removed from the local database. Failures to update Charon or
    to remove the row are logged but do not change the return value.

    :param str workflow_subtask: The workflow of the run to kill, e.g.
                                 "merge_process_variantcall" or
                                 "genotype_concordance"
    :param str project_id: The id of the project
    :param str sample_id: The id of the sample

    :returns: False if killing the slurm job raised an exception; True
              otherwise (including when the sample is not under analysis)
    :rtype: bool
    """
    # Charon status fields to update, as (sample field, seqrun field)
    status_fields_by_workflow = {
        "genotype_concordance": ("genotype_status", "genotype_status"),
        "merge_process_variantcall": ("analysis_status", "alignment_status"),
    }
    sample_run_name = "{}/{}".format(project_id, sample_id)
    LOG.info('Attempting to kill sample analysis run "{}"'.format(sample_run_name))
    LOG.info('Checking if sample run "{}" is currently being analyzed '
             '(workflow "{}")...'.format(sample_run_name, workflow_subtask))
    with get_db_session() as session:
        db_q = session.query(SampleAnalysis).filter_by(workflow=workflow_subtask,
                                                       project_id=project_id,
                                                       sample_id=sample_id)
        sample_run = db_q.first()
        if sample_run:
            try:
                slurm_job_id = sample_run.slurm_job_id
                LOG.info('...sample run "{}" is currently being analyzed '
                         '(workflow subtask "{}") and has slurm job id "{}"; '
                         'trying to kill it...'.format(sample_run_name,
                                                       workflow_subtask,
                                                       slurm_job_id))
                kill_slurm_job_by_id(slurm_job_id)
            except Exception as e:
                # Nothing is updated when the job could not be killed
                LOG.error('Could not kill sample run "{}": {}'.format(sample_run_name, e))
                return False
            try:
                project_obj = create_project_obj_from_analysis_log(sample_run.project_name,
                                                                   sample_run.project_id,
                                                                   sample_run.project_base_path,
                                                                   sample_run.sample_id,
                                                                   sample_run.workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(sample_run.workflow,
                                                                   sample_run.project_id,
                                                                   sample_run.sample_id,
                                                                   e))
                LOG.error(error_text)
            else:
                status_fields = status_fields_by_workflow.get(workflow_subtask)
                if status_fields is None:
                    # Without a known field there is nothing sensible to
                    # write; skip the Charon update but still clean up below.
                    LOG.error('Unknown workflow "{}": cannot determine which '
                              'Charon status field to update for project/sample '
                              '"{}/{}"'.format(workflow_subtask, project_id,
                                               sample_id))
                else:
                    sample_status_field, seqrun_status_field = status_fields
                    set_status = "FAILED"
                    try:
                        charon_session = CharonSession()
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status)
                    except CharonError as e:
                        LOG.error('Couldn\'t update Charon field "{}" to "{}" for '
                                  'project/sample "{}/{}": {}'.format(sample_status_field,
                                                                      set_status,
                                                                      project_id,
                                                                      sample_id, e))
            try:
                LOG.info('Removing sample run "{}" from local jobs database...'.format(sample_run_name))
                # Remove from local jobs database
                session.delete(sample_run)
                session.commit()
                LOG.info("Deleted.")
            except Exception as e:
                LOG.error('Failed to remove entry for sample run "{}" from '
                          'local jobs database: {}'.format(sample_run_name, e))
        else:
            LOG.info('...sample run "{}" is not currently under analysis.'.format(sample_run_name))
    return True
示例#21
0
def launch_analysis(level, projects_to_analyze, restart_failed_jobs=False,
                    config=None, config_file_path=None):
    """Launch sample- or seqrun-level analysis for the objects in the given projects.

    Local job state is first pushed to Charon. Then, for each project, the
    workflow name is read from the project's "pipeline" field in Charon, the
    matching analysis engine module is looked up in the configuration and
    imported, and the engine's analyze_seqrun() / analyze_sample() is called
    for every seqrun / sample that Charon does not report as "RUNNING" or
    "DONE" (or as "FAILED", unless restart_failed_jobs is set).

    :param str level: "seqrun" to analyze each seqrun or "sample" to analyze each
                      sample; with any other value nothing is processed
    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Relaunch objects Charon reports as "FAILED"
                                     instead of logging and skipping them (default false)
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.

    :raises RuntimeError: If no analysis engine is configured for a project's workflow
    :raises Exception: Any error raised by the analysis engine is logged and re-raised
    """
    # NOTE(review): config defaults to None but is indexed directly below --
    # presumably it is injected by a decorator not visible here; confirm.

    # Update Charon with the local state of all the jobs we're running
    update_charon_with_local_jobs_status()
    charon_session = CharonSession()
    for project in projects_to_analyze:
        # Get information from Charon regarding which workflows to run
        try:
            # E.g. "NGI" for NGI DNA Samples
            workflow = charon_session.project_get(project.project_id)["pipeline"]
        except (KeyError, CharonError) as e:
            # Workflow missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        try:
            analysis_engine_module_name = config["analysis"]["workflows"][workflow]["analysis_engine"]
        except KeyError:
            # NOTE(review): the message says "Skipping" but this aborts the
            # whole run; kept as-is because callers may rely on the exception.
            error_msg = ("No analysis engine for workflow \"{}\" specified "
                         "in configuration file. Skipping this workflow "
                         "for project {}".format(workflow, project))
            LOG.error(error_msg)
            raise RuntimeError(error_msg)
        # Import the adapter module specified in the config file (e.g. piper_ngi)
        try:
            analysis_module = importlib.import_module(analysis_engine_module_name)
        except ImportError as e:
            error_msg = ('Skipping project "{}" workflow "{}": couldn\'t import '
                         'module "{}": {}'.format(project, workflow, analysis_engine_module_name, e))
            LOG.error(error_msg)
            # Next project
            continue

        # Flatten the project hierarchy into one work item per object at the
        # requested level so that both levels share the launch logic below.
        objects_to_process = []
        if level == "sample":
            for sample in project:
                objects_to_process.append({"project": project, "sample": sample})
        elif level == "seqrun":
            for sample in project:
                for libprep in sample:
                    for seqrun in libprep:
                        objects_to_process.append({"project": project,
                                                   "sample": sample,
                                                   "libprep": libprep,
                                                   "seqrun": seqrun})

        for obj_dict in objects_to_process:
            project = obj_dict.get("project")
            sample = obj_dict.get("sample")
            # libprep / seqrun are None for sample-level work items
            libprep = obj_dict.get("libprep")
            seqrun = obj_dict.get("seqrun")

            try:
                if level == "seqrun":
                    charon_reported_status = charon_session.seqrun_get(project.project_id,
                                                                       sample, libprep,
                                                                       seqrun)['alignment_status']
                else: # sample-level
                    charon_reported_status = charon_session.sample_get(project.project_id,
                                                                       sample)['status']
            except (CharonError, KeyError) as e:
                # Status could not be read: reset it to "NEW" in Charon and read it back
                LOG.warn('Unable to get required information from Charon for '
                          'sample "{}" / project "{}" -- forcing it to new: {}'.format(sample, project, e))
                if level == "seqrun":
                    charon_session.seqrun_update(project.project_id, sample.name, libprep.name, seqrun.name, alignment_status="NEW")
                    charon_reported_status = charon_session.seqrun_get(project.project_id,
                                                                       sample, libprep,
                                                                       seqrun)['alignment_status']
                else:
                    charon_session.sample_update(project.project_id, sample.name, status="NEW")
                    charon_reported_status = charon_session.sample_get(project.project_id,
                                                                       sample)['status']

            # Check Charon to ensure this hasn't already been processed
            if charon_reported_status in ("RUNNING", "DONE"):
                if level == "seqrun":
                    LOG.info('Charon reports seqrun analysis for project "{}" / sample "{}" '
                             '/ libprep "{}" / seqrun "{}" does not need processing '
                             ' (already "{}")'.format(project, sample, libprep, seqrun,
                                                      charon_reported_status))
                else: # Sample
                    LOG.info('Charon reports sample analysis for project "{}" / sample "{}" '
                             'does not need processing '
                             ' (already "{}")'.format(project, sample, charon_reported_status))
                continue
            elif charon_reported_status == "FAILED":
                if not restart_failed_jobs:
                    if level == "seqrun":
                        LOG.error('FAILED:  Project "{}" / sample "{}" / library "{}" '
                                  '/ flowcell "{}": Charon reports FAILURE, manual '
                                  'investigation needed!'.format(project, sample, libprep, seqrun))
                    else: # Sample
                        LOG.error('FAILED:  Project "{}" / sample "{}" Charon reports FAILURE, manual '
                                  'investigation needed!'.format(project, sample))
                    continue
            try:
                # The engines themselves know which sub-workflows
                # they need to execute for a given level. For example,
                # with DNA Variant Calling on the sequencing run
                # level, we need to execute basic alignment and QC.
                if level == "seqrun":
                    LOG.info('Attempting to launch seqrun analysis for '
                             'project "{}" / sample "{}" / libprep "{}" '
                             '/ seqrun "{}", workflow "{}"'.format(project,
                                                                   sample,
                                                                   libprep,
                                                                   seqrun,
                                                                   workflow))
                    analysis_module.analyze_seqrun(project=project,
                                                   sample=sample,
                                                   libprep=libprep,
                                                   seqrun=seqrun)
                else: # sample level
                    LOG.info('Attempting to launch sample analysis for '
                             'project "{}" / sample "{}" / workflow '
                             '"{}"'.format(project, sample, workflow))
                    analysis_module.analyze_sample(project=project,
                                                   sample=sample)

            except Exception as e:
                # Record which object failed before propagating; the original
                # re-raised first, which left this log statement unreachable.
                LOG.error('Cannot process project "{}" / sample "{}" / '
                          'libprep "{}" / seqrun "{}" / workflow '
                          '"{}" : {}'.format(project, sample, libprep,
                                             seqrun, workflow, e))
                raise
def update_charon_with_local_jobs_status():
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SeqrunAnalysis and SampleAnalysis row in the local jobs database is
    inspected via get_exit_code():

    * exit code 0 -- the job finished; "DONE" is recorded in Charon (for
      seqruns, alignment results are written first and a failure to do so
      downgrades the status to "FAILED") and the local row is deleted.
    * exit code 1, or no exit code and the recorded pid no longer exists --
      the job failed; "FAILED" (seqrun) / "COMPUTATION_FAILED" (sample) is
      recorded and the local row is deleted.
    * otherwise the job is considered still running and Charon is set to
      "RUNNING" if it says anything else.

    A CharonError while handling one entry is logged and that entry is left in
    the local database; deletions are committed once at the end.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()

        # Sequencing Run Analyses
        for seqrun_entry in session.query(SeqrunAnalysis).all():

            # Local names
            workflow = seqrun_entry.workflow
            project_name = seqrun_entry.project_name
            project_id = seqrun_entry.project_id
            project_base_path = seqrun_entry.project_base_path
            sample_id = seqrun_entry.sample_id
            libprep_id = seqrun_entry.libprep_id
            seqrun_id = seqrun_entry.seqrun_id
            pid = seqrun_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id,
                                      libprep_id=libprep_id,
                                      seqrun_id=seqrun_id)
            label = "project/sample/libprep/seqrun {}/{}/{}/{}".format(project_name,
                                                                       sample_id,
                                                                       libprep_id,
                                                                       seqrun_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_alignment_status = "DONE"
                    try:
                        write_to_charon_alignment_results(base_path=project_base_path,
                                                          project_name=project_name,
                                                          project_id=project_id,
                                                          sample_id=sample_id,
                                                          libprep_id=libprep_id,
                                                          seqrun_id=seqrun_id)
                    except (RuntimeError, ValueError) as e:
                        # Results could not be recorded, so the run is not usable
                        LOG.error(e)
                        set_alignment_status = "FAILED"
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_alignment_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(seqrun_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code (process no longer running)
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"FAILED", inspect manually'.format(label, pid))
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status="FAILED")
                    # Job is only deleted if the Charon update succeeds
                    LOG.debug("Deleting local entry {}".format(seqrun_entry))
                    session.delete(seqrun_entry)
                else:
                    # None -> Job still running
                    # .get() rather than indexing: a record without an
                    # alignment_status must not abort the whole sync with an
                    # uncaught KeyError; it is simply treated as "not RUNNING".
                    charon_status = charon_session.seqrun_get(projectid=project_id,
                                                              sampleid=sample_id,
                                                              libprepid=libprep_id,
                                                              seqrunid=seqrun_id).get('alignment_status')
                    if charon_status != "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.seqrun_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id,
                                                     seqrunid=seqrun_id,
                                                     alignment_status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))

        # Sample Analyses
        for sample_entry in session.query(SampleAnalysis).all():

            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            pid = sample_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_status = "DONE"
                    ## TODO implement sample-level analysis results parsing / reporting to Charon?
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status=set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"COMPUTATION_FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"COMPUTATION_FAILED", inspect manually'.format(label, pid))
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status="COMPUTATION_FAILED")
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running
                    try:
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                              sampleid=sample_id)['status']
                    except (CharonError, KeyError) as e:
                        LOG.warn('Unable to get required information from Charon for '
                          'sample "{}" / project "{}" -- forcing it to RUNNING: {}'.format(sample_id, project_id, e))
                        # Any non-RUNNING placeholder triggers the update below
                        charon_status = "NEW"

                    if charon_status != "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))
        session.commit()
示例#23
0
def create_charon_entries_from_project(project, best_practice_analysis="whole_genome_reseq",
                                       sequencing_facility="NGI-S",
                                       force_overwrite=False, delete_existing=False):
    """Given a project object, creates the relevant entries
    in Charon.

    The project is created first, then every sample, libprep and seqrun found
    by iterating the project object. A CharonError with status code 400 on
    creation is treated as "entry already exists": the entry is updated when
    force_overwrite is set and left untouched otherwise. Any other CharonError
    is re-raised at project level; at sample / libprep level it is logged and
    that entry's children are skipped; at seqrun level it is logged.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this project (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing (default "NGI-S")
    :param bool force_overwrite: If this is set to true, overwrite existing entries in Charon (default false)
    :param bool delete_existing: Don't just update existing entries, delete them and create new ones (default false)

    :raises CharonError: If project creation fails with a status code other than 400,
                         or if an overwrite (update) call itself fails
    """
    charon_session = CharonSession()

    # Initial field values written for newly created (or overwritten) entries
    status = "OPEN"
    analysis_status = "TO_ANALYZE"
    qc = "PASSED"
    alignment_status = "NOT_RUNNING"

    try:
        LOG.info('Creating project "{}" with status "{}", best practice analysis "{}", '
                 'and sequencing_facility {}'.format(project, status, best_practice_analysis,
                                                   sequencing_facility))
        charon_session.project_create(projectid=project.project_id,
                                      name=project.name,
                                      status=status,
                                      best_practice_analysis=best_practice_analysis,
                                      sequencing_facility=sequencing_facility)
        LOG.info('Project "{}" created in Charon.'.format(project))
    except CharonError as e:
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warn('Overwriting data for project "{}"'.format(project))
            charon_session.project_update(projectid=project.project_id,
                                          name=project.name,
                                          status=status,
                                          best_practice_analysis=best_practice_analysis,
                                          sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info('Project "{}" already exists; moving to samples...'.format(project))

    for sample in project:
        if delete_existing:
            LOG.warn('Deleting existing sample "{}"'.format(sample))
            try:
                charon_session.sample_delete(projectid=project.project_id,
                                             sampleid=sample.name)
            except CharonError as e:
                LOG.error('Could not delete sample "{}": {}'.format(sample, e))
        try:
            LOG.info('Creating sample "{}" with analysis_status "{}"'.format(sample, analysis_status))
            charon_session.sample_create(projectid=project.project_id,
                                         sampleid=sample.name,
                                         analysis_status=analysis_status)
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(project, sample))
        except CharonError as e:
            if e.status_code != 400:
                # Unexpected failure: skip this sample and everything below it
                LOG.error(e)
                continue
            if force_overwrite:
                LOG.warn('Overwriting data for project "{}" / '
                         'sample "{}"'.format(project, sample))
                charon_session.sample_update(projectid=project.project_id,
                                             sampleid=sample.name,
                                             analysis_status=analysis_status)
                LOG.info('Project/sample "{}/{}" updated in Charon.'.format(project, sample))
            else:
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))

        for libprep in sample:
            if delete_existing:
                LOG.warn('Deleting existing libprep "{}"'.format(libprep))
                try:
                    charon_session.libprep_delete(projectid=project.project_id,
                                                  sampleid=sample.name,
                                                  libprepid=libprep.name)
                except CharonError as e:
                    LOG.warn('Could not delete libprep "{}": {}'.format(libprep, e))
            try:
                LOG.info('Creating libprep "{}" with qc status "{}"'.format(libprep, qc))
                charon_session.libprep_create(projectid=project.project_id,
                                              sampleid=sample.name,
                                              libprepid=libprep.name,
                                              qc=qc)
                LOG.info(('Project/sample/libprep "{}/{}/{}" created in '
                          'Charon').format(project, sample, libprep))
            except CharonError as e:
                if e.status_code != 400:
                    # Unexpected failure: skip this libprep and its seqruns
                    LOG.error(e)
                    continue
                if force_overwrite:
                    LOG.warn('Overwriting data for project "{}" / '
                             'sample "{}" / libprep "{}"'.format(project, sample,
                                                                 libprep))
                    charon_session.libprep_update(projectid=project.project_id,
                                                  sampleid=sample.name,
                                                  libprepid=libprep.name,
                                                  qc=qc)
                    LOG.info(('Project/sample/libprep "{}/{}/{}" updated in '
                              'Charon').format(project, sample, libprep))
                else:
                    LOG.info(e)
                    # The next level down is seqruns (the original message
                    # wrongly said "libpreps" here).
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to seqruns'.format(project, sample, libprep))

            for seqrun in libprep:
                if delete_existing:
                    LOG.warn('Deleting existing seqrun "{}"'.format(seqrun))
                    try:
                        charon_session.seqrun_delete(projectid=project.project_id,
                                                     sampleid=sample.name,
                                                     libprepid=libprep.name,
                                                     seqrunid=seqrun.name)
                    except CharonError as e:
                        LOG.error('Could not delete seqrun "{}": {}'.format(seqrun, e))
                try:
                    LOG.info('Creating seqrun "{}" with alignment_status "{}"'.format(seqrun, alignment_status))
                    charon_session.seqrun_create(projectid=project.project_id,
                                                 sampleid=sample.name,
                                                 libprepid=libprep.name,
                                                 seqrunid=seqrun.name,
                                                 alignment_status=alignment_status,
                                                 total_reads=0,
                                                 mean_autosomal_coverage=0)
                    LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                              'created in Charon').format(project, sample,
                                                          libprep, seqrun))
                except CharonError as e:
                    if e.status_code != 400:
                        LOG.error(e)
                    elif force_overwrite:
                        LOG.warn('Overwriting data for project "{}" / '
                                 'sample "{}" / libprep "{}" / '
                                 'seqrun "{}"'.format(project, sample,
                                                      libprep, seqrun))
                        charon_session.seqrun_update(projectid=project.project_id,
                                                     sampleid=sample.name,
                                                     libprepid=libprep.name,
                                                     seqrunid=seqrun.name,
                                                     alignment_status=alignment_status,
                                                     total_reads=0,
                                                     mean_autosomal_coverage=0)
                        LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                                  'updated in Charon').format(project, sample,
                                                              libprep, seqrun))
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(project, sample,
                                                                              libprep, seqrun))
def main(inbox=None,
         num_days=14,
         genotype_files=None,
         config=None,
         config_file_path=None):
    """Mark samples that have genotype data as genotype_status "AVAILABLE" in Charon.

    The genotype files are either given explicitly or discovered by scanning
    one or more inbox directories for files whose path matches
    GENOTYPE_FILE_RE and which were modified within the last num_days days.
    Each file is parsed for sample names, those are mapped to projects, and
    every sample whose Charon genotype_status is unset or "NOT_AVAILABLE" is
    updated to "AVAILABLE".

    :param inbox: A directory (or list/tuple of directories) to scan; if not
                  given, config["environment"]["flowcell_inbox"] is used
    :param int num_days: Only consider files modified within this many days (default 14)
    :param list genotype_files: Explicit genotype file paths; when given, no inbox is scanned
    :param dict config: The parsed configuration; only consulted when inbox is not given
    :param str config_file_path: Path of the configuration file (used in the error message only)

    :raises ValueError: If neither genotype_files, inbox nor a configured inbox is available
    """
    inboxes = []
    cutoff_age = None
    if genotype_files:
        gt_files_valid = [
            os.path.abspath(gt_file) for gt_file in genotype_files
        ]
    else:
        inboxes = inbox
        if not inboxes:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                inboxes = None
        if not inboxes:
            raise ValueError(
                "No path to delivery inbox specified by argument "
                "or in configuration file ({}). Exiting.".format(
                    config_file_path))
        # Accept a single path as well as a collection of paths
        if not isinstance(inboxes, (list, tuple)):
            inboxes = [inboxes]
        inboxes = [os.path.abspath(inbox_path) for inbox_path in inboxes]
        # Oldest acceptable modification time, in seconds since the epoch
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across all inboxes (not reset per inbox)
        gt_files_valid = []
        for inbox_path in inboxes:
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox_path, "*"))):
                # cutoff_age is already an absolute timestamp, so compare directly
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))

    if not gt_files_valid:
        LOG.info("No genotype files found under {} newer than "
                 "{}".format(", ".join(inboxes), time.ctime(cutoff_age)))
        return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
                find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(
                            projectid=project_id,
                            sampleid=sample,
                            genotype_status="AVAILABLE")
                    else:
                        # Never downgrade or overwrite a status someone else set
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(
                                     sample, genotype_status))
                except CharonError as e:
                    LOG.error(
                        'Could not update genotype status to "AVAILABLE" '
                        'for project/sample "{}/{}": {}'.format(
                            project_id, sample, e))
示例#25
0
 def save_delivery_token_in_charon(self, delivery_token):
     """Store delivery_token on this object's sample entry in Charon.

     The sample is addressed by self.projectid and self.sampleid.
     """
     session = CharonSession()
     session.sample_update(
         self.projectid,
         self.sampleid,
         delivery_token=delivery_token,
     )
示例#26
0
def create_charon_entries_from_project(project, workflow="NGI", force_overwrite=False):
    """Given a project object, creates the relevant entries in Charon.

    The project, then each of its samples, libpreps and seqruns, is created
    in Charon. When a create call raises CharonError the entry is treated as
    already existing: with force_overwrite it is updated using the same field
    values creation would have used, otherwise it is left untouched. Either
    way processing continues with the entry's children.

    Errors raised by the overwriting update calls are not caught here.

    :param NGIProject project: The NGIProject object
    :param str workflow: The workflow to assign for this project (default NGI)
    :param bool force_overwrite: If this is set to true, overwrite existing entries in Charon (default false)
    """
    charon_session = CharonSession()
    status = "SEQUENCED"
    LOG.info('Creating project "{}" with status "{}" and workflow "{}"'.format(project, status, workflow))
    try:
        charon_session.project_create(projectid=project.project_id,
                                      name=project.name,
                                      status=status,
                                      pipeline=workflow)
    except CharonError:
        if force_overwrite:
            LOG.warning('Overwriting data for project "{}"'.format(project))
            charon_session.project_update(projectid=project.project_id,
                                          name=project.name,
                                          status=status,
                                          pipeline=workflow)
        else:
            LOG.info('Project "{}" already exists; moving to samples...'.format(project))

    # Creation and forced overwrite of a seqrun must agree on the fields they
    # set, so both use this one dict (the overwrite used to send an unrelated
    # "status" field instead of "alignment_status").
    seqrun_fields = {"total_reads": 0,
                     "mean_autosomal_coverage": 0,
                     "sequencing_status": "DONE",
                     "alignment_status": "NEW"}

    for sample in project:
        LOG.info('Creating sample "{}"'.format(sample))
        try:
            charon_session.sample_create(projectid=project.project_id,
                                         sampleid=sample.name,
                                         status="NEW")
        except CharonError:
            if force_overwrite:
                LOG.warning('Overwriting data for project "{}" / '
                            'sample "{}"'.format(project, sample))
                charon_session.sample_update(projectid=project.project_id,
                                             sampleid=sample.name,
                                             status="NEW")
            else:
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))

        for libprep in sample:
            LOG.info('Creating libprep "{}"'.format(libprep))
            try:
                charon_session.libprep_create(projectid=project.project_id,
                                              sampleid=sample.name,
                                              libprepid=libprep.name,
                                              status="NEW")
            except CharonError:
                if force_overwrite:
                    LOG.warning('Overwriting data for project "{}" / '
                                'sample "{}" / libprep "{}"'.format(project, sample,
                                                                    libprep))
                    charon_session.libprep_update(projectid=project.project_id,
                                                  sampleid=sample.name,
                                                  libprepid=libprep.name,
                                                  status="NEW")
                else:
                    # The next level down from a libprep is its seqruns
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to seqruns'.format(project, sample, libprep))

            for seqrun in libprep:
                LOG.info('Creating seqrun "{}"'.format(seqrun))
                try:
                    charon_session.seqrun_create(projectid=project.project_id,
                                                 sampleid=sample.name,
                                                 libprepid=libprep.name,
                                                 seqrunid=seqrun.name,
                                                 **seqrun_fields)
                except CharonError:
                    if force_overwrite:
                        LOG.warning('Overwriting data for project "{}" / '
                                    'sample "{}" / libprep "{}" / '
                                    'seqrun "{}"'.format(project, sample,
                                                         libprep, seqrun))
                        charon_session.seqrun_update(projectid=project.project_id,
                                                     sampleid=sample.name,
                                                     libprepid=libprep.name,
                                                     seqrunid=seqrun.name,
                                                     **seqrun_fields)
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(project, sample,
                                                                              libprep, seqrun))
def kill_running_sample_analysis(workflow_subtask, project_id, sample_id):
    """Kill the locally-tracked analysis job of a sample and mark it FAILED in Charon.

    The sample is looked up in the local process tracking database. If an
    entry is found, its slurm job is killed, the Charon status fields that
    belong to the workflow are set to "FAILED" (on the sample and, through
    recurse_status_for_sample, below it), and the entry is removed from the
    local database. Failures to update Charon or to remove the local entry
    are logged but do not change the return value.

    :param str workflow_subtask: The workflow the tracked job belongs to
        ("merge_process_variantcall" or "genotype_concordance")
    :param str project_id: The project id of the sample
    :param str sample_id: The sample id

    :returns: False if killing the slurm job raised; True otherwise
        (including when no job is tracked for the sample)
    :rtype: bool
    """
    sample_run_name = "{}/{}".format(project_id, sample_id)
    LOG.info('Attempting to kill sample analysis run "{}"'.format(sample_run_name))
    LOG.info('Checking if sample run "{}" is currently being analyzed '
             '(workflow "{}")...'.format(sample_run_name, workflow_subtask))
    # workflow -> (sample-level field, seqrun-level field); same mapping that
    # update_charon_with_local_jobs_status uses
    charon_fields_by_workflow = {
        "merge_process_variantcall": ("analysis_status", "alignment_status"),
        "genotype_concordance": ("genotype_status", "genotype_status"),
    }
    with get_db_session() as session:
        sample_run = session.query(SampleAnalysis).filter_by(workflow=workflow_subtask,
                                                             project_id=project_id,
                                                             sample_id=sample_id).first()
        if not sample_run:
            LOG.info('...sample run "{}" is not currently under analysis.'.format(sample_run_name))
            return True

        try:
            slurm_job_id = sample_run.slurm_job_id
            LOG.info('...sample run "{}" is currently being analyzed '
                     '(workflow subtask "{}") and has slurm job id "{}"; '
                     'trying to kill it...'.format(sample_run_name,
                                                   workflow_subtask,
                                                   slurm_job_id))
            kill_slurm_job_by_id(slurm_job_id)
        except Exception as e:
            LOG.error('Could not kill sample run "{}": {}'.format(sample_run_name, e))
            return False

        try:
            project_obj = create_project_obj_from_analysis_log(sample_run.project_name,
                                                               sample_run.project_id,
                                                               sample_run.project_base_path,
                                                               sample_run.sample_id,
                                                               sample_run.workflow)
        except IOError as e: # analysis log file is missing!
            error_text = ('Could not find analysis log file! Cannot update '
                          'Charon for {} run {}/{}: {}'.format(sample_run.workflow,
                                                               sample_run.project_id,
                                                               sample_run.sample_id,
                                                               e))
            LOG.error(error_text)
        else:
            set_status = "FAILED"
            charon_fields = charon_fields_by_workflow.get(workflow_subtask)
            if charon_fields is None:
                # No known status field to write; the local entry is still
                # removed below because the job itself has been killed.
                LOG.error('Unknown workflow "{}" for project/sample "{}/{}"; '
                          'cannot update Charon.'.format(workflow_subtask,
                                                         project_id, sample_id))
            else:
                sample_status_field, seqrun_status_field = charon_fields
                try:
                    charon_session = CharonSession()
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=set_status)
                except CharonError as e:
                    LOG.error('Couldn\'t update Charon field "{}" to "{}" for '
                              'project/sample "{}/{}": {}'.format(sample_status_field,
                                                                  set_status,
                                                                  project_id,
                                                                  sample_id, e))
        try:
            LOG.info('Removing sample run "{}" from local jobs database...'.format(sample_run_name))
            # Remove from local jobs database
            session.delete(sample_run)
            session.commit()
            LOG.info("Deleted.")
        except Exception as e:
            LOG.error('Failed to remove entry for sample run "{}" from '
                      'local jobs database: {}'.format(sample_run_name, e))
    return True
示例#28
0
def create_charon_entries_from_project(
        project,
        best_practice_analysis="whole_genome_reseq",
        sequencing_facility="NGI-S",
        force_overwrite=False,
        retry_on_fail=True):
    """Given a project object, creates the relevant entries in Charon.

    The project and each of its samples, libpreps and seqruns are created in
    Charon. A CharonError with status code 400 is taken to mean the entry
    already exists: with force_overwrite it is updated, otherwise it is left
    alone -- except that an existing sample always gets its ``status`` set to
    "STALE". Any other CharonError is re-raised at project level; below that
    it is logged, the entry (and its children) is skipped, and the whole
    function is re-run once at the end if retry_on_fail is set.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this project (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing (default "NGI-S")
    :param bool force_overwrite: If this is set to true, overwrite existing entries in Charon (default false)
    :param bool retry_on_fail: If true, run the whole creation once more when a non-400 error occurred (default true)

    :raises CharonError: On a non-400 error for the project itself, or when
        non-400 errors occurred below it and retry_on_fail is false
    """
    charon_session = CharonSession()
    update_failed = False

    # Values written to newly created (or forcibly overwritten) entries
    status = "OPEN"
    analysis_status = "TO_ANALYZE"
    sample_data_status_value = "STALE"
    qc = "PASSED"
    alignment_status = "NOT_RUNNING"

    LOG.info(
        'Creating project "{}" with status "{}", best practice analysis "{}", '
        'and sequencing_facility {}'.format(project, status,
                                            best_practice_analysis,
                                            sequencing_facility))
    try:
        charon_session.project_create(
            projectid=project.project_id,
            name=project.name,
            status=status,
            best_practice_analysis=best_practice_analysis,
            sequencing_facility=sequencing_facility)
    except CharonError as e:
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warning(
                'Overwriting data for project "{}"'.format(project))
            charon_session.project_update(
                projectid=project.project_id,
                name=project.name,
                status=status,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info(
                'Project "{}" already exists; moving to samples...'.format(
                    project))
    else:
        LOG.info('Project "{}" created in Charon.'.format(project))

    for sample in project:
        LOG.info('Creating sample "{}" with analysis_status "{}"'.format(
            sample, analysis_status))
        try:
            charon_session.sample_create(projectid=project.project_id,
                                         sampleid=sample.name,
                                         analysis_status=analysis_status)
        except CharonError as e:
            if e.status_code != 400:
                # Not an "already exists" answer: skip this sample entirely
                update_failed = True
                LOG.error(e)
                continue
            if force_overwrite:
                LOG.warning('Overwriting data for project "{}" / '
                            'sample "{}"'.format(project, sample))
                charon_session.sample_update(
                    projectid=project.project_id,
                    sampleid=sample.name,
                    analysis_status=analysis_status,
                    status=sample_data_status_value)
                LOG.info(
                    'Project/sample "{}/{}" updated in Charon.'.format(
                        project, sample))
            else:
                # update the status of the sample to STALE
                charon_session.sample_update(
                    projectid=project.project_id,
                    sampleid=sample.name,
                    status=sample_data_status_value)
                LOG.info(
                    'Project "{}" / sample "{}" already exists; moving '
                    'to libpreps'.format(project, sample))
        else:
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(
                project, sample))

        for libprep in sample:
            LOG.info('Creating libprep "{}" with qc status "{}"'.format(
                libprep, qc))
            try:
                charon_session.libprep_create(projectid=project.project_id,
                                              sampleid=sample.name,
                                              libprepid=libprep.name,
                                              qc=qc)
            except CharonError as e:
                if e.status_code != 400:
                    # Skip this libprep and its seqruns
                    update_failed = True
                    LOG.error(e)
                    continue
                if force_overwrite:
                    LOG.warning('Overwriting data for project "{}" / '
                                'sample "{}" / libprep "{}"'.format(
                                    project, sample, libprep))
                    charon_session.libprep_update(
                        projectid=project.project_id,
                        sampleid=sample.name,
                        libprepid=libprep.name,
                        qc=qc)
                    LOG.info(
                        ('Project/sample/libprep "{}/{}/{}" updated in '
                         'Charon').format(project, sample, libprep))
                else:
                    LOG.debug(e)
                    # The next level down from a libprep is its seqruns
                    LOG.info(
                        'Project "{}" / sample "{}" / libprep "{}" already '
                        'exists; moving to seqruns'.format(
                            project, sample, libprep))
            else:
                LOG.info(('Project/sample/libprep "{}/{}/{}" created in '
                          'Charon').format(project, sample, libprep))

            for seqrun in libprep:
                LOG.info('Creating seqrun "{}" with alignment_status "{}"'.
                         format(seqrun, alignment_status))
                try:
                    charon_session.seqrun_create(
                        projectid=project.project_id,
                        sampleid=sample.name,
                        libprepid=libprep.name,
                        seqrunid=seqrun.name,
                        alignment_status=alignment_status,
                        total_reads=0,
                        mean_autosomal_coverage=0)
                except CharonError as e:
                    if e.status_code != 400:
                        update_failed = True
                        LOG.error(e)
                    elif force_overwrite:
                        LOG.warning('Overwriting data for project "{}" / '
                                    'sample "{}" / libprep "{}" / '
                                    'seqrun "{}"'.format(
                                        project, sample, libprep, seqrun))
                        charon_session.seqrun_update(
                            projectid=project.project_id,
                            sampleid=sample.name,
                            libprepid=libprep.name,
                            seqrunid=seqrun.name,
                            alignment_status=alignment_status,
                            total_reads=0,
                            mean_autosomal_coverage=0)
                        LOG.info(
                            ('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                             'updated in Charon').format(
                                 project, sample, libprep, seqrun))
                    else:
                        LOG.info(
                            'Project "{}" / sample "{}" / libprep "{}" / '
                            'seqrun "{}" already exists; next...'.format(
                                project, sample, libprep, seqrun))
                else:
                    LOG.info(
                        ('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                         'created in Charon').format(project, sample, libprep,
                                                     seqrun))

    if update_failed:
        if retry_on_fail:
            # One more full pass; entries created above now answer 400
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_overwrite,
                retry_on_fail=False)
        else:
            raise CharonError("A network error blocks Charon updating.")
示例#29
0
def _mail_unless_quiet(config, level, info_text, project_name, sample_id,
                       engine, workflow):
    """Send an analysis notification mail unless config['quiet'] is set."""
    if not config.get('quiet'):
        mail_analysis(project_name=project_name,
                      sample_name=sample_id,
                      engine_name=engine,
                      level=level,
                      info_text=info_text,
                      workflow=workflow)


def _set_charon_status(charon_session, project_obj, project_id, sample_id,
                       sample_status_field, seqrun_status_field,
                       sample_status, seqrun_status, config):
    """Write the sample-level status to Charon, then recurse the seqrun-level one."""
    charon_session.sample_update(projectid=project_id,
                                 sampleid=sample_id,
                                 **{sample_status_field: sample_status})
    recurse_status_for_sample(project_obj,
                              status_field=seqrun_status_field,
                              status_value=seqrun_status,
                              config=config)


def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SampleAnalysis entry in the local jobs database is classified by
    the exit code returned by get_exit_code:

    * 0: the job succeeded. Charon is marked ANALYZED/DONE, the local entry
      is deleted, the project is queued for MultiQC and the
      workflow-specific result updaters are called.
    * a positive int: the job failed. Charon is marked FAILED and the local
      entry is deleted.
    * anything else: slurm (or psutil, for jobs without a slurm id) is asked
      whether the job is still alive. A dead job is recorded as FAILED and
      its entry deleted; for a live job Charon is set to
      UNDER_ANALYSIS if its sample-level status says otherwise.

    A local entry is only deleted when the Charon update did not raise.
    CharonError and OSError are logged (and mailed unless quiet) per sample
    and do not abort the loop. After the session is committed, MultiQC is
    run once for every project that had a successful job.

    :param bool quiet: If true, sets config['quiet'] so no notification mails are sent
    :param dict config: Configuration; the 'quiet' key is read here and the
        dict is passed on to recurse_status_for_sample. None is treated as
        an empty dict.
    :param str config_file_path: Not used inside this function body
    """
    if config is None:
        # config.get() below would otherwise fail with the default argument
        config = {}
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    multiqc_projects = set()
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            # The Charon fields to write depend only on the workflow
            if workflow == "merge_process_variantcall":
                sample_status_field = "analysis_status"
                seqrun_status_field = "alignment_status"
            elif workflow == "genotype_concordance":
                sample_status_field = seqrun_status_field = "genotype_status"
            else:
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            mail_target = dict(project_name=project_name,
                               sample_id=sample_id,
                               engine=engine,
                               workflow=workflow)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
                continue

            charon_target = dict(charon_session=charon_session,
                                 project_obj=project_obj,
                                 project_id=project_id,
                                 sample_id=sample_id,
                                 sample_status_field=sample_status_field,
                                 seqrun_status_field=seqrun_status_field,
                                 config=config)
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        set_status = "ANALYZED" # sample level
                    else:
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    _mail_unless_quiet(config, "INFO", info_text, **mail_target)
                    _set_charon_status(sample_status=set_status,
                                       seqrun_status=recurse_status,
                                       **charon_target)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # add project to MultiQC
                    multiqc_projects.add((project_base_path, project_id, project_name))

                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                               project_base_path)
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif isinstance(piper_exit_code, int) and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
                    _set_charon_status(sample_status=set_status,
                                       seqrun_status=set_status,
                                       **charon_target)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            # Status lookup failed: count the job as failed
                            slurm_exit_code = 1
                        # "None" indicates job is still running
                        job_failed = slurm_exit_code is not None
                    else:
                        # Job did not write an exit code and is also not running
                        job_failed = not psutil.pid_exists(process_id)

                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
                        _set_charon_status(sample_status=set_status,
                                           seqrun_status=set_status,
                                           **charon_target)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            recurse_status = "RUNNING"
                        else:
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            remote_sample = charon_session.sample_get(projectid=project_id,
                                                                      sampleid=sample_id)
                            charon_status = remote_sample.get(sample_status_field)
                            if charon_status and charon_status != set_status:
                                LOG.warning('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                _set_charon_status(sample_status=set_status,
                                                   seqrun_status=recurse_status,
                                                   **charon_target)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                _mail_unless_quiet(config, "ERROR", error_text, **mail_target)
        session.commit()
    # Run Multiqc
    for multiqc_base_path, multiqc_project_id, multiqc_project_name in multiqc_projects:
        LOG.info("Running MultiQC on project {}".format(multiqc_project_id))
        run_multiqc(multiqc_base_path, multiqc_project_id, multiqc_project_name)