示例#1
0
 def test_construct_charon_url(self):
     """construct_charon_url should join its positional args under <base_url>/api/v1/."""
     append_list = ["road", "to", "nowhere"]
     charon_session = CharonSession()
     # The expected value is assembled by hand from the session's base URL;
     # admittedly this mirrors what the method under test is expected to do.
     joined_path = '/'.join(str(a) for a in append_list)
     finished_url = "{}/api/v1/{}".format(charon_session._base_url,
                                          joined_path)
     # The method takes individual positional args rather than a list
     constructed_url = CharonSession().construct_charon_url(*append_list)
     self.assertEqual(finished_url, constructed_url)
示例#2
0
def record_project_job(project,
                       job_id,
                       analysis_dir,
                       workflow=None,
                       engine='rna_ngi',
                       run_mode='local',
                       config=None,
                       config_file_path=None):
    """Record an analysis job for a project and flag its samples in Charon.

    A ProjectAnalysis row is added to the session returned by get_session()
    and committed. Then, for every sample flagged ``being_analyzed``, the
    sample's analysis_status in Charon is set to UNDER_ANALYSIS and every
    seqrun flagged ``being_analyzed`` under a libprep whose Charon "qc" is not
    FAILED gets alignment_status RUNNING. Any exception raised while updating
    one sample is logged and the loop moves on to the next sample.

    :param project: The project object (iterable over samples; provides
                    project_id, name and base_path)
    :param job_id: Identifier of the job, stored in the ProjectAnalysis row
    :param analysis_dir: Analysis directory, stored in the ProjectAnalysis row
    :param workflow: Workflow name, stored in the ProjectAnalysis row
    :param engine: Engine name, stored in the ProjectAnalysis row
    :param run_mode: Run mode, stored in the ProjectAnalysis row
    :param config: Not used in this function
    :param config_file_path: Not used in this function
    """
    sample_status_value = "UNDER_ANALYSIS"
    with get_session() as db_session:
        job_record = ProjectAnalysis(project_id=project.project_id,
                                     job_id=job_id,
                                     project_name=project.name,
                                     project_base_path=project.base_path,
                                     workflow=workflow,
                                     engine=engine,
                                     analysis_dir=analysis_dir,
                                     run_mode=run_mode)
        db_session.add(job_record)
        db_session.commit()

        for sample in project:
            if not sample.being_analyzed:
                continue
            try:
                status_msg = ('Updating Charon status for project/sample '
                              '{}/{} : {}'.format(project, sample,
                                                  sample_status_value))
                LOG.info(status_msg)
                CharonSession().sample_update(
                    projectid=project.project_id,
                    sampleid=sample.name,
                    analysis_status=sample_status_value)

                for libprep in sample:
                    libprep_record = CharonSession().libprep_get(
                        project.project_id, sample.name, libprep.name)
                    # Seqruns of libpreps that failed QC are left untouched
                    if libprep_record.get('qc') == "FAILED":
                        continue
                    for seqrun in libprep:
                        if not seqrun.being_analyzed:
                            continue
                        CharonSession().seqrun_update(
                            project.project_id,
                            sample.name,
                            libprep.name,
                            seqrun.name,
                            alignment_status="RUNNING")
            except Exception as e:
                # Best effort: a failure on one sample must not stop the rest
                LOG.error(
                    "Could not update Charon for sample {}/{} : {}".format(
                        project.project_id, sample.name, e))
示例#3
0
def fetch_charon(context, project, threshold, all_samples):
    """Fetch the samples of a project from Charon and print their genotype concordance.

    Samples whose ``genotype_status`` is None are excluded. Unless
    ``all_samples`` is set, only samples whose concordance is at or below
    ``threshold`` are printed; samples with a concordance of 0 are never
    printed. Any exception is logged through ``log.error`` rather than raised.

    :param context: Not used in this function
    :param project: The project identifier passed to project_get_samples
    :param threshold: Concordance value at or below which a sample is listed
    :param all_samples: If true, list every checked sample regardless of threshold
    """
    try:
        # get result from charon
        charon_session = CharonSession()
        result = charon_session.project_get_samples(project)
        samples = {}
        for sample in result.get('samples'):
            status = sample.get('genotype_status')
            # Exclude samples which were not yet checked. This must happen
            # before the float() conversion: an unchecked sample presumably has
            # no concordance value, and float(None) would abort the whole report.
            if status is None:
                continue
            concordance = float(sample.get('genotype_concordance'))
            samples[sample.get('sampleid')] = (concordance, status)

        # print output
        if not all_samples and samples:
            print('Samples below threshold: {}%'.format(threshold))
        for sample_id in sorted(samples):
            concordance, status = samples[sample_id]
            # if --all, we don't care about threshold
            if all_samples or concordance <= threshold:
                # do not print 0%
                if concordance != 0:
                    print('{} {}% {}'.format(sample_id, concordance, status))
    except Exception as e:
        log.error("Can't fetch Charon. Error says: {}".format(str(e)))
示例#4
0
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     '''Adds a SUPR delivery name to delivery_projects in Charon at sample level

     The sample record for self.projectid / self.sampleid is fetched and, if
     the name is not already in its delivery_projects list, the name is
     appended and the list is written back. Any exception is logged, not raised.

     :param supr_name_of_delivery: The name to add to delivery_projects
     '''
     charon_session = CharonSession()
     try:
         # fetch the sample
         sample_charon = charon_session.sample_get(self.projectid,
                                                   self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Membership must be tested against the list of delivery projects;
         # testing against the sample record itself only looks at its keys
         # and would append the same name on every call.
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(
                 self.projectid,
                 self.sampleid,
                 delivery_projects=delivery_projects)
             logger.info(
                 'Charon delivery_projects for sample {} updated with value {}'
                 .format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warning(
                 'Charon delivery_projects for sample {} not updated with value {} because the value was already present'
                 .format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         logger.error(
             'Failed to update delivery_projects in charon while delivering {}. Error says: {}'
             .format(self.sampleid, e))
         logger.exception(e)
 def save_delivery_token_in_charon(self, delivery_token):
     '''Stores delivery_token on the Charon project record of self.projectid

     :param delivery_token: The token value to store
     '''
     ## TODO: need to update ngi_pipeline.database.classes.project_update
     ## and add field in Charon
     CharonSession().project_update(self.projectid,
                                    delivery_token=delivery_token)
示例#6
0
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the alignment_status of every seqrun under the project's sample(s)
    to "set_status" in Charon.

    A CharonError on one seqrun is logged (and mailed via mail_analysis unless
    the module-level ``config`` has a truthy 'quiet' entry); processing then
    continues with the next seqrun.

    :param project_obj: The project object (iterable over samples, which are
                        iterable over libpreps, which are iterable over seqruns)
    :param set_status: The value to write to alignment_status
    :param update_done: Not used in this function
    """
    charon_session = CharonSession()
    project_id = project_obj.project_id
    # The libprep loop is nested inside the sample loop so that a project
    # without samples is a no-op (rather than a NameError on sample_obj) and
    # every sample is handled; with the usual single sample nothing changes.
    for sample_obj in project_obj:
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info(('Updating status of project/sample/libprep/seqrun '
                          '"{}" to "{}" in Charon ').format(label, set_status))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_status)
                except CharonError as e:
                    error_text = ('Could not update status of project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(label, set_status, e))
                    LOG.error(error_text)
                    # "config" is not a parameter; it is looked up in the enclosing scope
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text)
示例#7
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Build an NGIProject with its samples, libpreps and seqruns as listed in Charon.

    Each created sample / libprep / seqrun object gets a ``status`` attribute
    taken from the "status" field of its Charon record ("unknown" if absent).

    :param analysis_top_dir: Used as the project's base_path
    :param project_name: Used as the project's name and dirname
    :param project_id: The project id used for all Charon lookups

    :returns: The populated project object
    :raises RuntimeError: If Charon raises a CharonError for any of the lookups
    """
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()

    try:
        sample_records = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError(
            "Could not access samples for project {}: {}".format(project_id, e))

    for sample_record in sample_records:
        sample_id = sample_record.get("sampleid")
        # Computed but not used further in this function
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample_record.get("status", "unknown")

        try:
            libprep_records = charon_session.sample_get_libpreps(
                project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError(
                "Could not access libpreps for project {} / sample {}: {}".format(
                    project_id, sample_id, e))

        for libprep_record in libprep_records:
            libprep_id = libprep_record.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id, dirname=libprep_id)
            libprep_obj.status = libprep_record.get("status", "unknown")

            try:
                seqrun_records = charon_session.libprep_get_seqruns(
                    project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError(
                    "Could not access seqruns for project {} / sample {} / "
                    "libprep {}: {}".format(project_id, sample_id, libprep_id, e))

            for seqrun_record in seqrun_records:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun_record.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun_record.get("status", "unknown")

    return project_obj
示例#8
0
def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon from a workflow return code.

    The project name and sample id are parsed out of ``job_id`` with
    STHLM_UUSNP_SAMPLE_RE; the project name is then resolved to a project id
    with get_project_id_from_name.

    :param str job_id: The job id; expected to match STHLM_UUSNP_SAMPLE_RE,
                       which must provide "project_name" and "sample_id" groups
    :param int return_code: The return code of the workflow process
                            (None -> RUNNING, 0 -> DONE, otherwise COMPUTATION_FAILED)
    :param string run_dir: the directory where results are stored (not used in this function)

    :raises RuntimeError: If job_id cannot be parsed or the Charon database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # A non-matching job id makes match() return None -> AttributeError
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(
            job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # One placeholder per argument: previously four values were fed to
        # three placeholders, so the sample id and the error were misreported.
        error_msg = ('Failed to update sample status to "{}" for project/sample '
                     '"{}/{}" in Charon database: {}'.format(status, project_id,
                                                             sample_id, e))
        raise RuntimeError(error_msg)
示例#9
0
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    If the configuration provides genotyping -> lower_bound_cutoff, the
    sample's genotype_status is also set: FAILED when the concordance is
    below the cutoff, PASSED otherwise.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The configuration; when None no cutoff is applied
    :param config_file_path: Not used in this function

    :raises ValueError: If the specified sample has no data in the gtc file

    Errors raised by parse_genotype_concordance or by the Charon update
    propagate to the caller.
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    # config defaults to None; treat that as "no cutoff configured"
    gtc_lower_bound = (config or {}).get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # Compare against the configured cutoff (comparing the value with
        # itself is always False and would never mark a sample as FAILED)
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
示例#10
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The project's "best_practice_analysis" is read from Charon; when the field
    is missing or empty an error is logged and "whole_genome_reseq" is used.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed on to load_engine_module
    :param config_file_path: Not used in this function

    :returns: Whatever load_engine_module returns for the chosen analysis
    :raises RuntimeError: If load_engine_module raises a RuntimeError
    """
    charon_session = CharonSession()
    try:
        project_record = charon_session.project_get(project.project_id)
        best_practice_analysis = project_record["best_practice_analysis"]
        if not best_practice_analysis:
            # An empty value is handled exactly like a missing one
            raise KeyError(
                "For once in my life ever can't you just fill in the forms properly"
            )
    except KeyError:
        error_msg = (
            'No best practice analysis specified in Charon for '
            'project "{}". Using "whole_genome_reseq"'.format(project))
        LOG.error(error_msg)
        best_practice_analysis = "whole_genome_reseq"

    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        # Re-raise with the project prepended to the message
        raise RuntimeError('Project "{}": {}'.format(project, e))
示例#11
0
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """Raise if a seqrun of this sample is already RUNNING or DONE in Charon
    and the matching restart flag does not permit continuing.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: allow seqruns with alignment_status RUNNING
    :param boolean restart_finished_jobs: allow seqruns with alignment_status DONE

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    libprep_records = charon_session.sample_get_libpreps(
        projectid=project_id, sampleid=sample_id)['libpreps']
    for libprep in libprep_records:
        libprep_id = libprep['libprepid']
        seqrun_records = charon_session.libprep_get_seqruns(
            projectid=project_id,
            sampleid=sample_id,
            libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            # The status is read from the individual seqrun record
            seqrun_record = charon_session.seqrun_get(projectid=project_id,
                                                      sampleid=sample_id,
                                                      libprepid=libprep_id,
                                                      seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            blocked_running = aln_status == "RUNNING" and not restart_running_jobs
            blocked_finished = aln_status == "DONE" and not restart_finished_jobs
            if blocked_running or blocked_finished:
                raise RuntimeError(
                    'Project/Sample "{}/{}" has a preexisting '
                    'seqrun "{}" with status "{}"'.format(
                        project_obj, sample_obj, seqrun_id, aln_status))
示例#12
0
def update_analysis(project_id, status):
    """Mail the outcome of a project's analysis and close its open records in Charon.

    A mail is sent through mail_analysis (level INFO when ``status`` is truthy,
    ERROR otherwise). Then every sample whose analysis_status is
    UNDER_ANALYSIS is set to ANALYZED / FAILED, and every RUNNING seqrun under
    its libpreps whose "qc" is not FAILED is set to DONE / FAILED.

    :param project_id: The id of the project
    :param status: Truthy if the analysis succeeded
    """
    charon_session = CharonSession()
    mail_level = 'INFO' if status else 'ERROR'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)
    if status:
        new_sample_status, new_seqrun_status = 'ANALYZED', 'DONE'
    else:
        new_sample_status, new_seqrun_status = 'FAILED', 'FAILED'

    sample_records = charon_session.project_get_samples(project_id).get(
        "samples", {})
    for sample in sample_records:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, new_sample_status))
        charon_session.sample_update(project_id,
                                     sample_id,
                                     analysis_status=new_sample_status)
        libprep_records = charon_session.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libprep_records:
            # Libpreps that failed QC are skipped together with their seqruns
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqrun_records = charon_session.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqrun_records:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info(
                    "Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                        project_id, sample_id, libprep_id, seqrun_id,
                        new_seqrun_status))
                charon_session.seqrun_update(
                    project_id,
                    sample_id,
                    libprep_id,
                    seqrun_id,
                    alignment_status=new_seqrun_status)
示例#13
0
def get_finished_seqruns_for_sample(project_id, sample_id,
                                    include_failed_libpreps=False):
    """Collect, per libprep, the seqruns of a sample whose alignment_status is DONE.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Also look at libpreps whose "qc" is FAILED

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    libprep_records = charon_session.sample_get_libpreps(
        projectid=project_id, sampleid=sample_id)['libpreps']
    finished_by_libprep = collections.defaultdict(list)
    for libprep in libprep_records:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_records = charon_session.libprep_get_seqruns(
            projectid=project_id,
            sampleid=sample_id,
            libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            # The status is read from the individual seqrun record
            seqrun_record = charon_session.seqrun_get(projectid=project_id,
                                                      sampleid=sample_id,
                                                      libprepid=libprep_id,
                                                      seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            if aln_status != "DONE":
                LOG.debug('Skipping seqrun "{}" due to alignment_status '
                          '"{}"'.format(seqrun_id, aln_status))
                continue
            finished_by_libprep[libprep_id].append(seqrun_id)
    return dict(finished_by_libprep)
示例#14
0
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Update Charon with the mean autosomal coverage of each finished seqrun of a sample.

    The seqruns come from get_finished_seqruns_for_sample; the coverage of
    each is obtained from _parse_mean_coverage_from_qualimap. A CharonError on
    one seqrun is logged (and mailed unless the module-level ``config`` has a
    truthy 'quiet' entry) and the remaining seqruns are still processed.
    Exceptions raised by the helpers are not handled here.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    # Flatten {libprep: [seqrun, ...]} lazily into (libprep, seqrun) pairs
    libprep_seqrun_pairs = ((libprep_id, seqrun_id)
                            for libprep_id, seqruns in seqruns_by_libprep.iteritems()
                            for seqrun_id in seqruns)
    for libprep_id, seqrun_id in libprep_seqrun_pairs:
        label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
        ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
        LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                 'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
        try:
            charon_session.seqrun_update(projectid=project_id,
                                         sampleid=sample_id,
                                         libprepid=libprep_id,
                                         seqrunid=seqrun_id,
                                         mean_autosomal_coverage=ma_coverage)
        except CharonError as e:
            error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                          'in Charon with mean autosomal coverage '
                          '"{}": {}'.format(label, ma_coverage, e))
            LOG.error(error_text)
            # "config" is not a parameter; it is looked up in the enclosing scope
            if not config.get('quiet'):
                mail_analysis(project_name=project_id, sample_name=sample_id,
                              engine_name="piper_ngi", level="ERROR",
                              info_text=error_text)
示例#15
0
 def save_delivery_token_in_charon(self, delivery_token):
     '''Stores delivery_token on the Charon sample record of
     self.projectid / self.sampleid

     :param delivery_token: The token value to store
     '''
     CharonSession().sample_update(self.projectid,
                                   self.sampleid,
                                   delivery_token=delivery_token)
示例#16
0
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The alphanumeric database-friendly name of the project (e.g. "P123")
    :rtype: str

    :raises CharonError: If the lookup in Charon fails, including the 404 case
                         of a project that does not exist
    :raises ValueError: If the project's database entry has no "projectid" value
    """
    charon_session = CharonSession()

    # NOTE(review): the previous code built a ValueError for the 404 case but
    # then re-raised the original CharonError in every branch. The exception
    # type callers actually receive is kept here; confirm whether a ValueError
    # was intended for missing projects.
    project_record = charon_session.project_get(project_name)
    try:
        return project_record['projectid']
    except KeyError:
        # Report the name that was looked up (the old code referenced an
        # undefined variable here and died with a NameError instead)
        raise ValueError(
            'Couldn\'t retrieve project id for project "{}"; '
            'this project\'s database entry has no "projectid" value.'.format(
                project_name))
def update_sample_duplication_and_coverage(project_id,
                                           sample_id,
                                           project_base_path,
                                           config=None,
                                           config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage of a sample.

    The values are parsed from the sample's ``.metrics`` file and its qualimap
    ``genome_results.txt`` under
    ``<project_base_path>/ANALYSIS/<project_id>/piper_ngi``. A value whose file
    cannot be parsed is reported as 0 and an error is logged. A CharonError
    during the update is logged (and mailed unless config has a truthy 'quiet'
    entry) rather than raised.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The configuration; only its 'quiet' entry is read here
    :param config_file_path: Not used in this function
    """

    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(
        project_base_path, 'ANALYSIS', project_id, 'piper_ngi',
        '06_final_alignment_qc', "{}.clean.dedup.qc".format(sample_id),
        "genome_results.txt")

    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort: any parsing problem falls back to 0, but interpreter
        # exits (KeyboardInterrupt, SystemExit) are no longer swallowed
        dup_pc = 0
        LOG.error(
            "Cannot find {}.metrics file for duplication rate at {}. Continuing."
            .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error(
            "Cannot find genome_results.txt file for sample coverage at {}. Continuing."
            .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info(
            'Updating sample "{}" in '
            'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
            .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # One argument per placeholder; the previous nested format() call
        # referenced an undefined name and could never build this message
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate : {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id,
                          sample_name=sample_id,
                          engine_name="piper_ngi",
                          level="ERROR",
                          info_text=error_text)
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Process a fixed list of demultiplexed flowcells, then poll forever.

    Each hard-coded flowcell directory is handed to
    process_demultiplexed_flowcell, followed by a 60 second pause. Afterwards
    the function loops without end: it refreshes job statuses, recreates every
    Charon project that has a directory under the analysis-ready DATA folder,
    launches analysis for it, and sleeps 3800 seconds.

    :param demux_fcid_dir: Ignored; the value is replaced by the hard-coded paths
    :param restrict_to_projects: Not used in this function
    :param restrict_to_samples: Not used in this function
    """
    flowcell_dirs = (
        # G.Grigelioniene_14_01
        "/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108
        "/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun
        "/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
        # M.Kaller_14_05  P1170_103, P1170_105  --- rerun
        "/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
        ###UPPSALA
        # uppsala run
        "/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
        # -- rerun
        "/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
    )
    for demux_fcid_dir in flowcell_dirs:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        time.sleep(60)  # wait for 1 minute

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    analysis_data_root = "/proj/a2014205/nobackup/NGI/analysis_ready/DATA"
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        for project_charon in charon_session.projects_get_all()['projects']:
            project_dir = os.path.join(analysis_data_root,
                                       project_charon["name"])
            if not os.path.isdir(project_dir):
                continue
            projectObj = recreate_project_from_filesystem(project_dir, None)
            launch_analysis_for_samples([projectObj])
        time.sleep(3800)
示例#19
0
def get_valid_seqruns_for_sample(project_id,
                                 sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Collect, per libprep, the seqruns of a sample that are still valid to process.

    A seqrun is valid when the value of ``status_field`` in its Charon record
    is not DONE (or when ``include_done_seqruns`` is set). A seqrun whose
    record lacks the field is logged as an error and treated as valid.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status")
    if status_field not in valid_status_values:
        allowed = ", ".join(valid_status_values)
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(allowed,
                                                          status_field))

    charon_session = CharonSession()
    libprep_records = charon_session.sample_get_libpreps(
        projectid=project_id, sampleid=sample_id)['libpreps']
    valid_by_libprep = collections.defaultdict(list)
    for libprep in libprep_records:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_records = charon_session.libprep_get_seqruns(
            projectid=project_id, sampleid=sample_id,
            libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            try:
                aln_status = charon_session.seqrun_get(
                    projectid=project_id,
                    sampleid=sample_id,
                    libprepid=libprep_id,
                    seqrunid=seqrun_id)[status_field]
            except KeyError:
                # A record without the status field counts as valid
                LOG.error(
                    'Field "{}" not available for seqrun "{}" in Charon '
                    'for project "{}" / sample "{}". Including as '
                    'valid.'.format(status_field, seqrun_id, project_id,
                                    sample_id))
                aln_status = None
            if aln_status == "DONE" and not include_done_seqruns:
                LOG.info('Skipping seqrun "{}" due to {}'
                         '"{}"'.format(seqrun_id, status_field, aln_status))
                continue
            valid_by_libprep[libprep_id].append(seqrun_id)
    return dict(valid_by_libprep)
示例#20
0
 def get_delivery_token_in_charon(self):
     '''Returns the delivery_token of the Charon project record of
     self.projectid, or 'NO-TOKEN' when the field is missing or empty
     '''
     project_charon = CharonSession().project_get(self.projectid)
     return project_charon.get('delivery_token') or 'NO-TOKEN'
示例#21
0
    def __init__(self, config, log, charon_session=None):
        """
        Set up the connector with its configuration, log handle and Charon session.

        :param config: dict with configuration options
        :param log: a log handle where the connector will log its output
        :param charon_session: an active database session to use; when it is
            not given (or is falsy) a new CharonSession is created from config
        """
        self.config = config
        self.log = log
        if charon_session:
            self.charon_session = charon_session
        else:
            self.charon_session = CharonSession(config=self.config)
示例#22
0
def reset_charon_records_by_name(project_id, restrict_to_samples=None,
                                 restrict_to_libpreps=None, restrict_to_seqruns=None):
    """Reset the Charon records of a project and of the entities below it.

    The project record itself is always reset. Its samples, their libpreps and
    their seqruns are then walked in that order and reset one by one. Each
    ``restrict_to_*`` argument, when non-empty, limits the reset at that level
    to the listed ids; an entity that is skipped is not descended into, so
    nothing below it is reset either.

    :param str project_id: The Charon id of the project to reset
    :param list restrict_to_samples: Sample ids to reset (default: all)
    :param list restrict_to_libpreps: Libprep ids to reset (default: all)
    :param list restrict_to_seqruns: Seqrun ids to reset (default: all)
    """
    # None (or any other falsy value) means "no restriction" at that level.
    restrict_to_samples = restrict_to_samples or []
    restrict_to_libpreps = restrict_to_libpreps or []
    restrict_to_seqruns = restrict_to_seqruns or []
    charon_session = CharonSession()
    LOG.info("Resetting Charon record for project {}".format(project_id))
    charon_session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_id))
    for sample in charon_session.project_get_samples(projectid=project_id).get('samples', []):
        sample_id = sample['sampleid']
        if restrict_to_samples and sample_id not in restrict_to_samples:
            LOG.info("Skipping project/sample {}/{}: not in list of samples to use "
                     "({})".format(project_id, sample_id, ", ".join(restrict_to_samples)))
            continue
        LOG.info("Resetting Charon record for project/sample {}/{}".format(project_id,
                                                                           sample_id))
        charon_session.sample_reset(projectid=project_id, sampleid=sample_id)
        LOG.info("Charon record for project/sample {}/{} reset".format(project_id,
                                                                       sample_id))
        for libprep in charon_session.sample_get_libpreps(projectid=project_id,
                                                          sampleid=sample_id).get('libpreps', []):
            libprep_id = libprep['libprepid']
            if restrict_to_libpreps and libprep_id not in restrict_to_libpreps:
                LOG.info("Skipping project/sample/libprep {}/{}/{}: not in list "
                         "of libpreps to use ({})".format(project_id, sample_id,
                                                          libprep_id, ", ".join(restrict_to_libpreps)))
                continue
            # The separator between "sample" and "libprep" used to be missing,
            # which logged "project/samplelibprep".
            LOG.info("Resetting Charon record for project/sample/"
                     "libprep {}/{}/{}".format(project_id, sample_id, libprep_id))
            charon_session.libprep_reset(projectid=project_id, sampleid=sample_id,
                                         libprepid=libprep_id)
            LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                     "reset".format(project_id, sample_id, libprep_id))
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id).get('seqruns', []):
                seqrun_id = seqrun['seqrunid']
                if restrict_to_seqruns and seqrun_id not in restrict_to_seqruns:
                    LOG.info("Skipping project/sample/libprep/seqrun {}/{}/{}/{}: "
                             "not in list of seqruns to use ({})".format(project_id,
                                                                         sample_id,
                                                                         libprep_id,
                                                                         seqrun_id,
                                                                         ", ".join(restrict_to_seqruns)))
                    continue
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_id, sample_id,
                                                     libprep_id, seqrun_id))
                charon_session.seqrun_reset(projectid=project_id, sampleid=sample_id,
                                            libprepid=libprep_id, seqrunid=seqrun_id)
                LOG.info("Charon record for project/sample/libprep/seqrun "
                         "{}/{}/{}/{} reset".format(project_id, sample_id,
                                                    libprep_id, seqrun_id))
示例#23
0
    def test_workflows(self):
        """Run the analysis pipeline once for every workflow listed in the config.

        For each entry under ``test_data.workflows`` in the NGI config file this
        checks that the required settings are present, deletes and re-creates the
        test project in Charon, and then calls process_demultiplexed_flowcells
        on the configured flowcell path.

        :raises ValueError: If a workflow entry lacks ``local_files``,
            ``local_files.flowcell`` or any of the ``test_project`` fields.
        """
        config_file_path = locate_ngi_config()
        config = load_yaml_config(config_file_path)

        # .items() instead of .iteritems() so the loop runs on Python 2 and 3.
        for workflow_name, workflow_dict in config.get("test_data", {}).get(
                "workflows", {}).items():
            # Load and rewrite config file as needed
            customize_config_dict = workflow_dict.get("customize_config")
            if customize_config_dict:
                # NOTE(review): config is rebound here, so customisations from
                # one workflow carry over to later iterations -- confirm intended.
                config = update_dict(config, customize_config_dict)

            #self._install_test_files(workflow_dict)
            LOG.info(
                'Starting test analysis pipeline for workflow "{}"'.format(
                    workflow_name))
            try:
                local_files = workflow_dict["local_files"]
            except KeyError:
                raise ValueError(
                    "Required paths to input files for testing do not "
                    "exist in config file (test_data.workflows."
                    "{}.local_files); cannot proceed.".format(workflow_name))
            try:
                flowcell_path = local_files["flowcell"]
            except KeyError:
                raise ValueError(
                    "Path to flowcell is required and not specified "
                    "in configuration file (test_data.workflows."
                    "{}.local_files.flowcell); cannot proceed.".format(
                        workflow_name))
            try:
                test_project = workflow_dict["test_project"]
                test_proj_id = test_project["project_id"]
                test_proj_name = test_project["project_name"]
                test_proj_bpa = test_project["bpa"]
            except KeyError as e:
                # KeyError has no "msg" attribute; format the exception itself,
                # which renders as the missing key.
                raise ValueError(
                    "Test project information is missing from config "
                    "file (under test_data.workflows.{}.test_project: "
                    "{}); cannot proceed.".format(workflow_name, e))
            charon_session = CharonSession(config=config)
            # Start from a clean slate; a failing delete (e.g. the project does
            # not exist yet) is deliberately ignored.
            try:
                charon_session.project_delete(projectid=test_proj_id)
            except CharonError:
                pass
            charon_session.project_create(projectid=test_proj_id,
                                          name=test_proj_name,
                                          status="OPEN",
                                          best_practice_analysis=test_proj_bpa)

            process_demultiplexed_flowcells([flowcell_path],
                                            fallback_libprep="A",
                                            config=config)
 def setUpClass(cls):
     """Create the Charon session and the fixed ids shared by the tests of this class."""
     project_id = "P100000"
     cls.session = CharonSession()
     # Project: id, name and a fresh temporary directory
     cls.p_id = project_id
     cls.p_name = "Y.Mom_14_01"
     cls.p_bp = tempfile.mkdtemp()
     # Sample id is derived from the project id
     cls.s_id = project_id + "_101"
     # Libprep
     cls.l_id = "A"
     # Seqrun
     cls.sr_id = generate_run_id()
示例#25
0
 def setUpClass(cls):
     """Create the Charon session and the fixed ids/values shared by the tests of this class."""
     project_id = "P100000"
     cls.session = CharonSession()
     # Project
     cls.p_id = project_id
     cls.p_name = "Y.Mom_14_01"
     # Sample id is derived from the project id
     cls.s_id = project_id + "_101"
     # Libprep
     cls.l_id = "A"
     # Seqrun: id plus the values stored as sr_total_reads and sr_mac
     cls.sr_id = generate_run_id()
     cls.sr_total_reads = 1000000
     cls.sr_mac = 30
    def get_staged_samples_from_charon(self):
        """Return the ids of this project's samples whose Charon delivery status is STAGED.

        :returns: list of ``sampleid`` values, in the order Charon returned them
        :raises AssertionError: If the Charon response has no ``samples`` entry
        """
        response = CharonSession().project_get_samples(self.projectid)
        project_samples = response.get('samples')
        if project_samples is None:
            raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))

        return [entry.get('sampleid') for entry in project_samples
                if entry.get('delivery_status') == 'STAGED']
示例#27
0
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a launched sample analysis in the local database and flag it in Charon.

    A SampleAnalysis row is added to the local tracking database; the commit is
    attempted up to three times, sleeping 15 seconds after each
    OperationalError. Afterwards the sample's ``analysis_status`` in Charon is
    set to "UNDER_ANALYSIS" and recurse_status_for_sample is called with
    "RUNNING". A CharonError during that second step is logged and, unless
    ``config['quiet']`` is set, reported through mail_analysis; it is not
    re-raised.

    :param project: The project object (read: project_id, name, base_path)
    :param sample: The sample object (read: name)
    :param str workflow_subtask: The workflow being recorded
    :param str analysis_module_name: Stored as the ``engine`` of the record
    :param process_id: The process id of the analysis, if any
    :param slurm_job_id: The slurm job id of the analysis, if any
    :param dict config: Configuration options; only ``quiet`` is read here

    :raises RuntimeError: If the record could not be written to the database
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _ in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as lock_error:
                    LOG.warn('Database locked ("{}"). Waiting...'.format(lock_error))
                    time.sleep(15)
            else:
                # for/else: reached only when no attempt hit the "break" above.
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception here; it was previously formatted without
            # being bound by this handler.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample, workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))

        LOG.error(error_text)
        # config defaults to None, so guard the lookup.
        if not (config or {}).get('quiet'):
            # The ids are taken from the project/sample objects; the bare names
            # project_id/sample_id used before are not defined in this function.
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR", info_text=error_text)
示例#28
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The project's ``best_practice_analysis`` value is read from Charon and
    passed, together with ``config``, to load_engine_module.

    :param NGIProject project: The project to get the engine from.

    :raises RuntimeError: If load_engine_module raises RuntimeError; the
        message is prefixed with the project.
    """
    project_record = CharonSession().project_get(project.project_id)
    bpa_name = project_record["best_practice_analysis"]
    try:
        return load_engine_module(bpa_name, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
示例#29
0
def find_projects_from_samples(sample_list):
    """Given a list of samples, attempts to determine
    which projects they belong to using Charon records.

    A sample whose name starts with ``P`` + four digits + ``_`` is assumed to
    belong to that project; the guess is confirmed with a Charon lookup and
    the sample is reported as ownerless if the lookup fails. Any other sample
    is looked up across all projects and is only assigned when exactly one
    owning project is found. Samples with no owner or with several owners are
    logged as warnings and left out of the result.

    :param list sample_list: A list of the samples for which to find projects

    :returns: a dict of {project_id: set(samples)}
    :rtype: dict of sets

    :raises ValueError: If sample_list is not a list
    """
    # Validate first so that bad input fails before a Charon session is opened.
    if not isinstance(sample_list, list):
        raise ValueError("Input should be list.")

    STHLM_SAMPLE_RE = re.compile(r'(P\d{4})_')
    projects_dict = collections.defaultdict(set)
    no_owners_found = set()
    multiple_owners_found = set()
    charon_session = CharonSession()

    for sample_name in sample_list:
        # First see if we can just parse out the project id from the sample name
        m = STHLM_SAMPLE_RE.match(sample_name)
        if m:
            project_id = m.group(1)
            try:
                # Ensure that we guessed right
                charon_session.sample_get(project_id, sample_name)
            except CharonError as e:
                LOG.debug('Project for sample "{}" appears to be "{}" but is not '
                          'present in Charon ({})'.format(sample_name, project_id, e))
                no_owners_found.add(sample_name)
            else:
                projects_dict[project_id].add(sample_name)
        else:
            # Otherwise check all the projects for matching samples (returns list or None)
            owner_projects_list = charon_session.sample_get_projects(sample_name)
            if not owner_projects_list:
                no_owners_found.add(sample_name)
            elif len(owner_projects_list) > 1:
                multiple_owners_found.add(sample_name)
            else:
                projects_dict[owner_projects_list[0]].add(sample_name)
    # Sets have no defined order; sort so the warnings are reproducible.
    if no_owners_found:
        LOG.warn("No projects found for the following samples: {}".format(
            ", ".join(sorted(no_owners_found))))
    if multiple_owners_found:
        LOG.warn('Multiple projects found with the following samples (owner '
                 'could not be unambiguously determined): {}'.format(
                     ", ".join(sorted(multiple_owners_found))))
    return dict(projects_dict)
示例#30
0
 def get_samples_from_charon(self, delivery_status='STAGED'):
     """Return the ids of this project's samples that have the given delivery status.

     Passing ``delivery_status=None`` disables the filter, so every sample id
     of the project is returned.

     Raises AssertionError if the Charon response has no ``samples`` entry.
     """
     response = CharonSession().project_get_samples(self.projectid)
     project_samples = response.get('samples')
     if project_samples is None:
         raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
     return [entry.get('sampleid') for entry in project_samples
             if entry.get('delivery_status') == delivery_status
             or delivery_status is None]