def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon from a workflow return code.

    The project name and sample id are extracted from ``job_id`` using
    ``STHLM_UUSNP_SAMPLE_RE``; the project name is then translated into a
    project id with ``get_project_id_from_name``.

    :param str job_id: The job identifier; it must match STHLM_UUSNP_SAMPLE_RE
                       and provide "project_name" and "sample_id" groups
    :param int return_code: The return code of the workflow process
                            (None -> "RUNNING", 0 -> "DONE", else "COMPUTATION_FAILED")
    :param str run_dir: The directory where results are stored (not used by this function)

    :raises RuntimeError: If the job id could not be parsed or
                          if the Charon database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # Presumably STHLM_UUSNP_SAMPLE_RE is a compiled regex: a failed match
        # then surfaces as AttributeError (None.groupdict()) and a non-string
        # job id as TypeError. Other errors from the name lookup propagate.
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # Three placeholders, three arguments: status, sample id, error text.
        error_msg = ('Failed to update sample status to "{}" for sample "{}" '
                     'in Charon database: {}'.format(status, sample_id, e))
        raise RuntimeError(error_msg)
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    If the configuration defines ``genotyping.lower_bound_cutoff``, the sample's
    "genotype_status" is also set: "FAILED" when the concordance is below the
    cutoff, "PASSED" otherwise. Without a cutoff only the concordance is stored.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file (not used by this function)

    :raises CharonError: If there is some Error -- with Charon
    :raises IOError: If the path specified is missing or inaccessible
    :raises ValueError: If the specified sample has no data in the gtc file
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    # config defaults to None; treat that as "no cutoff configured" instead of
    # failing with AttributeError on None.get().
    gtc_lower_bound = (config or {}).get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # Compare against the configured cutoff (the value was previously
        # compared with itself, so no sample could ever be marked FAILED).
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
示例#3
0
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The alphanumeric database-friendly name of the project (e.g. "P123")
    :rtype: str

    :raises CharonError: If the lookup fails with any status code other than 404
    :raises ValueError: If the project has no project id in the database or if the project
                        does not exist in Charon (the latter carries ``status_code == 404``)
    """
    charon_session = CharonSession()

    try:
        project_record = charon_session.project_get(project_name)
    except CharonError as e:
        if e.status_code == 404:
            new_e = ValueError('Project "{}" missing from database: {}'.format(project_name, e))
            new_e.status_code = 404
            # Raise the translated error, as documented, not the original one.
            raise new_e
        else:
            raise
    try:
        return project_record['projectid']
    except KeyError:
        raise ValueError('Couldn\'t retrieve project id for project "{}"; '
                         'this project\'s database entry has no "projectid" value.'.format(project_name))
示例#4
0
class TestCommunicate(unittest.TestCase):
    """Tests that talk to Charon through a CharonSession.

    Each test runs against a project that setUp creates in Charon and
    tearDown deletes again.
    """

    def setUp(self):
        """Create the test project in Charon and check the returned project id."""
        self.project_id = "P100000"
        self.project_name = "P.Mayhem_14_01"
        self.project_data = {'projectid': self.project_id,
                             'name': self.project_name,
                             'status': None}
        self.session = CharonSession()
        project_url = self.session.construct_charon_url('project')
        payload = json.dumps(self.project_data)
        response = self.session.post(project_url, data=payload)
        assert response.status_code == 201, "Could not create test project in Charon: {}".format(response.reason)
        created_project = response.json()
        assert created_project['projectid'] == self.project_id, "Test project ID is incorrect"

    def tearDown(self):
        """Delete the test project from Charon."""
        project_url = self.session.construct_charon_url('project', self.project_id)
        response = self.session.delete(project_url)
        assert response.status_code == 204, "Could not delete test project from Charon: {}".format(response.reason)

    def test_get_project_id_from_name(self):
        """The name of the test project must resolve to its project id."""
        resolved_id = get_project_id_from_name(self.project_name)
        self.assertEqual(self.project_id, resolved_id)

    def test_rebuild_project_obj_from_charon(self):
        """Placeholder: no checks are performed yet."""
        # Create fake project / sample / libprep / seqrun
        pass
def fetch_charon(context, project, threshold, all_samples):
    """
    Will fetch samples of the specified project from Charon and print the concordance

    Samples without a genotype status or without a concordance value are
    skipped. Unless ``all_samples`` is set, only samples whose concordance is
    less than or equal to ``threshold`` are printed; a concordance of 0 is
    never printed. Any error is logged rather than raised.

    :param context: Not used by this function
    :param str project: The project whose samples are fetched from Charon
    :param float threshold: Concordance at or below which a sample is printed
    :param bool all_samples: Print every checked sample, ignoring the threshold
    """
    try:
        # get result from charon
        charon_session = CharonSession()
        result = charon_session.project_get_samples(project)
        samples = {}
        for sample in result.get('samples'):
            status = sample.get('genotype_status')
            concordance = sample.get('genotype_concordance')
            # exclude samples which were not yet checked; this has to happen
            # before the float() conversion, because float(None) raises
            # TypeError and would abort the listing for the whole project
            if status is None or concordance is None:
                continue
            samples[sample.get('sampleid')] = (float(concordance), status)

        # print output
        if not all_samples and samples:
            print('Samples below threshold: {}%'.format(threshold))
        for sample_id in sorted(samples):
            concordance, status = samples[sample_id]
            # if --all, we don't care about threshold
            if all_samples or concordance <= threshold:
                # do not print 0%
                if concordance != 0:
                    print('{} {}% {}'.format(sample_id, concordance, status))
    except Exception as e:
        log.error("Can't fetch Charon. Error says: {}".format(str(e)))
示例#6
0
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """Refuse to continue when a seqrun of this sample is already being
    analyzed, or has already been analyzed, and restarting was not requested.

    Every seqrun of every libprep of the sample is looked up in Charon and its
    "alignment_status" is inspected.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: allow seqruns whose status is RUNNING
    :param boolean restart_finished_jobs: allow seqruns whose status is DONE

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    session = CharonSession()
    libprep_listing = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)
    for libprep in libprep_listing['libpreps']:
        libprep_id = libprep['libprepid']
        seqrun_listing = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)
        for seqrun in seqrun_listing['seqruns']:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = session.seqrun_get(projectid=project_id,
                                               sampleid=sample_id,
                                               libprepid=libprep_id,
                                               seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            still_running = aln_status == "RUNNING" and not restart_running_jobs
            already_done = aln_status == "DONE" and not restart_finished_jobs
            if still_running or already_done:
                # The message is built from the objects themselves, not their ids.
                message = ('Project/Sample "{}/{}" has a preexisting '
                           'seqrun "{}" with status "{}"'.format(project_obj,
                                                                 sample_obj,
                                                                 seqrun_id,
                                                                 aln_status))
                raise RuntimeError(message)
示例#7
0
def get_finished_seqruns_for_sample(project_id, sample_id,
                                    include_failed_libpreps=False):
    """Collect, per libprep, the seqruns of a sample whose alignment is DONE.

    Libpreps whose "qc" value is "FAILED" are skipped unless
    ``include_failed_libpreps`` is set. Libpreps without any finished seqrun
    do not appear in the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Also look at libpreps that failed QC

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    session = CharonSession()
    libprep_listing = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)
    finished = collections.defaultdict(list)
    for libprep in libprep_listing['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_listing = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)
        for seqrun in seqrun_listing['seqruns']:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = session.seqrun_get(projectid=project_id,
                                               sampleid=sample_id,
                                               libprepid=libprep_id,
                                               seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            if aln_status != "DONE":
                LOG.debug('Skipping seqrun "{}" due to alignment_status '
                          '"{}"'.format(seqrun_id, aln_status))
                continue
            finished[libprep_id].append(seqrun_id)
    # Hand back a plain dict so lookups of unknown libpreps raise KeyError.
    return dict(finished)
示例#8
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Build an NGIProject with its samples, libpreps and seqruns as listed in Charon.

    Each created object gets a ``status`` attribute taken from the "status"
    field of its Charon record ("unknown" when the field is absent).

    :param str analysis_top_dir: Used as base_path of the project object
    :param str project_name: Used as both name and dirname of the project object
    :param str project_id: The id used for every Charon lookup

    :returns: The populated project object
    :rtype: NGIProject

    :raises RuntimeError: If samples, libpreps or seqruns could not be fetched from Charon
    """
    # NOTE(review): project_dir and sample_dir are computed but never read;
    # they are retained so that behaviour stays exactly as before.
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    session = CharonSession()
    try:
        sample_records = session.project_get_samples(project_id)["samples"]
    except CharonError as err:
        message = "Could not access samples for project {}: {}".format(project_id, err)
        raise RuntimeError(message)
    for sample_record in sample_records:
        sample_id = sample_record.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample_record.get("status", "unknown")
        try:
            libprep_records = session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as err:
            message = "Could not access libpreps for project {} / sample {}: {}".format(
                project_id, sample_id, err)
            raise RuntimeError(message)
        for libprep_record in libprep_records:
            libprep_id = libprep_record.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id, dirname=libprep_id)
            libprep_obj.status = libprep_record.get("status", "unknown")
            try:
                seqrun_records = session.libprep_get_seqruns(
                    project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as err:
                message = ("Could not access seqruns for project {} / sample {} / "
                           "libprep {}: {}".format(project_id, sample_id, libprep_id, err))
                raise RuntimeError(message)
            for seqrun_record in seqrun_records:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun_record.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun_record.get("status", "unknown")
    return project_obj
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the "alignment_status" of every seqrun under the project's
    sample(s) to ``set_status`` in Charon.

    A failed update is logged and, unless ``config`` has a truthy "quiet"
    entry, reported through ``mail_analysis``; it does not stop the remaining
    seqruns from being processed.

    :param NGIProject project_obj: The project whose sample/libprep/seqrun tree is walked
    :param str set_status: The alignment status to write for each seqrun
    :param bool update_done: Not used by this function
    """

    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # The project is expected to hold a single sample, but it is an
        # iterator: keeping the libprep loop inside this loop means an empty
        # project is a no-op (instead of a NameError) and every sample is handled.
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info(('Updating status of project/sample/libprep/seqrun '
                          '"{}" to "{}" in Charon ').format(label, set_status))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_status)
                except CharonError as e:
                    error_text = ('Could not update status of project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(label, set_status, e))
                    LOG.error(error_text)
                    # NOTE(review): `config` is not a parameter here; presumably a
                    # module-level configuration object -- confirm.
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Store the mean autosomal coverage of each finished seqrun of a sample in Charon.

    The seqruns come from ``get_finished_seqruns_for_sample``; the coverage of
    each one is read with ``_parse_mean_coverage_from_qualimap``. A failed
    Charon update is logged and, unless ``config`` has a truthy "quiet" entry,
    mailed; the remaining seqruns are still processed.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    """
    finished_seqruns = get_finished_seqruns_for_sample(project_id, sample_id)

    session = CharonSession()
    for libprep_id, seqrun_ids in finished_seqruns.items():
        for seqrun_id in seqrun_ids:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
            try:
                session.seqrun_update(projectid=project_id,
                                      sampleid=sample_id,
                                      libprepid=libprep_id,
                                      seqrunid=seqrun_id,
                                      mean_autosomal_coverage=ma_coverage)
            except CharonError as err:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, err))
                LOG.error(error_text)
                # NOTE(review): `config` is not a parameter here; presumably a
                # module-level configuration object -- confirm.
                if config.get('quiet'):
                    continue
                mail_analysis(project_name=project_id, sample_name=sample_id,
                              engine_name="piper_ngi", level="ERROR", info_text=error_text)
def update_sample_duplication_and_coverage(project_id,
                                           sample_id,
                                           project_base_path,
                                           config=None,
                                           config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage of a sample.

    The values are parsed from the sample's ``.metrics`` file and from its
    qualimap ``genome_results.txt``; a value that cannot be parsed is logged
    and sent as 0. A failed Charon update is logged and, unless the
    configuration has a truthy "quiet" entry, mailed; it is not raised.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file (not used by this function)
    """

    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(
        project_base_path, 'ANALYSIS', project_id, 'piper_ngi',
        '06_final_alignment_qc', "{}.clean.dedup.qc".format(sample_id),
        "genome_results.txt")

    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort: any parsing problem falls back to 0, but interpreter
        # exits and keyboard interrupts are no longer swallowed.
        dup_pc = 0
        LOG.error(
            "Cannot find {}.metrics file for duplication rate at {}. Continuing."
            .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error(
            "Cannot find genome_results.txt file for sample coverage at {}. Continuing."
            .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info(
            'Updating sample "{}" in '
            'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
            .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # One format call with one argument per placeholder; the previous
        # nested call referenced an undefined name and raised inside the handler.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate : {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet".
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id,
                          sample_name=sample_id,
                          engine_name="piper_ngi",
                          level="ERROR",
                          info_text=error_text)
示例#12
0
def get_valid_seqruns_for_sample(project_id,
                                 sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Collect, per libprep, the seqruns of a sample that still need processing.

    A seqrun is kept when the value of ``status_field`` in its Charon record is
    anything other than "DONE", or when ``include_done_seqruns`` is set. A
    seqrun whose record lacks the field is logged and kept. Libpreps whose
    "qc" value is "FAILED" are skipped unless ``include_failed_libpreps`` is set.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    allowed_fields = ("alignment_status", "genotype_status")
    if status_field not in allowed_fields:
        message = ('"status_field" argument must be one of {} '
                   '(value passed was "{}")'.format(", ".join(allowed_fields),
                                                    status_field))
        raise ValueError(message)
    session = CharonSession()
    libprep_listing = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)
    valid = collections.defaultdict(list)
    for libprep in libprep_listing['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_listing = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)
        for seqrun in seqrun_listing['seqruns']:
            seqrun_id = seqrun['seqrunid']
            try:
                field_value = session.seqrun_get(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id)[status_field]
            except KeyError:
                LOG.error(
                    'Field "{}" not available for seqrun "{}" in Charon '
                    'for project "{}" / sample "{}". Including as '
                    'valid.'.format(status_field, seqrun_id, project_id,
                                    sample_id))
                field_value = None
            if field_value == "DONE" and not include_done_seqruns:
                LOG.info('Skipping seqrun "{}" due to {}'
                         '"{}"'.format(seqrun_id, status_field, field_value))
                continue
            valid[libprep_id].append(seqrun_id)
    # Hand back a plain dict so lookups of unknown libpreps raise KeyError.
    return dict(valid)
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Process a fixed list of demultiplexed flowcells, then poll forever.

    All three arguments are ignored: the flowcell directories are hard-coded
    and every flowcell is processed without project or sample restriction.
    After the flowcells, the function loops endlessly: it refreshes the job
    status and launches analysis for every Charon project that has a
    directory under the analysis-ready DATA folder. It never returns.
    """
    flowcell_dirs = (
        # G.Grigelioniene_14_01
        "/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108
        "/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun
        "/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
        # M.Kaller_14_05  P1170_103, P1170_105  --- rerun
        "/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
        ###UPPSALA
        # uppsala run
        "/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
        # -- rerun
        "/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
    )
    for demux_fcid_dir in flowcell_dirs:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        time.sleep(60)  # wait for 1 minute

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    ####charon_session.project_delete("ND-0522")
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        for project_charon in charon_session.projects_get_all()['projects']:
            project_dir = os.path.join(
                "/proj/a2014205/nobackup/NGI/analysis_ready/DATA",
                project_charon["name"])
            if not os.path.isdir(project_dir):
                continue
            projectObj = recreate_project_from_filesystem(project_dir, None)
            launch_analysis_for_samples([projectObj])
        time.sleep(3800)
示例#14
0
 def get_delivery_token_in_charon(self):
     '''Return the delivery token stored in Charon for this project.

     The project is looked up by self.projectid; the string 'NO-TOKEN' is
     returned when the record has no (or an empty) "delivery_token" value.
     '''
     project_record = CharonSession().project_get(self.projectid)
     token = project_record.get('delivery_token')
     return token if token else 'NO-TOKEN'
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Run the hard-coded flowcells through the pipeline, then poll forever.

    The arguments are not honoured: the directories below replace
    ``demux_fcid_dir`` and no project/sample restriction is passed on. Once
    all flowcells are submitted, the function repeatedly refreshes the job
    status and launches analysis for each Charon project that has a directory
    in the analysis-ready DATA folder. It never returns.
    """
    analysis_ready_data = "/proj/a2014205/nobackup/NGI/analysis_ready/DATA"
    hardcoded_flowcells = [
        # G.Grigelioniene_14_01
        "/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108
        "/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun
        "/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
        # M.Kaller_14_05  P1170_103, P1170_105  --- rerun
        "/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
        ###UPPSALA
        # uppsala run
        "/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
        # -- rerun
        "/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
    ]
    for demux_fcid_dir in hardcoded_flowcells:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        time.sleep(60)  # wait for 1 minute

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    ####charon_session.project_delete("ND-0522")
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        all_projects = charon_session.projects_get_all()['projects']
        for project_charon in all_projects:
            project_name = project_charon["name"]
            project_dir = os.path.join(analysis_ready_data, project_name)
            if os.path.isdir(project_dir):
                rebuilt_project = recreate_project_from_filesystem(project_dir, None)
                launch_analysis_for_samples([rebuilt_project])
        time.sleep(3800)
示例#16
0
 def test_construct_charon_url(self):
     """construct_charon_url must place its arguments under <base url>/api/v1/."""
     url_parts = ["road", "to", "nowhere"]
     session = CharonSession()
     # This is a weird test because it's the same code as I'm testing but it also seems weird to code it worse
     base_url = session._base_url
     joined_parts = '/'.join(str(part) for part in url_parts)
     expected_url = "{}/api/v1/{}".format(base_url, joined_parts)
     # The method expects not a list but individual args
     actual_url = CharonSession().construct_charon_url(*url_parts)
     self.assertEqual(expected_url, actual_url)
示例#17
0
def main():
    """Mark the project as analysed by "hello_engine" in Charon, then run
    the hello-ga nextflow workflow and print what it produced.
    """
    args = cli_args()

    session = CharonSession()
    session.project_update(args.project_id, best_practice_analysis="hello_engine")
    # NOTE(review): the original comment said stdout and stderr are both picked
    # up; per the subprocess documentation check_output returns stdout only
    # unless stderr=subprocess.STDOUT is passed -- confirm which is intended.
    nextflow_command = ["./nextflow", "run", "hello-ga.nf"]
    output = subprocess.check_output(nextflow_command)
    print("The output is:")
    print(output)
    print("done")
示例#18
0
def update_analysis(project_id, status):
    """Record the outcome of an analysis run in Charon and send a notification mail.

    Samples whose "analysis_status" is "UNDER_ANALYSIS" become "ANALYZED"
    (truthy ``status``) or "FAILED". Within those samples, seqruns whose
    "alignment_status" is "RUNNING" become "DONE" or "FAILED" respectively;
    libpreps whose "qc" value is "FAILED" are left alone.

    :param str project_id: The id of the project
    :param bool status: Truthy if the analysis succeeded
    """
    charon_session = CharonSession()
    mail_level = 'INFO' if status else 'ERROR'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)
    if status:
        new_sample_status, new_seqrun_status = 'ANALYZED', 'DONE'
    else:
        new_sample_status, new_seqrun_status = 'FAILED', 'FAILED'

    samples = charon_session.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, new_sample_status))
        charon_session.sample_update(project_id,
                                     sample_id,
                                     analysis_status=new_sample_status)
        libpreps = charon_session.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = charon_session.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample_id, libprep_id, seqrun_id,
                    new_seqrun_status))
                charon_session.seqrun_update(project_id,
                                             sample_id,
                                             libprep_id,
                                             seqrun_id,
                                             alignment_status=new_seqrun_status)
def write_to_charon_alignment_results(base_path, project_name, project_id, sample_id, libprep_id, seqrun_id):
    """Update a sequencing run in Charon with its per-lane alignment metrics.

    The seqrun record is fetched from Charon, its "lanes" counter is reset to
    0, the metrics parsed from each matching Piper qc directory are merged in
    through ``update_seq_run_for_lane``, and the record is written back.

    :param str base_path: The directory containing the "ANALYSIS" tree
    :param str project_name: The name of the project (e.g. T.Durden_14_01)
    :param str project_id: The id of the project (e.g. P1171)
    :param str sample_id: The id of the sample
    :param str libprep_id: The id of the library prep
    :param str seqrun_id: The id of the sequencing run; its fourth
                          "_"-separated field is used as the Piper run id

    :raises CharonError: If the seqrun could not be read from or written to Charon
    :raises ValueError: If the Piper results are inaccessible, missing or incomplete
    """
    charon_session = CharonSession()
    try:
        seqrun_dict = charon_session.seqrun_get(project_id, sample_id, libprep_id, seqrun_id)
    except CharonError as e:
        raise CharonError('Error accessing database for project "{}", sample {}; '
                           'could not update Charon while performing best practice: '
                           '{}'.format(project_name, sample_id,  e))
    piper_run_id = seqrun_id.split("_")[3]
    seqrun_dict["lanes"] = 0
    if seqrun_dict.get("alignment_status") == "DONE":
        LOG.warn("Sequencing run \"{}\" marked as DONE but writing new alignment results; "
                 "this will overwrite the previous results.".format(seqrun_id))
    # Find all the appropriate files
    piper_result_dir = os.path.join(base_path, "ANALYSIS", project_name, "02_preliminary_alignment_qc")
    try:
        # listdir raises OSError for a missing, non-directory or unreadable
        # path; guarding it with isdir() would silently skip that check.
        os.listdir(piper_result_dir)
    except OSError as e:
        raise ValueError("Piper result directory \"{}\" inaccessible when updating stats to Charon: {}.".format(piper_result_dir, e))
    piper_qc_dir_base = "{}.{}.{}".format(sample_id, piper_run_id, sample_id)
    piper_qc_path = "{}*/".format(os.path.join(piper_result_dir, piper_qc_dir_base))
    piper_qc_dirs = glob.glob(piper_qc_path)
    if not piper_qc_dirs: # Something went wrong in the alignment or we can't parse the file format
        raise ValueError("Piper qc directories under \"{}\" are missing or in an unexpected format when updating stats to Charon.".format(piper_qc_path))

    # Examine each lane and update the dict with its alignment metrics
    for qc_lane in piper_qc_dirs:
        genome_result = os.path.join(qc_lane, "genome_results.txt")
        # This means that if any of the lanes are missing results, the sequencing run is marked as a failure.
        # We should flag this somehow and send an email at some point.
        if not os.path.isfile(genome_result):
            # Report the lane directory actually inspected, not its parent
            raise ValueError("File \"genome_results.txt\" is missing from Piper result directory \"{}\"".format(qc_lane))
        # Get the alignment results for this lane
        lane_alignment_metrics = parse_qualimap_results(genome_result)
        # Update the dict for this lane
        update_seq_run_for_lane(seqrun_dict, lane_alignment_metrics)
    try:
        # Update the seqrun in the Charon database
        charon_session.seqrun_update(**seqrun_dict)
    except CharonError as e:
        error_msg = ('Failed to update run alignment status for run "{}" in project {} '
                     'sample {}, library prep {} to  Charon database: {}'.format(seqrun_id,
                      project_name, sample_id, libprep_id, e))
        raise CharonError(error_msg)
def analyze(analysis_object, config=None, config_file_path=None):
    """Gather the fastq files to analyse for a project and submit the job.

    The project's Charon record decides the sequencing facility ("sthlm" for
    NGI-S, "upps" for NGI-U) and the reference genome. Nothing is submitted
    when the reference is unset or "other". The ``handle_*_status`` helpers
    decide, level by level, which samples, libpreps and seqruns take part.

    :param analysis_object: Object carrying the project to analyse and the
                            ``restart_running_jobs`` flag
    :param dict config: Not used in this function (optional)
    :param str config_file_path: Not used in this function (optional)

    :raises RuntimeError: If the project's sequencing facility is neither
                          NGI-S nor NGI-U
    """
    charon_session = CharonSession()
    project = analysis_object.project
    charon_pj = charon_session.project_get(project.project_id)
    reference_genome = charon_pj.get('reference')
    facility = charon_pj.get("sequencing_facility")
    if facility == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif facility == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        LOG.error("charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?")
        raise RuntimeError

    if not reference_genome or reference_genome == 'other':
        return

    fastq_files = []
    for sample in project:
        try:
            charon_reported_status = charon_session.sample_get(
                project.project_id, sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            analyze_this_sample = handle_sample_status(analysis_object, sample,
                                                       charon_reported_status)
        except CharonError as e:
            # A Charon failure is only logged; the sample is still walked
            LOG.error(e)
        else:
            if not analyze_this_sample:
                continue

        for libprep in sample:
            charon_lp_status = charon_session.libprep_get(
                project.project_id, sample.name, libprep.name).get('qc')
            if not handle_libprep_status(analysis_object, libprep, charon_lp_status):
                continue
            for seqrun in libprep:
                charon_sr_status = charon_session.seqrun_get(
                    project.project_id, sample.name, libprep.name,
                    seqrun.name).get('alignment_status')
                if not handle_seqrun_status(analysis_object, seqrun, charon_sr_status):
                    continue
                seqrun.being_analyzed = True
                sample.being_analyzed = sample.being_analyzed or True
                # filter out index files from analysis
                fastq_files.extend(
                    os.path.join(project.base_path, "DATA", project.project_id,
                                 sample.name, libprep.name, seqrun.name, fastq_file)
                    for fastq_file in seqrun.fastq_files
                    if not is_index_file(fastq_file))

    if not fastq_files:
        LOG.error("No fastq files obtained for the analysis fo project {}, please check the Charon status.".format(project.name))
        return

    if analysis_object.restart_running_jobs:
        stop_ongoing_analysis(analysis_object)
    fastq_dir = preprocess_analysis(analysis_object, fastq_files)
    sbatch_path = write_batch_job(analysis_object, reference_genome, fastq_dir)
    job_id = start_analysis(sbatch_path)
    analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                 project.project_id, 'rna_ngi')
    record_project_job(project, job_id, analysis_path)
示例#21
0
def analyze_sample(project, sample, config=None, config_file_path=None):
    """Analyze data at the sample level.

    Sample-level workflows are launched only when the sample's
    "total_autosomal_coverage" in Charon is above 28.4. Subtasks that are
    already running locally for this sample are skipped.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)
    """
    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    charon_session = CharonSession()
    # Determine if we can begin sample-level processing yet.
    # The only condition is that the coverage is above 28.4X.
    # If these conditions become more complex we can create a function for this
    sample_total_autosomal_coverage = charon_session.sample_get(project.project_id,
                                     sample.name).get('total_autosomal_coverage')
    # .get() yields None when the field is absent; ordering None against a
    # float raises TypeError on Python 3, so a missing value means "not ready".
    if not sample_total_autosomal_coverage or sample_total_autosomal_coverage <= 28.4:
        LOG.info('Sample "{}" in project "{}" is not yet ready for '
                 'processing.'.format(sample, project))
        return

    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
    for workflow_subtask in get_subtasks_for_level(level="sample"):
        if is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                            project_id=project.project_id,
                                            sample_id=sample.name):
            continue
        try:
            ## Temporarily logging to a file until we get ELK set up
            log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                 project_base_path=project.base_path,
                                                 project_name=project.name,
                                                 sample_id=sample.name)
            rotate_log(log_file_path)
            # Store the exit code of detached processes
            exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                        project_base_path=project.base_path,
                                                        project_name=project.name,
                                                        sample_id=sample.name)

            build_setup_xml(project, config, sample)
            command_line = build_piper_cl(project, workflow_subtask, exit_code_path, config)
            p_handle = launch_piper_job(command_line, project, log_file_path)
            try:
                record_process_sample(project=project, sample=sample,
                                      workflow_subtask=workflow_subtask,
                                      analysis_module_name="piper_ngi",
                                      analysis_dir=project.analysis_dir,
                                      pid=p_handle.pid)
            except RuntimeError as e:
                # The job was launched; failing to record it is only logged
                LOG.error(e)
                continue
        except (NotImplementedError, RuntimeError) as e:
            error_msg = ('Processing project "{}" / sample "{}" failed: '
                         '{}'.format(project, sample, repr(e)))
            LOG.error(error_msg)
示例#22
0
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    A seqrun is valid when its libprep has not failed QC (unless
    include_failed_libpreps is set) and its status field is not "DONE"
    (unless include_done_seqruns is set). A seqrun whose record lacks the
    status field is treated as valid.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: The seqrun field to check; "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values),
                                                          status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(projectid=project_id,
                                                           sampleid=sample_id,
                                                           libprepid=libprep_id,
                                                           seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                              'for project "{}" / sample "{}". Including as '
                              'valid.'.format(status_field, seqrun_id,
                                              project_id, sample_id))
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.info('Skipping seqrun "{}" due to {} '
                             '"{}"'.format(seqrun_id, status_field, aln_status))
        else:
            # Log the libprep id rather than the whole Charon record
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep.get('libprepid'), libprep.get("qc")))
    return dict(libpreps)
示例#23
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Load the analysis engine module configured for a project.

    The engine name is read from the "best_practice_analysis" field of the
    project's Charon record and handed to ``load_engine_module``.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Passed through to ``load_engine_module`` (optional)
    :param str config_file_path: Not used in this function (optional)

    :returns: The value returned by ``load_engine_module``
    :raises RuntimeError: If ``load_engine_module`` raises RuntimeError; the
                          message is prefixed with the project
    """
    project_record = CharonSession().project_get(project.project_id)
    engine_name = project_record["best_practice_analysis"]
    try:
        return load_engine_module(engine_name, config)
    except RuntimeError as load_error:
        raise RuntimeError('Project "{}": {}'.format(project, load_error))
    def get_staged_samples_from_charon(self):
        """Return the ids of this project's samples marked STAGED in Charon.

        :returns: The "sampleid" of every sample whose "delivery_status"
                  equals "STAGED"
        :rtype: list
        :raises AssertionError: If Charon's reply for ``self.projectid`` has
                                no "samples" entry
        """
        project_samples = CharonSession().project_get_samples(self.projectid).get('samples')
        if project_samples is None:
            raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))

        return [entry.get('sampleid')
                for entry in project_samples
                if entry.get('delivery_status') == 'STAGED']
def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Flag samples in Charon as having genotype data available.

    Genotype files are either given explicitly or searched for in one or
    more inbox directories (only files modified within the last num_days
    days are considered). Every sample parsed from those files whose
    "genotype_status" in Charon is unset or "NOT_AVAILABLE" is updated to
    "AVAILABLE".

    :param str inbox: A single inbox directory to search; when omitted the
                      inboxes come from config["environment"]["flowcell_inbox"]
    :param int num_days: Maximum age, in days, of the files to consider
    :param list genotype_files: Explicit genotype files; skips the inbox search
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file; only
                                 used in the error message (optional)

    :raises ValueError: If neither genotype_files, inbox nor a configured
                        inbox is available
    """
    inboxes = []
    cutoff_age = None
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        if inbox:
            # An explicitly passed inbox is searched on its own
            inboxes = [inbox]
        else:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError("No path to delivery inbox specified by argument "
                                 "or in configuration file ({}). Exiting.".format(config_file_path))
        # Absolute timestamp (seconds since the epoch); older files are ignored
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across all inboxes instead of keeping only the last one
        gt_files_valid = []
        for inbox_path in inboxes:
            inbox_path = os.path.abspath(inbox_path)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match, glob.glob(os.path.join(inbox_path, "*"))):
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))

    if not gt_files_valid:
        # Only reachable after an inbox search, so cutoff_age is set here
        LOG.info("No genotype files found under {} newer than "
                 "{}".format(", ".join(str(path) for path in inboxes),
                             time.ctime(cutoff_age)))
        return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
                find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id,
                                                                      sample,
                                                                      e))
示例#26
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    Looks up the project's "best_practice_analysis" value in Charon and
    passes it to ``load_engine_module`` together with ``config``.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Passed through to ``load_engine_module`` (optional)
    :param str config_file_path: Not used in this function (optional)

    :returns: The value returned by ``load_engine_module``
    :raises RuntimeError: If ``load_engine_module`` raises RuntimeError; the
                          message is prefixed with the project
    """
    charon_record = CharonSession().project_get(project.project_id)
    bp_analysis_name = charon_record["best_practice_analysis"]
    try:
        return load_engine_module(bp_analysis_name, config)
    except RuntimeError as engine_error:
        raise RuntimeError('Project "{}": {}'.format(project, engine_error))
示例#27
0
def find_projects_from_samples(sample_list):
    """Given a list of samples, attempts to determine
    which projects they belong to using Charon records.

    A sample name starting with "P" plus four digits and an underscore is
    first checked against the project id parsed from that prefix; any other
    name is looked up across all projects. Samples with no owner, or with
    more than one candidate owner, are logged and left out of the result.

    :param list sample_list: A list of the samples for which to find projects

    :returns: a dict of {project_id: set(samples)}
    :rtype: dict of sets

    :raises ValueError: If sample_list is not a list
    """
    # Validate before opening a Charon session so bad input fails fast
    if not isinstance(sample_list, list):
        raise ValueError("Input should be list.")

    STHLM_SAMPLE_RE = re.compile(r'(P\d{4})_')
    projects_dict = collections.defaultdict(set)
    no_owners_found = set()
    multiple_owners_found = set()
    charon_session = CharonSession()

    for sample_name in sample_list:
        # First see if we can just parse out the project id from the sample name
        m = STHLM_SAMPLE_RE.match(sample_name)
        if m:
            project_id = m.groups()[0]
            try:
                # Ensure that we guessed right
                charon_session.sample_get(project_id, sample_name)
            except CharonError as e:
                LOG.debug('Project for sample "{}" appears to be "{}" but is not '
                          'present in Charon ({})'.format(sample_name, project_id, e))
                no_owners_found.add(sample_name)
            else:
                projects_dict[project_id].add(sample_name)
        else:
            # Otherwise check all the projects for matching samples (returns list or None)
            owner_projects_list = charon_session.sample_get_projects(sample_name)
            if not owner_projects_list:
                no_owners_found.add(sample_name)
            elif len(owner_projects_list) > 1:
                multiple_owners_found.add(sample_name)
            else:
                projects_dict[owner_projects_list[0]].add(sample_name)
    if no_owners_found:
        LOG.warn("No projects found for the following samples: {}".format(", ".join(no_owners_found)))
    if multiple_owners_found:
        LOG.warn('Multiple projects found with the following samples (owner '
                 'could not be unamibugously determined): {}'.format(", ".join(multiple_owners_found)))
    return dict(projects_dict)
示例#28
0
 def get_samples_from_charon(self, delivery_status='STAGED'):
     """Return the ids of this project's samples with the given delivery status.

     :param delivery_status: The "delivery_status" value to select on;
                             ``None`` selects every sample
     :returns: The "sampleid" of each selected sample
     :rtype: list
     :raises AssertionError: If Charon's reply for ``self.projectid`` has no
                             "samples" entry
     """
     project_samples = CharonSession().project_get_samples(self.projectid).get('samples')
     if project_samples is None:
         raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
     return [entry.get('sampleid')
             for entry in project_samples
             if entry.get('delivery_status') == delivery_status or delivery_status is None]
示例#29
0
def reset_charon_records_by_object(project_obj):
    """Reset the Charon records of a project and everything beneath it.

    The project record is reset first; a CharonError there propagates to the
    caller. Sample, libprep and seqrun resets are best-effort: a CharonError
    is logged and the walk continues with the remaining records.

    :param NGIProject project_obj: The project whose records are reset;
                                   iterated for samples, libpreps and seqruns
    """
    charon_session = CharonSession()
    LOG.info("Resetting Charon record for project {}".format(project_obj))
    charon_session.project_reset(projectid=project_obj.project_id)
    LOG.info("Charon record for project {} reset".format(project_obj))
    for sample_obj in project_obj:
        LOG.info("Resetting Charon record for project/sample {}/{}".format(project_obj,
                                                                           sample_obj))
        try:
            charon_session.sample_reset(projectid=project_obj.project_id,
                                        sampleid=sample_obj.name)
            LOG.info("Charon record for project/sample {}/{} reset".format(project_obj,
                                                                           sample_obj))
        except CharonError as e:
            LOG.error("Unable to reset Charon record for project/sample {}/{}: "
                      "{}".format(project_obj, sample_obj, e))
        for libprep_obj in sample_obj:
            # The two literals are concatenated, so the "/" separators must
            # be part of the first one
            LOG.info("Resetting Charon record for project/sample/"
                     "libprep {}/{}/{}".format(project_obj, sample_obj, libprep_obj))
            try:
                charon_session.libprep_reset(projectid=project_obj.project_id,
                                             sampleid=sample_obj.name,
                                             libprepid=libprep_obj.name)
                LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                         "reset".format(project_obj, sample_obj, libprep_obj))
            except CharonError as e:
                LOG.error("Unable to reset Charon record for project/sample/libprep "
                          "{}/{}/{}: {}".format(project_obj, sample_obj,
                                                libprep_obj, e))
            for seqrun_obj in libprep_obj:
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_obj, sample_obj,
                                                     libprep_obj, seqrun_obj))
                try:
                    charon_session.seqrun_reset(projectid=project_obj.project_id,
                                                sampleid=sample_obj.name,
                                                libprepid=libprep_obj.name,
                                                seqrunid=seqrun_obj.name)
                    LOG.info("Charon record for project/sample/libprep/seqrun "
                             "{}/{}/{}/{} reset".format(project_obj, sample_obj,
                                                        libprep_obj, seqrun_obj))
                except CharonError as e:
                    LOG.error("Unable to reset Charon record for project/sample/"
                              "libprep/seqrun {}/{}/{}/{}: {}".format(project_obj,
                                                                      sample_obj,
                                                                      libprep_obj,
                                                                      seqrun_obj,
                                                                      e))
示例#30
0
def record_project_job(project,
                       job_id,
                       analysis_dir,
                       workflow=None,
                       engine='rna_ngi',
                       run_mode='local',
                       config=None,
                       config_file_path=None):
    """Record a submitted project-level job locally and flag it in Charon.

    A ``ProjectAnalysis`` row is added to the local database and committed.
    Then every sample flagged ``being_analyzed`` is set to "UNDER_ANALYSIS"
    in Charon, and its seqruns flagged ``being_analyzed`` (under libpreps
    whose Charon "qc" is not "FAILED") are set to "RUNNING". Any exception
    raised while updating Charon for a sample is logged, not propagated.

    :param NGIProject project: The project the job belongs to
    :param job_id: The id of the submitted job
    :param str analysis_dir: The directory the analysis runs in
    :param workflow: The workflow name stored with the record (optional)
    :param str engine: The engine name stored with the record
    :param str run_mode: The run mode stored with the record
    :param dict config: Not used in this function (optional)
    :param str config_file_path: Not used in this function (optional)
    """
    with get_session() as db_session:
        db_session.add(ProjectAnalysis(project_id=project.project_id,
                                       job_id=job_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       workflow=workflow,
                                       engine=engine,
                                       analysis_dir=analysis_dir,
                                       run_mode=run_mode))
        db_session.commit()

        sample_status_value = "UNDER_ANALYSIS"
        for sample in project:
            if not sample.being_analyzed:
                continue
            try:
                LOG.info('Updating Charon status for project/sample '
                         '{}/{} : {}'.format(project, sample,
                                             sample_status_value))
                CharonSession().sample_update(
                    projectid=project.project_id,
                    sampleid=sample.name,
                    analysis_status=sample_status_value)

                for libprep in sample:
                    libprep_record = CharonSession().libprep_get(
                        project.project_id, sample.name, libprep.name)
                    # Seqruns of libpreps that failed QC are not flagged
                    if libprep_record.get('qc') == "FAILED":
                        continue
                    for seqrun in libprep:
                        if not seqrun.being_analyzed:
                            continue
                        CharonSession().seqrun_update(
                            project.project_id,
                            sample.name,
                            libprep.name,
                            seqrun.name,
                            alignment_status="RUNNING")
            except Exception as e:
                LOG.error(
                    "Could not update Charon for sample {}/{} : {}".format(
                        project.project_id, sample.name, e))
 def setUp(self):
     """Create the test project in Charon and check that it was stored.

     Posts the project data to the URL built by ``construct_charon_url`` and
     asserts on a 201 response carrying the expected project id.
     """
     # Create the test project
     self.project_id = "P100000"
     self.project_name = "P.Mayhem_14_01"
     self.project_data = {"projectid": self.project_id,
                          "name": self.project_name,
                          "status": None}
     self.session = CharonSession()
     project_url = self.session.construct_charon_url('project')
     response = self.session.post(project_url,
                                  data=json.dumps(self.project_data))
     assert response.status_code == 201, "Could not create test project in Charon: {}".format(
         response.reason)
     created_project = response.json()
     assert created_project['projectid'] == self.project_id, "Test project ID is incorrect"
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and total read count for each.

    A failure to parse one genome_results.txt file, or to update one seqrun
    in Charon, is logged and the remaining seqruns are still processed.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration file; a truthy "quiet"
                        entry suppresses the error e-mail (optional)
    :param str config_file_path: Not used in this function (optional)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    # .items() works on both Python 2 and 3, unlike .iteritems()
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            genome_results_file_paths=glob.glob(os.path.join(piper_qc_dir, "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),"genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)

            reads=0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best-effort, but let KeyboardInterrupt/SystemExit through
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))

            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; treat that as "not quiet"
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi", level="ERROR", info_text=error_text)
示例#33
0
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and total read count for each.

    A failure to parse one genome_results.txt file, or to update one seqrun
    in Charon, is logged and the remaining seqruns are still processed.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration file; a truthy "quiet"
                        entry suppresses the error e-mail (optional)
    :param str config_file_path: Not used in this function (optional)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            genome_results_file_paths=glob.glob(os.path.join(piper_qc_dir, "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),"genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)

            reads=0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best-effort, but let KeyboardInterrupt/SystemExit through
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))

            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; treat that as "not quiet"
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi", level="ERROR", info_text=error_text)
    def test_create_charon_entries_from_project(self):
        """Create Charon entries for a one-sample project, then delete it.

        Builds a project with a single sample, libprep and seqrun from the
        fixture ids, passes it to ``create_charon_entries_from_project`` and
        always deletes the project from Charon afterwards.
        """
        # Create the NGIObjects
        project = NGIProject(name=self.p_name,
                             dirname=self.p_name,
                             project_id=self.p_id,
                             base_path=self.p_bp)
        sample = project.add_sample(name=self.s_id, dirname=self.s_id)
        libprep = sample.add_libprep(name=self.l_id, dirname=self.l_id)
        libprep.add_seqrun(name=self.sr_id, dirname=self.sr_id)

        try:
            # Create them in the db
            create_charon_entries_from_project(project)
        finally:
            # Clean up whether or not the creation succeeded
            CharonSession().project_delete(project.project_id)
示例#35
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Rebuild an NGIProject object tree from the records stored in Charon.

    Every sample, libprep and seqrun that Charon lists for the project is
    added to the returned object, and each gets a ``status`` attribute taken
    from its Charon record ("unknown" when the record has no "status").

    :param str analysis_top_dir: Used as the base_path of the project object
    :param str project_name: The name (and dirname) of the project
    :param str project_id: The id of the project in Charon

    :returns: The populated project object
    :rtype: NGIProject
    :raises RuntimeError: If any of the Charon lookups raises a CharonError
    """
    # NOTE(review): project_dir and sample_dir are computed but not used
    # anywhere else in this function.
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name, dirname=project_name,
                             project_id=project_id, base_path=analysis_top_dir)
    charon_session = CharonSession()

    try:
        sample_records = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))

    for sample_record in sample_records:
        sample_id = sample_record.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample_record.get("status", "unknown")

        try:
            libprep_records = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(
                project_id, sample_id, e))

        for libprep_record in libprep_records:
            libprep_id = libprep_record.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id, dirname=libprep_id)
            libprep_obj.status = libprep_record.get("status", "unknown")

            try:
                seqrun_records = charon_session.libprep_get_seqruns(
                    project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))

            for seqrun_record in seqrun_records:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun_record.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun_record.get("status", "unknown")

    return project_obj
示例#36
0
 def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
     '''Add a SUPR delivery project name to this sample's delivery_projects in Charon.

     The sample identified by self.projectid / self.sampleid is fetched and,
     unless the name is already listed, the name is appended to its
     "delivery_projects" list and the sample is updated. Any exception is
     logged and swallowed, so a failure here does not propagate.

     :param supr_name_of_delivery: The delivery project name to record
     '''
     charon_session = CharonSession()
     try:
         # Fetch the sample record and its current list of delivery projects
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Test membership against the list itself; testing against the sample
         # record would compare with its keys and append duplicates every time.
         if supr_name_of_delivery not in delivery_projects:
             delivery_projects.append(supr_name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated with value {}'.format(self.sampleid, supr_name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for sample {} not updated with value {} because the value was already present'.format(self.sampleid, supr_name_of_delivery))
     except Exception as e:
         # Deliberately best-effort: report the problem but do not raise
         logger.error('Failed to update delivery_projects in charon while delivering {}. Error says: {}'.format(self.sampleid, e))
         logger.exception(e)
示例#37
0
def update_coverage_for_sample_seqruns(project_id,
                                       sample_id,
                                       piper_qc_dir,
                                       config=None,
                                       config_file_path=None):
    """Update Charon with the mean autosomal coverage of a sample's seqruns.

    The seqruns are obtained from get_finished_seqruns_for_sample; for each
    one the coverage is parsed from the qualimap output and written to the
    seqrun record in Charon. A failed Charon update is logged and, unless
    the configuration sets "quiet", reported via mail_analysis; processing
    then continues with the next seqrun.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: Configuration; only the "quiet" key is read here
    :param str config_file_path: Not used in this function body

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    # .items() behaves the same here on Python 2 and also works on Python 3
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id,
                                         seqrun_id)
            ma_coverage = parse_mean_coverage_from_qualimap(
                piper_qc_dir, sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(
                         label, ma_coverage))
            try:
                charon_session.seqrun_update(
                    projectid=project_id,
                    sampleid=sample_id,
                    libprepid=libprep_id,
                    seqrunid=seqrun_id,
                    mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = (
                    'Could not update project/sample/libprep/seqrun "{}" '
                    'in Charon with mean autosomal coverage '
                    '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; treat a missing config as "not quiet"
                # instead of failing with AttributeError inside the handler.
                if not (config and config.get('quiet')):
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
示例#38
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is examined in the order Charon returns
    them; the id of the first one that has a seqrun whose "seqrunid"
    equals ``fcid`` is returned.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype: str
    :raises ValueError: If no match was found, or if a CharonError was
                        raised while querying the database.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        for libprep in libpreps or ():
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep['libprepid'])['seqruns']
            # A libprep without seqruns must not end the search: a later
            # libprep may still have been sequenced on this flowcell.
            for seqrun in seqruns or ():
                if seqrun["seqrunid"] == fcid:
                    ## BUG if we have one sample with two libpreps on the same flowcell,
                    ##     this just picks the first one it encounters; instead,
                    ##     it should raise an Exception. Requires restructuring.
                    return libprep['libprepid']
        # No libpreps, no seqruns, or no seqrun matching the flowcell
        raise CharonError("No match", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Load the analysis engine module for a project's best practice analysis.

    The analysis name is read from the "best_practice_analysis" field of the
    project's Charon record and handed to load_engine_module.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed through to load_engine_module
    :param config_file_path: Not used in this function body

    :returns: Whatever load_engine_module returns for the analysis name
    :raises RuntimeError: If the "best_practice_analysis" lookup raises
                          KeyError, or if load_engine_module raises RuntimeError
    """
    session = CharonSession()
    try:
        bpa_name = session.project_get(project.project_id)["best_practice_analysis"]
    except KeyError:
        # NOTE(review): the message mentions a fallback to "whole_genome_reseq",
        # but no fallback happens here -- the error is raised.
        raise RuntimeError('No best practice analysis specified in Charon for '
                           'project "{}". Using "whole_genome_reseq"'.format(project))
    try:
        return load_engine_module(bpa_name, config)
    except RuntimeError as load_error:
        # Prefix the loader's message with the project it concerns
        raise RuntimeError('Project "{}": {}'.format(project, load_error))
示例#40
0
def write_status_to_charon(project_id, return_code):
    """Update the status of a workflow for a project in the Charon database.

    The project status is set to "CLOSED" when ``return_code`` equals 0 and
    to "FAILED" for any other value (including None).

    :param project_id: The project identifier passed to Charon's project_update
    :param int return_code: The return code of the workflow process

    :raises RuntimeError: If the Charon database could not be updated
    """
    ## Consider keeping on CharonSession open. What's the time savings?
    charon_session = CharonSession()
    ## Is "CLOSED" correct here?
    # Compare by value: "is 0" tests object identity, which is an
    # implementation detail for ints and a SyntaxWarning on newer Pythons.
    status = "CLOSED" if return_code == 0 else "FAILED"
    try:
        charon_session.project_update(project_id, status=status)
    except CharonError as e:
        error_msg = ('Failed to update project status to "{}" for "{}" '
                     'in Charon database: {}'.format(status, project_id, e))
        raise RuntimeError(error_msg)
示例#41
0
 def add_dds_name_delivery_in_charon(self, name_of_delivery):
     """Add a delivery project name to this sample's delivery_projects in Charon.

     The sample identified by self.projectid / self.sampleid is fetched and,
     unless the name is already listed, the name is appended to its
     "delivery_projects" list and the sample is updated. Any exception is
     logged and swallowed, so a failure here does not propagate.

     :param name_of_delivery: The delivery project name to record
     """
     charon_session = CharonSession()
     try:
         # Fetch the sample record and its current list of delivery projects
         sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
         delivery_projects = sample_charon['delivery_projects']
         # Test membership against the list itself; testing against the sample
         # record would compare with its keys and append duplicates every time.
         if name_of_delivery not in delivery_projects:
             delivery_projects.append(name_of_delivery)
             charon_session.sample_update(self.projectid, self.sampleid, delivery_projects=delivery_projects)
             logger.info('Charon delivery_projects for sample {} updated '
                         'with value {}'.format(self.sampleid, name_of_delivery))
         else:
             logger.warning('Charon delivery_projects for sample {} not updated '
                            'with value {} because the value was already present'.format(self.sampleid, name_of_delivery))
     except Exception:
         # Deliberately best-effort: logger.exception records the traceback
         logger.exception('Failed to update delivery_projects in charon while delivering {}.'.format(self.sampleid))
示例#42
0
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module that matches the given project.

    Looks up the project's "best_practice_analysis" value in Charon and asks
    load_engine_module for the corresponding module.

    :param NGIProject project: The project to get the engine from.
    :param config: Forwarded to load_engine_module
    :param config_file_path: Not used in this function body

    :returns: The value returned by load_engine_module
    :raises RuntimeError: If the "best_practice_analysis" lookup raises
                          KeyError, or if load_engine_module raises RuntimeError
    """
    charon = CharonSession()
    try:
        project_record = charon.project_get(project.project_id)
        analysis_name = project_record["best_practice_analysis"]
    except KeyError:
        # NOTE(review): despite the wording of the message, nothing falls back
        # to "whole_genome_reseq" here; the function raises.
        missing_msg = ('No best practice analysis specified in Charon for '
                       'project "{}". Using "whole_genome_reseq"'.format(project))
        raise RuntimeError(missing_msg)
    try:
        engine_module = load_engine_module(analysis_name, config)
    except RuntimeError as err:
        raise RuntimeError('Project "{}": {}'.format(project, err))
    return engine_module
def write_status_to_charon(project_id, return_code):
    """Update the status of a workflow for a project in the Charon database.

    The project status is set to "CLOSED" when ``return_code`` equals 0 and
    to "FAILED" for any other value (including None).

    :param project_id: The project identifier passed to Charon's project_update
    :param int return_code: The return code of the workflow process

    :raises RuntimeError: If the Charon database could not be updated
    """
    ## Consider keeping on CharonSession open. What's the time savings?
    charon_session = CharonSession()
    ## Is "CLOSED" correct here?
    # Compare by value: "is 0" tests object identity, which is an
    # implementation detail for ints and a SyntaxWarning on newer Pythons.
    status = "CLOSED" if return_code == 0 else "FAILED"
    try:
        charon_session.project_update(project_id, status=status)
    except CharonError as e:
        error_msg = ('Failed to update project status to "{}" for "{}" '
                     'in Charon database: {}'.format(status, project_id, e))
        raise RuntimeError(error_msg)
示例#44
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is examined in the order Charon returns
    them; the id of the first one that has a seqrun whose "seqrunid"
    equals ``fcid`` is returned.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype: str
    :raises ValueError: If no match was found, or if a CharonError was
                        raised while querying the database.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)["libpreps"]
        for libprep in libpreps or ():
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id, sample_name, libprep["libprepid"])["seqruns"]
            # A libprep without seqruns must not end the search: a later
            # libprep may still have been sequenced on this flowcell.
            for seqrun in seqruns or ():
                if seqrun["seqrunid"] == fcid:
                    ## BUG if we have one sample with two libpreps on the same flowcell,
                    ##     this just picks the first one it encounters; instead,
                    ##     it should raise an Exception. Requires restructuring.
                    return libprep["libprepid"]
        # No libpreps, no seqruns, or no seqrun matching the flowcell
        raise CharonError("No match", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError(
                'No library prep found for project "{}" / sample "{}" '
                '/ fcid "{}"'.format(project_id, sample_name, fcid)
            )
        else:
            raise ValueError(
                'Could not determine library prep for project "{}" '
                '/ sample "{}" / fcid "{}": {}'.format(project_id, sample_name, fcid, e)
            )
示例#45
0
    def test_workflows(self):
        # For every workflow listed under test_data.workflows in the config:
        # validate its settings, (re)create the test project in Charon and
        # run the flowcell processing on the configured flowcell path.
        config_file_path = locate_ngi_config()
        config = load_yaml_config(config_file_path)

        # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
        for workflow_name, workflow_dict in config.get("test_data", {}).get("workflows", {}).items():
            # Load and rewrite config file as needed
            # NOTE(review): config is rebound here, so customizations from one
            # workflow appear to carry over to the following ones -- confirm
            # that this is intended.
            customize_config_dict = workflow_dict.get("customize_config")
            if customize_config_dict:
                config = update_dict(config, customize_config_dict)

            #self._install_test_files(workflow_dict)
            LOG.info('Starting test analysis pipeline for workflow "{}"'.format(workflow_name))
            try:
                local_files = workflow_dict["local_files"]
            except KeyError:
                raise ValueError("Required paths to input files for testing do not "
                                 "exist in config file (test_data.workflows."
                                 "{}.local_files); cannot proceed.".format(workflow_name))
            try:
                flowcell_path = local_files["flowcell"]
            except KeyError:
                raise ValueError("Path to flowcell is required and not specified "
                                 "in configuration file (test_data.workflows."
                                 "{}.local_files.flowcell); cannot proceed.".format(workflow_name))
            try:
                test_project = workflow_dict["test_project"]
                test_proj_id = test_project["project_id"]
                test_proj_name = test_project["project_name"]
                test_proj_bpa = test_project["bpa"]
            except KeyError as e:
                # KeyError has no "msg" attribute; formatting the exception
                # itself yields the name of the missing key.
                raise ValueError("Test project information is missing from config "
                                 "file (under test_data.workflows.{}.test_project "
                                 "({})); cannot proceed.".format(workflow_name, e))
            charon_session = CharonSession(config=config)
            try:
                # Start from a clean slate; a CharonError from the delete is ignored
                charon_session.project_delete(projectid=test_proj_id)
            except CharonError:
                pass
            charon_session.project_create(projectid=test_proj_id, name=test_proj_name,
                                          status="OPEN", best_practice_analysis=test_proj_bpa)

            process_demultiplexed_flowcells([flowcell_path], fallback_libprep="A",
                                            config=config)
示例#46
0
    def test_create_charon_entries_from_project(self):
        # Build the NGI object hierarchy: project -> sample -> libprep -> seqrun
        ngi_project = NGIProject(name=self.p_name,
                                 dirname=self.p_name,
                                 project_id=self.p_id,
                                 base_path=self.p_bp)
        (ngi_project
         .add_sample(name=self.s_id, dirname=self.s_id)
         .add_libprep(name=self.l_id, dirname=self.l_id)
         .add_seqrun(name=self.sr_id, dirname=self.sr_id))

        try:
            # Create the corresponding entries in the db
            create_charon_entries_from_project(ngi_project)
        finally:
            # Clean up the project regardless of the outcome above
            cleanup_session = CharonSession()
            cleanup_session.project_delete(ngi_project.project_id)
示例#47
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is examined in the order Charon returns
    them; the id of the first one that has a seqrun whose "seqrunid"
    equals ``fcid`` is returned.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype: str
    :raises ValueError: If no match was found, or if a CharonError was
                        raised while querying the database.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        for libprep in libpreps or ():
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep['libprepid'])['seqruns']
            # Neither an empty seqrun list nor a libprep without a matching
            # seqrun may end the search: giving up inside the loop would mean
            # that only the first libprep is ever examined.
            for seqrun in seqruns or ():
                if seqrun["seqrunid"] == fcid:
                    # First match wins if several libpreps share the flowcell
                    return libprep['libprepid']
        # No libpreps, no seqruns, or no seqrun matching the flowcell
        raise CharonError("No match", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
示例#48
0
    def __init__(self, config, log, charon_session=None):
        """
        Set up a CharonConnector, the interface to the Charon sample tracking database.

        :param config: dict with configuration options
        :param log: a log handle where the connector will log its output
        :param charon_session: an active database session to reuse; when it is omitted
            (or otherwise falsy) a new CharonSession is created from the configuration
        """
        self.config = config
        self.log = log
        if not charon_session:
            # No usable session supplied: open one with our configuration
            charon_session = CharonSession(config=self.config)
        self.charon_session = charon_session
 def setUpClass(cls):
     # Shared fixture values for all tests in the class
     project_id = "P100000"

     cls.session = CharonSession()
     # Project: id, name and a fresh temporary base path
     cls.p_id = project_id
     cls.p_name = "Y.Mom_14_01"
     cls.p_bp = tempfile.mkdtemp()
     # Sample id is derived from the project id
     cls.s_id = project_id + "_101"
     # Libprep
     cls.l_id = "A"
     # Seqrun
     cls.sr_id = generate_run_id()
示例#50
0
def check_for_preexisting_sample_runs(project_obj,
                                      sample_obj,
                                      restart_running_jobs,
                                      restart_finished_jobs,
                                      status_field="alignment_status"):
    """If any analysis is undergoing or has completed for this sample's
    seqruns, raise a RuntimeError.

    Every seqrun of every libprep that Charon lists for the sample is
    checked. A seqrun blocks the run when its status is "RUNNING" or
    "UNDER_ANALYSIS" and restart_running_jobs is false, or when its status
    is "DONE" and restart_finished_jobs is false.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter
    :param str status_field: The field to check in Charon (seqrun level)

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    for libprep in sample_libpreps['libpreps']:
        libprep_id = libprep['libprepid']
        for seqrun in charon_session.libprep_get_seqruns(
                projectid=project_id, sampleid=sample_id,
                libprepid=libprep_id)['seqruns']:
            seqrun_id = seqrun['seqrunid']
            aln_status = charon_session.seqrun_get(
                projectid=project_id,
                sampleid=sample_id,
                libprepid=libprep_id,
                seqrunid=seqrun_id).get(status_field)
            # Group the two "in progress" states explicitly. Without the
            # grouping, "and" binds tighter than "or", so a "RUNNING" seqrun
            # would raise even when restart_running_jobs is set.
            blocked_running = (aln_status in ("RUNNING", "UNDER_ANALYSIS")
                               and not restart_running_jobs)
            blocked_finished = (aln_status == "DONE"
                                and not restart_finished_jobs)
            if blocked_running or blocked_finished:
                raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                                   'seqrun "{}" with status "{}"'.format(
                                       project_obj, sample_obj, seqrun_id,
                                       aln_status))
示例#51
0
 def setUpClass(cls):
     # Shared fixture values for all tests in the class
     project_id = "P100000"

     cls.session = CharonSession()
     # Project
     cls.p_id = project_id
     cls.p_name = "Y.Mom_14_01"
     # Sample id is derived from the project id
     cls.s_id = project_id + "_101"
     # Libprep
     cls.l_id = "A"
     # Seqrun: id plus the read count and coverage values used by the tests
     cls.sr_id = generate_run_id()
     cls.sr_total_reads = 1000000
     cls.sr_mac = 30
示例#52
0
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a launched sample analysis locally and flag it in Charon.

    A SampleAnalysis row is added to the local database session and
    committed, retrying up to three times (sleeping 15 seconds after each
    OperationalError). Afterwards the sample's analysis_status in Charon is
    set to "UNDER_ANALYSIS" and recurse_status_for_sample is called for the
    sample. A CharonError in that second step is logged and, unless the
    configuration sets "quiet", reported via mail_analysis; it is not raised.

    :param NGIProject project: The project the sample belongs to
    :param NGISample sample: The sample being analyzed
    :param str workflow_subtask: The workflow being run
    :param str analysis_module_name: Stored as the "engine" of the record
    :param process_id: The process id of the analysis, if any
    :param slurm_job_id: The slurm job id of the analysis, if any
    :param dict config: Configuration; only the "quiet" key is read here

    :raises RuntimeError: If the record could not be written to the database
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _ in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception here: the message below formats it, and the
            # name bound by the inner handler is not reliably available.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample, workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        # NOTE(review): verify that this call matches the signature of
        # recurse_status_for_sample in use (field name and value arguments).
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))

        LOG.error(error_text)
        # config defaults to None; treat a missing config as "not quiet"
        if not (config and config.get('quiet')):
            # Use the ids of the objects passed in; there are no local
            # project_id / sample_id names in this function.
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR", info_text=error_text)
示例#53
0
class DbConnections():
    """Bundles the status database connection and the Charon session."""

    def __init__(self):
        """Open both connections.

        The status database settings are read from the "statusdb" section of
        the YAML file named by the STATUS_DB_CONFIG environment variable.
        """
        with open(os.getenv('STATUS_DB_CONFIG'), 'r') as db_cred_file:
            # NOTE(review): yaml.load without an explicit Loader can construct
            # arbitrary objects and is rejected by recent PyYAML releases;
            # consider yaml.safe_load if the file only holds plain data.
            db_conf = yaml.load(db_cred_file)['statusdb']
        self.statusdbSess = sdb(db_conf, db="projects")
        self.CharonSess = CharonSession()

    def add_delivery_proj_in_charon(self, delivery_proj, projectid):
        '''Updates delivery_projects in Charon at project level

        The project is fetched and, unless the value is already listed, the
        value is appended to its "delivery_projects" list and the project is
        updated. Any exception is logged and swallowed.

        :param delivery_proj: The delivery project name to record
        :param projectid: The id of the project to update in Charon
        '''
        try:
            # Fetch the project and its current list of delivery projects
            project_charon = self.CharonSess.project_get(projectid)
            delivery_projects = project_charon['delivery_projects']
            if delivery_proj not in delivery_projects:
                delivery_projects.append(delivery_proj)
                self.CharonSess.project_update(projectid, delivery_projects=delivery_projects)
                logger.info('Charon delivery_projects for project {} updated with value {}'.format(projectid, delivery_proj))
            else:
                logger.warning('Charon delivery_projects for project {} not updated with value {} because the value was already present'.format(projectid, delivery_proj))
        except Exception as e:
            # Deliberately best-effort: report the problem but do not raise
            logger.error('Failed to update delivery_projects in charon for {}. Error says: {}'.format(projectid, e))
            logger.exception(e)
示例#54
0
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False):
    """Collect the seqruns of a sample that are eligible for processing.

    Libpreps whose "qc" value in Charon is "FAILED" are skipped unless
    include_failed_libpreps is set; seqruns whose "alignment_status" is
    "DONE" are skipped unless include_done_seqruns is set. Libpreps that end
    up without any seqrun do not appear in the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libpreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    valid_seqruns = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        qc_status = libprep.get('qc')
        if qc_status == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, qc_status))
            continue

        libprep_id = libprep['libprepid']
        libprep_seqruns = charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']
        for seqrun in libprep_seqruns:
            seqrun_id = seqrun['seqrunid']
            # The alignment status lives on the full seqrun record
            seqrun_record = charon_session.seqrun_get(projectid=project_id,
                                                      sampleid=sample_id,
                                                      libprepid=libprep_id,
                                                      seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            if aln_status == "DONE" and not include_done_seqruns:
                LOG.info('Skipping seqrun "{}" due to alignment_status '
                         '"{}"'.format(seqrun_id, aln_status))
                continue
            valid_seqruns[libprep_id].append(seqrun_id)
    return dict(valid_seqruns)
示例#55
0
def recurse_status_for_sample(project_obj, status_field, status_value, update_done=False,
                              extra_args=None, config=None, config_file_path=None):
    """Set seqruns under sample to have status for field <status_field> to <status_value>

    Every seqrun of every libprep of every sample in ``project_obj`` is
    updated in Charon. A failed update is logged and, unless the
    configuration sets "quiet", reported via mail_analysis; processing then
    continues with the next seqrun.

    :param NGIProject project_obj: The project whose seqruns are updated
    :param str status_field: The name of the seqrun field to set
    :param status_value: The value to set the field to
    :param update_done: Not used in this function body
    :param dict extra_args: Additional fields to send with every update
    :param dict config: Configuration; only the "quiet" key is read here
    :param config_file_path: Not used in this function body
    """
    # Work on a copy so the caller's extra_args dict is not modified
    update_fields = dict(extra_args) if extra_args else {}
    update_fields[status_field] = status_value
    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator so we iterate
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info('Updating status for field "{}" of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(status_field, label, status_value))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 **update_fields)
                except CharonError as e:
                    error_text = ('Could not update {} for project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(status_field,
                                                                      label,
                                                                      status_value,
                                                                      e))
                    LOG.error(error_text)
                    # config defaults to None; treat a missing config as
                    # "not quiet" instead of failing inside the handler.
                    if not (config and config.get('quiet')):
                        mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text, workflow=status_field)