def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon from a workflow return code.

    The status written is "RUNNING" when ``return_code`` is None, "DONE" when
    it is 0 and "COMPUTATION_FAILED" for any other value.

    :param str job_id: The job identifier; it must match STHLM_UUSNP_SAMPLE_RE,
                       whose "project_name" and "sample_id" groups are used
    :param int return_code: The return code of the workflow process (None if
                            the process has not finished)
    :param string run_dir: the directory where results are stored (not used
                           by this function)

    :raises RuntimeError: If the job id could not be parsed or the Charon
                          database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # A non-matching job id makes match() return None (AttributeError on
        # groupdict); a non-string job id raises TypeError.
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # Three placeholders, three arguments: status, sample id, error.
        error_msg = ('Failed to update sample status to "{}" for sample "{}" '
                     'in Charon database: {}'.format(status, sample_id, e))
        raise RuntimeError(error_msg)
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    When the configuration defines genotyping/lower_bound_cutoff, the sample's
    genotype_status is also set: "FAILED" if the concordance is below the
    cutoff, "PASSED" otherwise.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The parsed configuration (optional)
    :param str config_file_path: The path to the configuration file (not used here)

    :raises CharonError: If there is some Error -- with Charon
    :raises IOError: If the path specified is missing or inaccessible
                     (presumably raised by parse_genotype_concordance)
    :raises ValueError: If the specified sample has no data in the gtc file
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    # Tolerate a missing configuration: no cutoff means no status update.
    gtc_lower_bound = (config or {}).get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # Compare against the configured cutoff (not the value with itself).
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The alphanumeric database-friendly name of the project (e.g. "P123")
    :rtype: str

    :raises CharonError: If the GET fails with a status code other than 404
                         (re-raised unchanged)
    :raises ValueError: If the project has no project id in the database
                        or if the project does not exist in Charon (the
                        exception then carries ``status_code = 404``)
    """
    charon_session = CharonSession()
    try:
        project_id = charon_session.project_get(project_name)
    except CharonError as e:
        if e.status_code == 404:
            new_e = ValueError('Project "{}" missing from database: {}'.format(project_name, e))
            new_e.status_code = 404
            # Raise the translated error, not the original CharonError.
            raise new_e
        else:
            raise
    try:
        return project_id['projectid']
    except KeyError:
        raise ValueError('Couldn\'t retrieve project id for project "{}"; '
                         'this project\'s database entry has no "projectid" value.'.format(project_name))
class TestCommunicate(unittest.TestCase):
    """Tests that create a temporary project in Charon and query it back."""

    def setUp(self):
        """Create the test project in Charon."""
        self.project_id = "P100000"
        self.project_name = "P.Mayhem_14_01"
        self.project_data = dict(projectid=self.project_id,
                                 name=self.project_name,
                                 status=None)
        self.session = CharonSession()
        project_url = self.session.construct_charon_url('project')
        payload = json.dumps(self.project_data)
        response = self.session.post(project_url, data=payload)
        assert response.status_code == 201, "Could not create test project in Charon: {}".format(response.reason)
        created_project = response.json()
        assert created_project['projectid'] == self.project_id, "Test project ID is incorrect"

    def tearDown(self):
        """Remove the test project from Charon."""
        project_url = self.session.construct_charon_url('project', self.project_id)
        response = self.session.delete(project_url)
        assert response.status_code == 204, "Could not delete test project from Charon: {}".format(response.reason)

    def test_get_project_id_from_name(self):
        """The id looked up by name must be the id the project was created with."""
        looked_up_id = get_project_id_from_name(self.project_name)
        self.assertEqual(self.project_id, looked_up_id)

    def test_rebuild_project_obj_from_charon(self):
        """Placeholder: create fake project / sample / libprep / seqrun."""
        pass
def fetch_charon(context, project, threshold, all_samples):
    """Fetch the samples of the specified project from Charon and print their
    genotype concordance.

    Samples whose genotype_status is None are skipped. Unless ``all_samples``
    is set, only samples with a concordance at or below ``threshold`` are
    printed; samples with a concordance of exactly 0 are never printed.

    :param context: Not used by this function
    :param str project: The project identifier passed to Charon
    :param float threshold: Concordance (in percent) at or below which a sample is listed
    :param bool all_samples: Print every checked sample regardless of threshold

    Any exception is logged rather than propagated.
    """
    try:
        # get result from charon
        charon_session = CharonSession()
        result = charon_session.project_get_samples(project)
        samples = {}
        for sample in result.get('samples'):
            status = sample.get('genotype_status')
            # exclude samples which were not yet checked; do this before
            # converting the concordance so that an unchecked sample without
            # a concordance value does not abort the whole listing
            if status is None:
                continue
            concordance = float(sample.get('genotype_concordance'))
            samples[sample.get('sampleid')] = (concordance, status)

        # print output
        if not all_samples and samples:
            print('Samples below threshold: {}%'.format(threshold))
        for sample in sorted(samples.keys()):
            concordance, status = samples[sample]
            # if --all, we don't care about threshold
            if all_samples or concordance <= threshold:
                # do not print 0%
                if concordance != 0:
                    print('{} {}% {}'.format(sample, concordance, status))
    except Exception as e:
        log.error("Can't fetch Charon. Error says: {}".format(str(e)))
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """Refuse to continue if a seqrun of this sample is already being
    analysed or has already been analysed, unless the flags allow it.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    libprep_listing = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    for libprep_entry in libprep_listing['libpreps']:
        libprep_id = libprep_entry['libprepid']
        seqrun_listing = charon_session.libprep_get_seqruns(projectid=project_id,
                                                            sampleid=sample_id,
                                                            libprepid=libprep_id)
        for seqrun_entry in seqrun_listing['seqruns']:
            seqrun_id = seqrun_entry['seqrunid']
            seqrun_record = charon_session.seqrun_get(projectid=project_id,
                                                      sampleid=sample_id,
                                                      libprepid=libprep_id,
                                                      seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            # Each blocking status has its own override flag.
            if aln_status == "RUNNING":
                blocked = not restart_running_jobs
            elif aln_status == "DONE":
                blocked = not restart_finished_jobs
            else:
                blocked = False
            if blocked:
                raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                                   'seqrun "{}" with status "{}"'.format(project_obj, sample_obj, seqrun_id, aln_status))
def get_finished_seqruns_for_sample(project_id, sample_id, include_failed_libpreps=False):
    """Collect the seqruns of a sample whose alignment_status is "DONE".

    Libpreps whose qc is "FAILED" are skipped unless
    ``include_failed_libpreps`` is set.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Also look at libpreps that failed qc

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    libprep_listing = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    finished = collections.defaultdict(list)
    for libprep in libprep_listing['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_listing = charon_session.libprep_get_seqruns(projectid=project_id,
                                                            sampleid=sample_id,
                                                            libprepid=libprep_id)
        for seqrun in seqrun_listing['seqruns']:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = charon_session.seqrun_get(projectid=project_id,
                                                      sampleid=sample_id,
                                                      libprepid=libprep_id,
                                                      seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            if aln_status != "DONE":
                LOG.debug('Skipping seqrun "{}" due to alignment_status '
                          '"{}"'.format(seqrun_id, aln_status))
                continue
            finished[libprep_id].append(seqrun_id)
    return dict(finished)
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Build an NGIProject with its samples, libpreps and seqruns from Charon.

    Each created object gets a ``status`` attribute copied from the "status"
    field of its Charon record ("unknown" when the field is absent).

    :param str analysis_top_dir: Used as the base_path of the project
    :param str project_name: The name (and dirname) of the project
    :param str project_id: The Charon id of the project

    :returns: The populated project object
    :raises RuntimeError: If samples, libpreps or seqruns cannot be fetched
    """
    # NOTE: project_dir / sample_dir are computed but not used below.
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        sample_records = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample_record in sample_records:
        sample_id = sample_record.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample_record.get("status", "unknown")
        try:
            libprep_records = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep_record in libprep_records:
            libprep_id = libprep_record.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id, dirname=libprep_id)
            libprep_obj.status = libprep_record.get("status", "unknown")
            try:
                seqrun_records = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun_record in seqrun_records:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun_record.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun_record.get("status", "unknown")
    return project_obj
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the alignment_status of every seqrun under the project to "set_status".

    A failed update is logged and, unless the module-level ``config`` has
    'quiet' set, reported through mail_analysis; the remaining seqruns are
    still processed.

    :param NGIProject project_obj: The project whose seqruns are updated
    :param str set_status: The alignment_status value to write
    :param bool update_done: Not used by this function
    """
    charon_session = CharonSession()
    project_id = project_obj.project_id
    # There's only one sample but the project is an iterator
    for sample_obj in project_obj:
        for libprep_obj in sample_obj:
            for seqrun_obj in libprep_obj:
                ids = (project_id, sample_obj.name, libprep_obj.name, seqrun_obj.name)
                label = "{}/{}/{}/{}".format(*ids)
                LOG.info(('Updating status of project/sample/libprep/seqrun '
                          '"{}" to "{}" in Charon ').format(label, set_status))
                try:
                    charon_session.seqrun_update(projectid=ids[0],
                                                 sampleid=ids[1],
                                                 libprepid=ids[2],
                                                 seqrunid=ids[3],
                                                 alignment_status=set_status)
                except CharonError as e:
                    error_text =('Could not update status of project/sample/libprep/seqrun '
                                 '"{}" in Charon to "{}": {}'.format(label, set_status, e))
                    LOG.error(error_text)
                    quiet = config.get('quiet')
                    if not quiet:
                        mail_analysis(project_name=project_id,
                                      sample_name=sample_obj.name,
                                      level="ERROR",
                                      info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Write the mean autosomal coverage of every finished seqrun of a sample
    to Charon.

    The seqruns come from get_finished_seqruns_for_sample and the coverage
    from _parse_mean_coverage_from_qualimap. A failed Charon update is logged
    and, unless the module-level ``config`` has 'quiet' set, mailed; the
    remaining seqruns are still processed.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    for libprep_id, seqrun_ids in seqruns_by_libprep.iteritems():
        for seqrun_id in seqrun_ids:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
            update_fields = dict(projectid=project_id,
                                 sampleid=sample_id,
                                 libprepid=libprep_id,
                                 seqrunid=seqrun_id,
                                 mean_autosomal_coverage=ma_coverage)
            try:
                charon_session.seqrun_update(**update_fields)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if config.get('quiet'):
                    continue
                mail_analysis(project_name=project_id,
                              sample_name=sample_id,
                              engine_name="piper_ngi",
                              level="ERROR",
                              info_text=error_text)
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path, config=None, config_file_path=None):
    """Update Charon with the duplication rate, total sequenced reads and
    autosomal coverage of a sample.

    The values are parsed from the piper_ngi output under
    ``<project_base_path>/ANALYSIS/<project_id>/piper_ngi``. If a file cannot
    be parsed the corresponding values default to 0, an error is logged and
    the update continues.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration (optional); its 'quiet' key
                        suppresses the error mail
    :param str config_file_path: The path to the configuration file (not used here)
    """
    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                            'piper_ngi', '06_final_alignment_qc',
                                            "{}.clean.dedup.qc".format(sample_id),
                                            "genome_results.txt")
    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt / SystemExit.
        dup_pc = 0
        LOG.error("Cannot find {}.metrics file for duplication rate at {}. Continuing."
                  .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error("Cannot find genome_results.txt file for sample coverage at {}. Continuing."
                  .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info('Updating sample "{}" in '
                 'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
                 .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # One argument per placeholder; the Charon error is appended so the
        # cause is not lost.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate : {} '
                      'and coverage {}: {}'.format(project_id, sample_id, dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet".
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                          engine_name="piper_ngi", level="ERROR",
                          info_text=error_text)
def get_valid_seqruns_for_sample(project_id, sample_id, include_failed_libpreps=False, include_done_seqruns=False, status_field="alignment_status"):
    """Collect the seqruns of a sample that are still eligible for processing.

    A seqrun is kept when its ``status_field`` value in Charon is not "DONE"
    (or when ``include_done_seqruns`` is set). A seqrun lacking the field is
    logged as an error and kept.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values), status_field))
    charon_session = CharonSession()
    libprep_listing = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    selected = collections.defaultdict(list)
    for libprep in libprep_listing['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_listing = charon_session.libprep_get_seqruns(projectid=project_id,
                                                            sampleid=sample_id,
                                                            libprepid=libprep_id)
        for seqrun in seqrun_listing['seqruns']:
            seqrun_id = seqrun['seqrunid']
            try:
                aln_status = charon_session.seqrun_get(projectid=project_id,
                                                       sampleid=sample_id,
                                                       libprepid=libprep_id,
                                                       seqrunid=seqrun_id)[status_field]
            except KeyError:
                LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                          'for project "{}" / sample "{}". Including as '
                          'valid.'.format(status_field, seqrun_id, project_id, sample_id))
                aln_status = None
            if aln_status == "DONE" and not include_done_seqruns:
                LOG.info('Skipping seqrun "{}" due to {}'
                         '"{}"'.format(seqrun_id, status_field, aln_status))
                continue
            selected[libprep_id].append(seqrun_id)
    return dict(selected)
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Process a fixed list of demultiplexed flowcells, then poll forever.

    The arguments are ignored: ``demux_fcid_dir`` is overwritten with each of
    the hard-coded flowcell directories below. After all flowcells have been
    submitted (one minute apart), the function loops indefinitely, updating
    Charon from the local job status and launching sample analysis for every
    Charon project that has a directory under analysis_ready/DATA.
    """
    flowcell_dirs = (
        # G.Grigelioniene_14_01
        "/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108
        "/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun
        "/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
        # M.Kaller_14_05 P1170_103, P1170_105 --- rerun
        "/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
        ### UPPSALA
        # uppsala run
        "/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
        # -- rerun
        "/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
    )
    for demux_fcid_dir in flowcell_dirs:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        time.sleep(60)  # wait for 1 minute

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        for project_charon in charon_session.projects_get_all()['projects']:
            project_dir = os.path.join("/proj/a2014205/nobackup/NGI/analysis_ready/DATA",
                                       project_charon["name"])
            if not os.path.isdir(project_dir):
                continue
            projectObj = recreate_project_from_filesystem(project_dir, None)
            launch_analysis_for_samples([projectObj])
        time.sleep(3800)
def get_delivery_token_in_charon(self):
    """Return the project's delivery_token from Charon.

    The project is looked up by ``self.projectid``. When the record has no
    delivery_token, or an empty one, the string 'NO-TOKEN' is returned.
    """
    project_record = CharonSession().project_get(self.projectid)
    token = project_record.get('delivery_token')
    if not token:
        return 'NO-TOKEN'
    return token
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Submit the hard-coded flowcells one minute apart, then poll Charon.

    All three arguments are ignored; ``demux_fcid_dir`` is reassigned to each
    hard-coded flowcell directory in turn. The final loop never returns: on
    each pass it syncs local job status to Charon and launches analysis for
    every Charon project whose directory exists under analysis_ready/DATA.
    """
    analysis_data_root = "/proj/a2014205/nobackup/NGI/analysis_ready/DATA"
    # (flowcell directory, what it contains)
    flowcells = [
        ("/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
         "G.Grigelioniene_14_01"),
        ("/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
         "M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108"),
        ("/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
         "M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun"),
        ("/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
         "M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105; "
         "M.Kaller_14_08 sample P1272_101, P1272_104"),
        ("/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
         "M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105; "
         "M.Kaller_14_08 sample P1272_101, P1272_104"),
        ("/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
         "M.Kaller_14_05 P1170_103, P1170_105 --- rerun"),
        ("/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
         "UPPSALA: uppsala run"),
        ("/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
         "UPPSALA: -- rerun"),
    ]
    for demux_fcid_dir, _contents in flowcells:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        # wait for 1 minute before submitting the next flowcell
        time.sleep(60)

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        projects_dict = charon_session.projects_get_all()['projects']
        for project_charon in projects_dict:
            project_name = project_charon["name"]
            project_dir = os.path.join(analysis_data_root, project_name)
            if os.path.isdir(project_dir):
                launch_analysis_for_samples(
                    [recreate_project_from_filesystem(project_dir, None)])
        time.sleep(3800)
def test_construct_charon_url(self):
    """construct_charon_url joins its arguments under <base_url>/api/v1/."""
    append_list = ["road", "to", "nowhere"]
    charon_session = CharonSession()
    # This is a weird test because it's the same code as I'm testing
    # but it also seems weird to code it worse
    url_path = '/'.join(str(part) for part in append_list)
    finished_url = "{}/api/v1/{}".format(charon_session._base_url, url_path)
    # The method expects not a list but individual args
    constructed_url = CharonSession().construct_charon_url(*append_list)
    self.assertEqual(finished_url, constructed_url)
def main():
    """Mark the project as using the "hello_engine" best practice analysis in
    Charon, run the hello-ga.nf nextflow script and print its output.
    """
    args = cli_args()
    cs = CharonSession()
    cs.project_update(args.project_id,best_practice_analysis="hello_engine")
    nextflow_command = ["./nextflow", "run", "hello-ga.nf"]
    # it is actually picking up stdout and stderr as well
    output = subprocess.check_output(nextflow_command)
    print("The output is:")
    print(output)
    print("done")
def update_analysis(project_id, status):
    """Record the outcome of an rna_ngi analysis in Charon.

    A mail is sent first (level INFO on success, ERROR on failure). Every
    sample of the project whose analysis_status is "UNDER_ANALYSIS" is then
    marked 'ANALYZED' or 'FAILED', and each of its seqruns with
    alignment_status "RUNNING" (in libpreps whose qc is not 'FAILED') is
    marked 'DONE' or 'FAILED'.

    :param str project_id: The id of the project
    :param status: Truthy if the analysis succeeded
    """
    charon_session = CharonSession()
    mail_analysis(project_id, engine_name='rna_ngi',
                  level='INFO' if status else 'ERROR')
    if status:
        new_sample_status, new_seqrun_status = 'ANALYZED', 'DONE'
    else:
        new_sample_status, new_seqrun_status = 'FAILED', 'FAILED'

    samples = charon_session.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, new_sample_status))
        charon_session.sample_update(project_id, sample_id,
                                     analysis_status=new_sample_status)
        libpreps = charon_session.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = charon_session.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample_id, libprep_id, seqrun_id,
                    new_seqrun_status))
                charon_session.seqrun_update(project_id, sample_id,
                                             libprep_id, seqrun_id,
                                             alignment_status=new_seqrun_status)
def write_to_charon_alignment_results(base_path, project_name, project_id, sample_id, libprep_id, seqrun_id):
    """Push the per-lane alignment metrics of a sequencing run to Charon.

    The current seqrun record is fetched, its "lanes" counter reset to 0 and
    then updated once per Piper qc directory found for the run before being
    written back.

    :param str base_path: Directory containing the ANALYSIS tree
    :param str project_name: The name of the project (e.g. T.Durden_14_01)
    :param str project_id: The id of the project (e.g. P1171)
    :param str sample_id: ...
    :param str libprep_id: ...
    :param str seqrun_id: ... (the fourth "_"-separated field is used as the
                          piper run id)

    :raises CharonError: If the Charon database could not be read or updated
    :raises ValueError: If the output data could not be found or parsed.
    """
    charon_session = CharonSession()
    try:
        seqrun_dict = charon_session.seqrun_get(project_id, sample_id, libprep_id, seqrun_id)
    except CharonError as e:
        raise CharonError('Error accessing database for project "{}", sample {}; '
                          'could not update Charon while performing best practice: '
                          '{}'.format(project_name, sample_id, e))
    seqrun_fields = seqrun_id.split("_")
    piper_run_id = seqrun_fields[3]
    seqrun_dict["lanes"] = 0
    already_done = seqrun_dict.get("alignment_status") == "DONE"
    if already_done:
        LOG.warn("Sequencing run \"{}\" marked as DONE but writing new alignment results; "
                 "this will overwrite the previous results.".format(seqrun_id))

    # Find all the appropriate files
    piper_result_dir = os.path.join(base_path, "ANALYSIS", project_name,
                                    "02_preliminary_alignment_qc")
    try:
        if os.path.isdir(piper_result_dir):
            os.listdir(piper_result_dir)
    except OSError as e:
        raise ValueError("Piper result directory \"{}\" inaccessible when updating stats to Charon: {}.".format(piper_result_dir, e))
    piper_qc_dir_base = "{}.{}.{}".format(sample_id, piper_run_id, sample_id)
    piper_qc_path = "{}*/".format(os.path.join(piper_result_dir, piper_qc_dir_base))
    piper_qc_dirs = glob.glob(piper_qc_path)
    if not piper_qc_dirs:
        # Something went wrong in the alignment or we can't parse the file format
        raise ValueError("Piper qc directories under \"{}\" are missing or in an unexpected format when updating stats to Charon.".format(piper_qc_path))

    # Examine each lane and update the dict with its alignment metrics
    for qc_lane in piper_qc_dirs:
        genome_result = os.path.join(qc_lane, "genome_results.txt")
        # This means that if any of the lanes are missing results, the
        # sequencing run is marked as a failure.
        # We should flag this somehow and send an email at some point.
        if not os.path.isfile(genome_result):
            raise ValueError("File \"genome_results.txt\" is missing from Piper result directory \"{}\"".format(piper_result_dir))
        # Fold this lane's alignment results into the seqrun record
        update_seq_run_for_lane(seqrun_dict, parse_qualimap_results(genome_result))

    try:
        # Update the seqrun in the Charon database
        charon_session.seqrun_update(**seqrun_dict)
    except CharonError as e:
        raise CharonError('Failed to update run alignment status for run "{}" in project {} '
                          'sample {}, library prep {} to Charon database: {}'.format(seqrun_id, project_name, sample_id, libprep_id, e))
def analyze(analysis_object, config=None, config_file_path=None):
    """Collect the fastq files of a project that still need analysis and
    submit a batch job for them.

    The project's Charon record supplies the reference genome and the
    sequencing facility ("NGI-S" -> "sthlm", "NGI-U" -> "upps"), which is
    stored on ``analysis_object.sequencing_facility``. Samples, libpreps and
    seqruns are filtered through handle_sample_status, handle_libprep_status
    and handle_seqrun_status using the statuses reported by Charon.

    NOTE(review): the nesting below was reconstructed from whitespace-collapsed
    source; in particular the placement of the final ``if not fastq_files``
    inside the reference-genome check should be confirmed.

    :param analysis_object: Object exposing ``project``,
                            ``restart_running_jobs`` and, after this call,
                            ``sequencing_facility``
    :param dict config: Not used by this function
    :param str config_file_path: Not used by this function

    :raises RuntimeError: If the project's sequencing_facility is neither
                          "NGI-S" nor "NGI-U"
    """
    charon_session = CharonSession()
    charon_pj=charon_session.project_get(analysis_object.project.project_id)
    reference_genome=charon_pj.get('reference')
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility="sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility="upps"
    else:
        LOG.error("charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?")
        raise RuntimeError
    fastq_files=[]
    # Nothing is collected when the reference is missing or set to 'other'
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(analysis_object.project.project_id, sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze=handle_sample_status(analysis_object, sample, charon_reported_status)
                if not do_analyze :
                    continue
            except CharonError as e:
                # A Charon error is only logged; the sample's libpreps are
                # still examined below
                LOG.error(e)
            for libprep in sample:
                charon_lp_status=charon_session.libprep_get(analysis_object.project.project_id, sample.name, libprep.name).get('qc')
                do_analyze=handle_libprep_status(analysis_object, libprep, charon_lp_status)
                if not do_analyze :
                    continue
                else:
                    for seqrun in libprep:
                        charon_sr_status=charon_session.seqrun_get(analysis_object.project.project_id, sample.name, libprep.name, seqrun.name).get('alignment_status')
                        do_analyze=handle_seqrun_status(analysis_object, seqrun, charon_sr_status)
                        if not do_analyze :
                            continue
                        else:
                            # Flag the seqrun and its sample as part of this analysis
                            seqrun.being_analyzed=True
                            sample.being_analyzed = sample.being_analyzed or True
                            # filter out index files from analysis
                            for fastq_file in filter(lambda f: not is_index_file(f), seqrun.fastq_files):
                                fastq_path=os.path.join(analysis_object.project.base_path, "DATA", analysis_object.project.project_id, sample.name, libprep.name, seqrun.name, fastq_file)
                                fastq_files.append(fastq_path)
        if not fastq_files:
            LOG.error("No fastq files obtained for the analysis fo project {}, please check the Charon status.".format(analysis_object.project.name))
        else :
            # Stop whatever is already running before resubmitting
            if analysis_object.restart_running_jobs:
                stop_ongoing_analysis(analysis_object)
            fastq_dir=preprocess_analysis(analysis_object, fastq_files)
            sbatch_path=write_batch_job(analysis_object, reference_genome, fastq_dir)
            job_id=start_analysis(sbatch_path)
            analysis_path=os.path.join(analysis_object.project.base_path, "ANALYSIS", analysis_object.project.project_id, 'rna_ngi')
            # Track the submitted job together with its analysis directory
            record_project_job(analysis_object.project, job_id, analysis_path)
def analyze_sample(project, sample, config=None, config_file_path=None):
    """Launch the sample-level Piper workflows for a sample, provided its
    total autosomal coverage in Charon is high enough.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyzed
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)
    """
    load_modules(["java/sun_jdk1.7.0_25", "R/2.15.0"])
    charon_session = CharonSession()
    # Determine if we can begin sample-level processing yet.
    # The condition is that the coverage is above 28.4X
    # If these conditions become more complex we can create a function for this
    sample_record = charon_session.sample_get(project.project_id, sample.name)
    sample_total_autosomal_coverage = sample_record.get('total_autosomal_coverage')
    if not (sample_total_autosomal_coverage > 28.4):
        LOG.info('Sample "{}" in project "{}" is not yet ready for '
                 'processing.'.format(sample, project))
        return

    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
    for workflow_subtask in get_subtasks_for_level(level="sample"):
        if is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                            project_id=project.project_id,
                                            sample_id=sample.name):
            continue
        try:
            ## Temporarily logging to a file until we get ELK set up
            log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                 project_base_path=project.base_path,
                                                 project_name=project.name,
                                                 sample_id=sample.name)
            rotate_log(log_file_path)
            # Store the exit code of detached processes
            exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                        project_base_path=project.base_path,
                                                        project_name=project.name,
                                                        sample_id=sample.name)
            build_setup_xml(project, config, sample)
            command_line = build_piper_cl(project, workflow_subtask, exit_code_path, config)
            p_handle = launch_piper_job(command_line, project, log_file_path)
            try:
                record_process_sample(project=project,
                                      sample=sample,
                                      workflow_subtask=workflow_subtask,
                                      analysis_module_name="piper_ngi",
                                      analysis_dir=project.analysis_dir,
                                      pid=p_handle.pid)
            except RuntimeError as e:
                # The job was launched but could not be recorded
                LOG.error(e)
                continue
        except (NotImplementedError, RuntimeError) as e:
            LOG.error('Processing project "{}" / sample "{}" failed: '
                      '{}'.format(project, sample, e.__repr__()))
def get_valid_seqruns_for_sample(project_id, sample_id, include_failed_libpreps=False, include_done_seqruns=False, status_field="alignment_status"):
    """Find the seqruns of a sample that have not been marked "DONE".

    Seqruns are grouped by libprep. Libpreps with qc "FAILED" are left out
    unless ``include_failed_libpreps`` is set; seqruns whose ``status_field``
    is "DONE" are left out unless ``include_done_seqruns`` is set. A seqrun
    whose record lacks ``status_field`` is reported as an error and included.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: The Charon seqrun field to inspect

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        allowed = ", ".join(valid_status_values)
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(allowed, status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    seqruns_by_libprep = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        use_libprep = libprep.get('qc') != "FAILED" or include_failed_libpreps
        if not use_libprep:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        libprep_seqruns = charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']
        for seqrun in libprep_seqruns:
            seqrun_id = seqrun['seqrunid']
            try:
                aln_status = charon_session.seqrun_get(projectid=project_id,
                                                       sampleid=sample_id,
                                                       libprepid=libprep_id,
                                                       seqrunid=seqrun_id)[status_field]
            except KeyError:
                LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                          'for project "{}" / sample "{}". Including as '
                          'valid.'.format(status_field, seqrun_id, project_id, sample_id))
                aln_status = None
            use_seqrun = aln_status != "DONE" or include_done_seqruns
            if use_seqrun:
                seqruns_by_libprep[libprep_id].append(seqrun_id)
            else:
                LOG.info('Skipping seqrun "{}" due to {}'
                         '"{}"'.format(seqrun_id,status_field, aln_status))
    return dict(seqruns_by_libprep)
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The engine name is read from the "best_practice_analysis" field of the
    project's Charon record and passed to load_engine_module.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Passed through to load_engine_module
    :param str config_file_path: Not used in this function body

    :returns: whatever load_engine_module returns for the project's analysis
    :raises RuntimeError: If load_engine_module raises RuntimeError
                          (re-raised with the project prepended)
    """
    project_record = CharonSession().project_get(project.project_id)
    engine_name = project_record["best_practice_analysis"]
    try:
        return load_engine_module(engine_name, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
def get_staged_samples_from_charon(self):
    """Return the ids of this project's samples whose Charon delivery_status is 'STAGED'.

    :returns: sample ids, in the order Charon lists the samples
    :rtype: list
    :raises AssertionError: If the Charon response has no 'samples' entry
    """
    project_samples = CharonSession().project_get_samples(self.projectid).get('samples')
    if project_samples is None:
        raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
    return [entry.get('sampleid')
            for entry in project_samples
            if entry.get('delivery_status') == 'STAGED']
def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Mark samples as having genotype data available in Charon.

    Genotype files are either given explicitly or discovered in the inbox
    director(y/ies); for every sample found in them the Charon
    genotype_status is set to "AVAILABLE" unless it already has another value.

    :param str inbox: Directory to search; if omitted, the directories listed
                      under config["environment"]["flowcell_inbox"] are used
    :param int num_days: Only files modified within this many days are considered
    :param list genotype_files: Explicit genotype file paths (skips the search)
    :param dict config: The parsed configuration
    :param str config_file_path: Path of the configuration file (used in error text)

    :raises ValueError: If no inbox is given and none can be read from config
    """
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        if inbox:
            # An explicit inbox was previously ignored (the loop read an
            # undefined "inboxes"); search exactly that directory.
            inboxes = [inbox]
        else:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError("No path to delivery inbox specified by argument "
                                 "or in configuration file ({}). Exiting.".format(config_file_path))
            if isinstance(inboxes, (str, type(u""))):
                # A single path in the config must not be iterated char by char
                inboxes = [inboxes]
        # Oldest acceptable modification time, in seconds since the epoch
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across all inboxes instead of keeping only the last one
        gt_files_valid = []
        for inbox_path in inboxes:
            inbox_path = os.path.abspath(inbox_path)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox_path, "*"))):
                # Compare the mtime against the cutoff timestamp itself
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(str(path) for path in inboxes),
                                 time.ctime(cutoff_age)))
            return
    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
            find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    # Best effort: one failing sample must not stop the others
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id,
                                                                      sample,
                                                                      e))
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Look up and load the analysis engine module for a project.

    Reads "best_practice_analysis" from the project's Charon record and hands
    it to load_engine_module together with config.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Forwarded to load_engine_module
    :param str config_file_path: Not used in this function body

    :returns: the module returned by load_engine_module
    :raises RuntimeError: If load_engine_module raises RuntimeError; the
                          message is prefixed with the project
    """
    session = CharonSession()
    bpa_name = session.project_get(project.project_id)["best_practice_analysis"]
    try:
        engine_module = load_engine_module(bpa_name, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
    return engine_module
def find_projects_from_samples(sample_list):
    """Given a list of samples, attempts to determine which projects they
    belong to using Charon records.

    Sample names of the form "P<4 digits>_..." are assumed to carry their
    project id as prefix, which is then verified against Charon; all other
    names are looked up across projects. Samples with no owner or with more
    than one owner are reported in the log and left out of the result.

    :param list sample_list: A list of the samples for which to find projects

    :returns: a dict of {project_id: set(samples)}
    :rtype: dict of sets

    :raises ValueError: If you fail to pass in a list. Nice work!
    """
    # Validate before opening a Charon session so bad input fails fast
    if not isinstance(sample_list, list):
        raise ValueError("Input should be list.")
    sthlm_sample_re = re.compile(r'(P\d{4})_')
    projects_dict = collections.defaultdict(set)
    no_owners_found = set()
    multiple_owners_found = set()
    charon_session = CharonSession()
    for sample_name in sample_list:
        # First see if we can just parse out the project id from the sample name
        m = sthlm_sample_re.match(sample_name)
        if m:
            project_id = m.group(1)
            try:
                # Ensure that we guessed right
                charon_session.sample_get(project_id, sample_name)
            except CharonError as e:
                LOG.debug('Project for sample "{}" appears to be "{}" but is not '
                          'present in Charon ({})'.format(sample_name, project_id, e))
                no_owners_found.add(sample_name)
            else:
                projects_dict[project_id].add(sample_name)
        else:
            # Otherwise check all the projects for matching samples (returns list or None)
            owner_projects_list = charon_session.sample_get_projects(sample_name)
            if not owner_projects_list:
                no_owners_found.add(sample_name)
            elif len(owner_projects_list) > 1:
                multiple_owners_found.add(sample_name)
            else:
                projects_dict[owner_projects_list[0]].add(sample_name)
    if no_owners_found:
        LOG.warning("No projects found for the following samples: {}".format(", ".join(no_owners_found)))
    if multiple_owners_found:
        LOG.warning('Multiple projects found with the following samples (owner '
                    'could not be unamibugously determined): {}'.format(", ".join(multiple_owners_found)))
    return dict(projects_dict)
def get_samples_from_charon(self, delivery_status='STAGED'):
    """Return the ids of this project's samples that have the given delivery status.

    :param str delivery_status: The Charon delivery_status to select on;
                                None selects every sample
    :returns: sample ids, in the order Charon lists the samples
    :rtype: list
    :raises AssertionError: If the Charon response has no 'samples' entry
    """
    charon_response = CharonSession().project_get_samples(self.projectid)
    all_samples = charon_response.get('samples')
    if all_samples is None:
        raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
    selected = []
    for entry in all_samples:
        entry_id = entry.get('sampleid')
        entry_status = entry.get('delivery_status')
        if not (entry_status != delivery_status and delivery_status is not None):
            selected.append(entry_id)
    return selected
def _reset_charon_record(reset_method, level, objs, **ids):
    """Reset one Charon record, logging the outcome; a CharonError is logged, not raised."""
    label = "/".join("{}".format(obj) for obj in objs)
    LOG.info("Resetting Charon record for {} {}".format(level, label))
    try:
        reset_method(**ids)
        LOG.info("Charon record for {} {} reset".format(level, label))
    except CharonError as e:
        LOG.error("Unable to reset Charon record for {} {}: {}".format(level, label, e))


def reset_charon_records_by_object(project_obj):
    """Reset the Charon records of a project and of everything below it.

    The project record is reset first (a CharonError there propagates to the
    caller); then every sample, libprep and seqrun obtained by iterating the
    object hierarchy is reset on a best-effort basis: failures are logged
    and the walk continues.

    :param NGIProject project_obj: The project whose records should be reset
    """
    charon_session = CharonSession()
    project_id = project_obj.project_id
    LOG.info("Resetting Charon record for project {}".format(project_obj))
    charon_session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_obj))
    for sample_obj in project_obj:
        _reset_charon_record(charon_session.sample_reset,
                             "project/sample",
                             (project_obj, sample_obj),
                             projectid=project_id,
                             sampleid=sample_obj.name)
        for libprep_obj in sample_obj:
            _reset_charon_record(charon_session.libprep_reset,
                                 "project/sample/libprep",
                                 (project_obj, sample_obj, libprep_obj),
                                 projectid=project_id,
                                 sampleid=sample_obj.name,
                                 libprepid=libprep_obj.name)
            for seqrun_obj in libprep_obj:
                _reset_charon_record(charon_session.seqrun_reset,
                                     "project/sample/libprep/seqrun",
                                     (project_obj, sample_obj, libprep_obj, seqrun_obj),
                                     projectid=project_id,
                                     sampleid=sample_obj.name,
                                     libprepid=libprep_obj.name,
                                     seqrunid=seqrun_obj.name)
def record_project_job(project, job_id, analysis_dir, workflow=None, engine='rna_ngi', run_mode='local', config=None, config_file_path=None):
    """Record a project-level analysis job locally and flag it in Charon.

    A ProjectAnalysis row is added and committed through get_session(). Then,
    for every sample with being_analyzed set, the Charon analysis_status is set
    to "UNDER_ANALYSIS" and each seqrun with being_analyzed set, under a
    libprep whose qc is not "FAILED", gets alignment_status "RUNNING".
    Any exception during the Charon updates of a sample is logged and the
    remaining samples are still processed.

    :param NGIProject project: The project being analyzed
    :param job_id: Identifier of the submitted job
    :param str analysis_dir: Directory of the analysis
    :param str workflow: Workflow name stored with the job
    :param str engine: Engine name stored with the job
    :param str run_mode: Run mode stored with the job
    :param dict config: Not used in this function body
    :param str config_file_path: Not used in this function body
    """
    with get_session() as db_session:
        job_record = ProjectAnalysis(project_id=project.project_id,
                                     job_id=job_id,
                                     project_name=project.name,
                                     project_base_path=project.base_path,
                                     workflow=workflow,
                                     engine=engine,
                                     analysis_dir=analysis_dir,
                                     run_mode=run_mode)
        db_session.add(job_record)
        db_session.commit()
        sample_status_value = "UNDER_ANALYSIS"
        for sample in project:
            if not sample.being_analyzed:
                continue
            try:
                LOG.info('Updating Charon status for project/sample '
                         '{}/{} : {}'.format(project, sample, sample_status_value))
                CharonSession().sample_update(projectid=project.project_id,
                                              sampleid=sample.name,
                                              analysis_status=sample_status_value)
                for libprep in sample:
                    libprep_record = CharonSession().libprep_get(project.project_id,
                                                                 sample.name,
                                                                 libprep.name)
                    if libprep_record.get('qc') == "FAILED":
                        continue
                    for seqrun in libprep:
                        if not seqrun.being_analyzed:
                            continue
                        CharonSession().seqrun_update(project.project_id,
                                                      sample.name,
                                                      libprep.name,
                                                      seqrun.name,
                                                      alignment_status="RUNNING")
            except Exception as e:
                LOG.error("Could not update Charon for sample {}/{} : {}".format(
                    project.project_id, sample.name, e))
def setUp(self):
    """Create the test project in Charon and check that the creation succeeded."""
    # Test project definition
    self.project_id = "P100000"
    self.project_name = "P.Mayhem_14_01"
    self.project_data = dict(projectid=self.project_id,
                             name=self.project_name,
                             status=None)
    self.session = CharonSession()
    # POST the project document to the 'project' endpoint
    project_url = self.session.construct_charon_url('project')
    payload = json.dumps(self.project_data)
    response = self.session.post(project_url, data=payload)
    assert response.status_code == 201, \
        "Could not create test project in Charon: {}".format(response.reason)
    created_project = response.json()
    assert created_project['projectid'] == self.project_id, "Test project ID is incorrect"
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and the total number of reads for each.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration; a true "quiet" entry suppresses error mails
    :param str config_file_path: Not used in this function body

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # config defaults to None; treat that the same as "not quiet"
    quiet = (config or {}).get('quiet')
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            # The qc directory name contains the last "_"-separated part of the seqrun id
            qc_dir_pattern = "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1])
            genome_results_file_paths = glob.glob(os.path.join(piper_qc_dir,
                                                               qc_dir_pattern,
                                                               "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best effort per file, but let KeyboardInterrupt/SystemExit through
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR", info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and the total number of reads for each.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration; a true "quiet" entry suppresses error mails
    :param str config_file_path: Not used in this function body

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # config defaults to None; a missing config must not break error reporting
    send_mail = not (config or {}).get('quiet')
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            # Qualimap results live in "<sample>.<last part of seqrun id>*.qc" directories
            genome_results_file_paths = glob.glob(
                os.path.join(piper_qc_dir,
                             "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),
                             "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Narrowed from a bare except so interpreter-exit signals propagate
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if send_mail:
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR", info_text=error_text)
def test_create_charon_entries_from_project(self):
    """Build a project/sample/libprep/seqrun hierarchy and create it in Charon.

    The project is deleted from Charon afterwards whether or not the creation
    succeeded.
    """
    # Create the NGIObjects (each add_* call attaches a child to its parent)
    project_obj = NGIProject(name=self.p_name,
                             dirname=self.p_name,
                             project_id=self.p_id,
                             base_path=self.p_bp)
    (project_obj
     .add_sample(name=self.s_id, dirname=self.s_id)
     .add_libprep(name=self.l_id, dirname=self.l_id)
     .add_seqrun(name=self.sr_id, dirname=self.sr_id))
    try:
        # Create them in the db
        create_charon_entries_from_project(project_obj)
    finally:
        CharonSession().project_delete(project_obj.project_id)
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Rebuild an NGIProject object hierarchy from the records stored in Charon.

    Every sample, libprep and seqrun that Charon lists for the project is
    added to the returned object, and its "status" field (or "unknown" when
    absent) is copied onto the corresponding object.

    :param str analysis_top_dir: Used as the base_path of the project object
    :param str project_name: The name of the project (also used as dirname)
    :param str project_id: The Charon id of the project

    :returns: The reconstructed project
    :rtype: NGIProject

    :raises RuntimeError: If samples, libpreps or seqruns cannot be fetched from Charon
    """
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError(
            "Could not access samples for project {}: {}".format(
                project_id, e))
    for sample in samples:
        sample_id = sample.get("sampleid")
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps = charon_session.sample_get_libpreps(
                project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError(
                "Could not access libpreps for project {} / sample {}: {}".
                format(project_id, sample_id, e))
        for libprep in libpreps:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,
                                                 dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns = charon_session.libprep_get_seqruns(
                    project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError(
                    "Could not access seqruns for project {} / sample {} / "
                    "libprep {}: {}".format(project_id, sample_id, libprep_id,
                                            e))
            for seqrun in seqruns:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id,
                                                    dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
    """Add a delivery name to the delivery_projects list of this sample in Charon.

    The sample record is only updated when the name is not already listed.
    Any failure is logged (with traceback) and not propagated.

    :param str supr_name_of_delivery: The delivery name to record
    """
    charon_session = CharonSession()
    try:
        # fetch the sample record
        sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
        delivery_projects = sample_charon['delivery_projects']
        # Membership must be tested against the list of deliveries, not
        # against the keys of the sample record.
        if supr_name_of_delivery not in delivery_projects:
            delivery_projects.append(supr_name_of_delivery)
            charon_session.sample_update(self.projectid, self.sampleid,
                                         delivery_projects=delivery_projects)
            logger.info('Charon delivery_projects for sample {} updated with value {}'.format(self.sampleid, supr_name_of_delivery))
        else:
            logger.warning('Charon delivery_projects for sample {} not updated with value {} because the value was already present'.format(self.sampleid, supr_name_of_delivery))
    except Exception as e:
        # Best effort: a bookkeeping failure must not abort the delivery
        logger.error('Failed to update delivery_projects in charon while delivering {}. Error says: {}'.format(self.sampleid, e))
        logger.exception(e)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration; a true "quiet" entry suppresses error mails
    :param str config_file_path: Not used in this function body

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # config defaults to None; treat that the same as "not quiet"
    quiet = (config or {}).get('quiet')
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id,
                                         seqrun_id)
            ma_coverage = parse_mean_coverage_from_qualimap(
                piper_qc_dir, sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(
                         label, ma_coverage))
            try:
                charon_session.seqrun_update(
                    projectid=project_id,
                    sampleid=sample_id,
                    libprepid=libprep_id,
                    seqrunid=seqrun_id,
                    mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = (
                    'Could not update project/sample/libprep/seqrun "{}" '
                    'in Charon with mean autosomal coverage '
                    '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not quiet:
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is searched; a libprep without seqruns or
    without a matching seqrun no longer aborts the search.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        for libprep in libpreps or []:
            libprep_id = libprep['libprepid']
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep_id)['seqruns']
            for seqrun in seqruns or []:
                if seqrun["seqrunid"] == fcid:
                    ## BUG if we have one sample with two libpreps on the same flowcell,
                    ##     this just picks the first one it encounters; instead,
                    ##     it should raise an Exception. Requires restructuring.
                    return libprep_id
    except CharonError as e:
        # A 404 from Charon is reported the same way as "nothing matched" below
        if e.status_code != 404:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid, e))
    raise ValueError('No library prep found for project "{}" / sample "{}" '
                     '/ fcid "{}"'.format(project_id, sample_name, fcid))
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The engine name is read from the "best_practice_analysis" field of the
    project's Charon record and passed to load_engine_module.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Passed through to load_engine_module
    :param str config_file_path: Not used in this function body

    :returns: the module returned by load_engine_module
    :raises RuntimeError: If the project has no best practice analysis in
                          Charon, or if the engine module cannot be loaded
    """
    charon_session = CharonSession()
    try:
        best_practice_analysis = charon_session.project_get(project.project_id)["best_practice_analysis"]
    except KeyError:
        # No fallback is applied here, so the message must not promise one
        error_msg = ('No best practice analysis specified in Charon for '
                     'project "{}".'.format(project))
        raise RuntimeError(error_msg)
    try:
        analysis_module = load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
    else:
        return analysis_module
def write_status_to_charon(project_id, return_code):
    """Update the status of a workflow for a project in the Charon database.

    A return code of 0 is recorded as "CLOSED", anything else as "FAILED".

    :param NGIProject project_id: The name of the project
    :param int return_code: The return code of the workflow process

    :raises RuntimeError: If the Charon database could not be updated
    """
    ## Consider keeping on CharonSession open. What's the time savings?
    charon_session = CharonSession()
    ## Is "CLOSED" correct here?
    # Compare by value: identity ("is 0") only works by accident of int caching
    status = "CLOSED" if return_code == 0 else "FAILED"
    try:
        charon_session.project_update(project_id, status=status)
    except CharonError as e:
        error_msg = ('Failed to update project status to "{}" for "{}" '
                     'in Charon database: {}'.format(status, project_id, e))
        raise RuntimeError(error_msg)
def add_dds_name_delivery_in_charon(self, name_of_delivery):
    """Add a delivery name to the delivery_projects list of this sample in Charon.

    The sample record is only updated when the name is not already listed.
    Any failure is logged (with traceback) and not propagated.

    :param str name_of_delivery: The delivery name to record
    """
    charon_session = CharonSession()
    try:
        # Fetch the sample record
        sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
        delivery_projects = sample_charon['delivery_projects']
        # Membership must be tested against the list of deliveries, not
        # against the keys of the sample record.
        if name_of_delivery not in delivery_projects:
            delivery_projects.append(name_of_delivery)
            charon_session.sample_update(self.projectid, self.sampleid,
                                         delivery_projects=delivery_projects)
            logger.info('Charon delivery_projects for sample {} updated '
                        'with value {}'.format(self.sampleid, name_of_delivery))
        else:
            logger.warning('Charon delivery_projects for sample {} not updated '
                           'with value {} because the value was already present'.format(self.sampleid, name_of_delivery))
    except Exception:
        # Best effort: a bookkeeping failure must not abort the delivery
        logger.exception('Failed to update delivery_projects in charon while delivering {}.'.format(self.sampleid))
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The engine name is read from the "best_practice_analysis" field of the
    project's Charon record and passed to load_engine_module.

    :param NGIProject project: The project to get the engine from.
    :param dict config: Passed through to load_engine_module
    :param str config_file_path: Not used in this function body

    :returns: the module returned by load_engine_module
    :raises RuntimeError: If the project has no best practice analysis in
                          Charon, or if the engine module cannot be loaded
    """
    project_record = CharonSession().project_get(project.project_id)
    try:
        best_practice_analysis = project_record["best_practice_analysis"]
    except KeyError:
        # This raises rather than falling back, so no fallback is advertised
        raise RuntimeError('No best practice analysis specified in Charon for '
                           'project "{}".'.format(project))
    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
def write_status_to_charon(project_id, return_code):
    """Update the status of a workflow for a project in the Charon database.

    A return code of 0 is recorded as "CLOSED", anything else as "FAILED".

    :param NGIProject project_id: The name of the project
    :param int return_code: The return code of the workflow process

    :raises RuntimeError: If the Charon database could not be updated
    """
    ## Consider keeping on CharonSession open. What's the time savings?
    charon_session = CharonSession()
    ## Is "CLOSED" correct here?
    # Value comparison; "is 0" tests object identity and is not reliable for ints
    if return_code == 0:
        status = "CLOSED"
    else:
        status = "FAILED"
    try:
        charon_session.project_update(project_id, status=status)
    except CharonError as e:
        error_msg = ('Failed to update project status to "{}" for "{}" '
                     'in Charon database: {}'.format(status, project_id, e))
        raise RuntimeError(error_msg)
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is searched; a libprep without seqruns or
    without a matching seqrun no longer aborts the search.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)["libpreps"]
        for libprep in libpreps or []:
            libprep_id = libprep["libprepid"]
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep_id)["seqruns"]
            for seqrun in seqruns or []:
                if seqrun["seqrunid"] == fcid:
                    ## BUG if we have one sample with two libpreps on the same flowcell,
                    ##     this just picks the first one it encounters; instead,
                    ##     it should raise an Exception. Requires restructuring.
                    return libprep_id
    except CharonError as e:
        # A 404 from Charon is reported the same way as "nothing matched" below
        if e.status_code != 404:
            raise ValueError(
                'Could not determine library prep for project "{}" '
                '/ sample "{}" / fcid "{}": {}'.format(project_id, sample_name, fcid, e)
            )
    raise ValueError(
        'No library prep found for project "{}" / sample "{}" '
        '/ fcid "{}"'.format(project_id, sample_name, fcid)
    )
def test_workflows(self):
    """Run the demultiplexed-flowcell pipeline for every workflow listed
    under test_data.workflows in the NGI configuration.

    For each workflow the test project is (re)created in Charon and
    process_demultiplexed_flowcells is started on the configured flowcell.

    :raises ValueError: If required test settings are missing from the config
    """
    config_file_path = locate_ngi_config()
    config = load_yaml_config(config_file_path)

    for workflow_name, workflow_dict in config.get("test_data", {}).get("workflows", {}).items():
        # Load and rewrite config file as needed
        # NOTE(review): the customized config replaces `config` for all later
        # workflows as well -- confirm this carry-over is intended.
        customize_config_dict = workflow_dict.get("customize_config")
        if customize_config_dict:
            config = update_dict(config, customize_config_dict)

        #self._install_test_files(workflow_dict)
        LOG.info('Starting test analysis pipeline for workflow "{}"'.format(workflow_name))
        try:
            local_files = workflow_dict["local_files"]
        except KeyError:
            raise ValueError("Required paths to input files for testing do not "
                             "exist in config file (test_data.workflows."
                             "{}.local_files); cannot proceed.".format(workflow_name))
        try:
            flowcell_path = local_files["flowcell"]
        except KeyError:
            raise ValueError("Path to flowcell is required and not specified "
                             "in configuration file (test_data.workflows."
                             "{}.local_files.flowcell); cannot proceed.".format(workflow_name))
        try:
            test_project = workflow_dict["test_project"]
            test_proj_id = test_project["project_id"]
            test_proj_name = test_project["project_name"]
            test_proj_bpa = test_project["bpa"]
        except KeyError as e:
            # KeyError has no ".msg"; format the exception itself (the missing key)
            raise ValueError("Test project information is missing from config "
                             "file (under test_data.workflows.{}.test_project; "
                             "missing key {}); cannot proceed.".format(workflow_name, e))
        charon_session = CharonSession(config=config)
        try:
            # Start from a clean slate; it is fine if the project does not exist yet
            charon_session.project_delete(projectid=test_proj_id)
        except CharonError:
            pass
        charon_session.project_create(projectid=test_proj_id, name=test_proj_name,
                                      status="OPEN", best_practice_analysis=test_proj_bpa)

        process_demultiplexed_flowcells([flowcell_path], fallback_libprep="A",
                                        config=config)
def test_create_charon_entries_from_project(self):
    """Create a one-sample/one-libprep/one-seqrun project in Charon.

    Cleanup (deleting the project from Charon) runs even if creation fails.
    """
    # Create the NGIObjects
    project_kwargs = dict(name=self.p_name,
                          dirname=self.p_name,
                          project_id=self.p_id,
                          base_path=self.p_bp)
    project_obj = NGIProject(**project_kwargs)
    sample = project_obj.add_sample(name=self.s_id, dirname=self.s_id)
    libprep = sample.add_libprep(name=self.l_id, dirname=self.l_id)
    libprep.add_seqrun(name=self.sr_id, dirname=self.sr_id)
    try:
        # Create them in the db
        create_charon_entries_from_project(project_obj)
    finally:
        cleanup_session = CharonSession()
        cleanup_session.project_delete(project_obj.project_id)
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    Every libprep of the sample is searched and the first one with a seqrun
    whose id equals fcid is returned; a libprep without seqruns or without a
    matching seqrun no longer aborts the search.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        for libprep in libpreps or []:
            libprep_id = libprep['libprepid']
            # Get the sequencing runs and see if they match the FCID we have
            seqruns = charon_session.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep_id)['seqruns']
            for seqrun in seqruns or []:
                if seqrun["seqrunid"] == fcid:
                    return libprep_id
    except CharonError as e:
        # A 404 from Charon is reported the same way as "nothing matched" below
        if e.status_code != 404:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid, e))
    raise ValueError('No library prep found for project "{}" / sample "{}" '
                     '/ fcid "{}"'.format(project_id, sample_name, fcid))
def __init__(self, config, log, charon_session=None):
    """
    Set up a connector object wrapping a Charon session.

    :param config: dict with configuration options
    :param log: a log handle where the connector will log its output
    :param charon_session: an active database session to use; when omitted
        (or otherwise falsy) a new CharonSession is created from config
    """
    self.config = config
    self.log = log
    if not charon_session:
        charon_session = CharonSession(config=self.config)
    self.charon_session = charon_session
def setUpClass(cls):
    """Define the shared Charon session and the project/sample/libprep/seqrun
    identifiers used by the tests, plus a temporary base directory."""
    cls.session = CharonSession()
    # Project
    project_id = "P100000"
    cls.p_id = project_id
    cls.p_name = "Y.Mom_14_01"
    cls.p_bp = tempfile.mkdtemp()
    # Sample (named after its project)
    cls.s_id = "{}_101".format(project_id)
    # Libprep
    cls.l_id = "A"
    # Seqrun
    cls.sr_id = generate_run_id()
def check_for_preexisting_sample_runs(project_obj, sample_obj,
                                      restart_running_jobs, restart_finished_jobs,
                                      status_field="alignment_status"):
    """If any analysis is undergoing or has completed for this sample's
    seqruns, raise a RuntimeError.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter
    :param str status_field: The field to check in Charon (seqrun level)

    :raise RuntimeError if the status is RUNNING/UNDER_ANALYSIS or DONE and
                        the corresponding flag does not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    for libprep in sample_libpreps['libpreps']:
        libprep_id = libprep['libprepid']
        for seqrun in charon_session.libprep_get_seqruns(
                projectid=project_id, sampleid=sample_id,
                libprepid=libprep_id)['seqruns']:
            seqrun_id = seqrun['seqrunid']
            aln_status = charon_session.seqrun_get(
                projectid=project_id, sampleid=sample_id,
                libprepid=libprep_id, seqrunid=seqrun_id).get(status_field)
            # "and" binds tighter than "or": group both in-progress states
            # explicitly so restart_running_jobs applies to RUNNING as well.
            blocked_running = (aln_status in ("RUNNING", "UNDER_ANALYSIS")
                               and not restart_running_jobs)
            blocked_finished = aln_status == "DONE" and not restart_finished_jobs
            if blocked_running or blocked_finished:
                raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                                   'seqrun "{}" with status "{}"'.format(
                                       project_obj, sample_obj, seqrun_id,
                                       aln_status))
def setUpClass(cls):
    """Define the shared Charon session, the project/sample/libprep/seqrun
    identifiers and the seqrun metrics used by the tests."""
    cls.session = CharonSession()
    # Project
    project_id = "P100000"
    cls.p_id, cls.p_name = project_id, "Y.Mom_14_01"
    # Sample (named after its project)
    cls.s_id = "{}_101".format(project_id)
    # Libprep
    cls.l_id = "A"
    # Seqrun
    cls.sr_id = generate_run_id()
    cls.sr_total_reads, cls.sr_mac = 1000000, 30
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a started sample analysis in the local tracking database and
    mark the sample as under analysis in Charon.

    The database commit is attempted up to three times, waiting 15 seconds
    after each OperationalError. A failing Charon update is logged (and
    mailed unless config has a true "quiet" entry) but does not raise.

    :param NGIProject project: The project the sample belongs to
    :param NGISample sample: The sample being analyzed
    :param str workflow_subtask: The workflow being run
    :param str analysis_module_name: The engine name stored with the record
    :param process_id: Process id stored with the record
    :param slurm_job_id: SLURM job id stored with the record
    :param dict config: The parsed configuration

    :raises RuntimeError: If the record could not be written to the database
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for attempts in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                # Loop finished without a successful commit
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception here; it was previously referenced unbound
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample,
                                                          workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        # config defaults to None; treat that the same as "not quiet"
        if not (config or {}).get('quiet'):
            # Use the ids from the objects passed in (project_id/sample_id
            # were undefined names in this scope)
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR", info_text=error_text)
class DbConnections():
    """Bundle the StatusDB and Charon sessions used for delivery bookkeeping."""

    def __init__(self):
        """Open the StatusDB ("projects" db) and Charon sessions.

        The StatusDB credentials are taken from the "statusdb" section of the
        YAML file whose path is given by the STATUS_DB_CONFIG environment
        variable.
        """
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects; this assumes the credentials file is trusted.
        # Consider yaml.safe_load.
        with open(os.getenv('STATUS_DB_CONFIG'), 'r') as db_cred_file:
            db_conf = yaml.load(db_cred_file)['statusdb']
        self.statusdbSess = sdb(db_conf, db="projects")
        self.CharonSess = CharonSession()

    def add_delivery_proj_in_charon(self, delivery_proj, projectid):
        '''Updates delivery_projects in Charon at project level

        The value is appended only when it is not already present. Any
        failure is logged and swallowed; nothing is raised to the caller.

        :param delivery_proj: The delivery project value to add
        :param projectid: The id of the Charon project to update
        '''
        try:
            # fetch the project
            project_charon = self.CharonSess.project_get(projectid)
            delivery_projects = project_charon['delivery_projects']
            if delivery_proj not in delivery_projects:
                delivery_projects.append(delivery_proj)
                self.CharonSess.project_update(projectid, delivery_projects=delivery_projects)
                logger.info('Charon delivery_projects for project {} updated with value {}'.format(projectid, delivery_proj))
            else:
                logger.warn('Charon delivery_projects for project {} not updated with value {} because the value was already present'.format(projectid, delivery_proj))
        except Exception as e:
            # Deliberately best-effort: report the failure and carry on
            logger.error('Failed to update delivery_projects in charon for {}. Error says: {}'.format(projectid, e))
            logger.exception(e)
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False):
    """Find all the valid seqruns for a particular sample.

    Libpreps whose "qc" value is "FAILED" and seqruns whose
    "alignment_status" is "DONE" are skipped unless the corresponding
    include_* flag is set. Libpreps left without any seqrun do not appear in
    the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            # Report the libprep id rather than dumping the whole record
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep.get('libprepid'), libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqruns = charon_session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)['seqruns']
        for seqrun in seqruns:
            seqrun_id = seqrun['seqrunid']
            # The status is read from the individual seqrun record
            aln_status = charon_session.seqrun_get(projectid=project_id,
                                                   sampleid=sample_id,
                                                   libprepid=libprep_id,
                                                   seqrunid=seqrun_id).get('alignment_status')
            if aln_status != "DONE" or include_done_seqruns:
                libpreps[libprep_id].append(seqrun_id)
            else:
                LOG.info('Skipping seqrun "{}" due to alignment_status '
                         '"{}"'.format(seqrun_id, aln_status))
    return dict(libpreps)
def recurse_status_for_sample(project_obj, status_field, status_value, update_done=False,
                              extra_args=None, config=None, config_file_path=None):
    """Set seqruns under sample to have status for field <status_field> to <status_value>

    Every seqrun found by iterating project -> sample -> libprep -> seqrun is
    updated in Charon. A failed update is logged (and mailed unless
    config['quiet'] is set) and the iteration continues with the next seqrun.

    :param project_obj: Iterable project object; its project_id is read
    :param str status_field: The name of the seqrun field to set
    :param status_value: The value to store in that field
    :param bool update_done: Not used in this function body
    :param dict extra_args: Additional fields to send with each update (not modified)
    :param dict config: Configuration; a truthy "quiet" key suppresses mails
    :param str config_file_path: Not used in this function body
    """
    # Work on a copy so the caller's dict is never mutated
    update_args = dict(extra_args) if extra_args else {}
    update_args[status_field] = status_value
    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator so we iterate
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info('Updating status for field "{}" of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(status_field, label, status_value))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 **update_args)
                except CharonError as e:
                    error_text = ('Could not update {} for project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(status_field, label,
                                                                      status_value, e))
                    LOG.error(error_text)
                    # A missing config must not raise inside the error handler
                    if not (config or {}).get('quiet'):
                        mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text,
                                      workflow=status_field)