def test_construct_charon_url(self):
    """Check that construct_charon_url appends its arguments to the API root."""
    url_parts = ["road", "to", "nowhere"]
    charon_session = CharonSession()
    # The expected value deliberately mirrors the implementation: base URL,
    # the "api/v1" prefix, then the stringified components joined by "/".
    joined_path = '/'.join(str(part) for part in url_parts)
    expected_url = "{}/api/v1/{}".format(charon_session._base_url, joined_path)
    # The method takes the components as separate positional arguments,
    # not as a single list.
    actual_url = CharonSession().construct_charon_url(*url_parts)
    self.assertEqual(expected_url, actual_url)
def record_project_job(project, job_id, analysis_dir, workflow=None,
                       engine='rna_ngi', run_mode='local', config=None,
                       config_file_path=None):
    """Record a project-level analysis job and flag its samples in Charon.

    A ProjectAnalysis row is added to the local tracking database and
    committed. Then every sample of the project that is being analyzed is set
    to UNDER_ANALYSIS in Charon, and each of its analyzed seqruns belonging to
    a libprep whose qc is not FAILED is set to alignment_status RUNNING.
    Failures while updating Charon are logged per sample and do not abort the
    remaining samples.

    :param project: The project object (iterable over samples)
    :param job_id: The id of the job running the analysis
    :param str analysis_dir: The directory of the analysis
    :param workflow: The workflow stored with the job record
    :param str engine: The engine name stored with the job record
    :param str run_mode: The run mode stored with the job record
    :param config: Not used in this function
    :param config_file_path: Not used in this function
    """
    with get_session() as db_session:
        analysis_record = ProjectAnalysis(project_id=project.project_id,
                                          job_id=job_id,
                                          project_name=project.name,
                                          project_base_path=project.base_path,
                                          workflow=workflow,
                                          engine=engine,
                                          analysis_dir=analysis_dir,
                                          run_mode=run_mode)
        db_session.add(analysis_record)
        db_session.commit()
    sample_status_value = "UNDER_ANALYSIS"
    for sample in project:
        if not sample.being_analyzed:
            continue
        try:
            LOG.info('Updating Charon status for project/sample {}/{} : {}'.format(
                project, sample, sample_status_value))
            CharonSession().sample_update(projectid=project.project_id,
                                          sampleid=sample.name,
                                          analysis_status=sample_status_value)
            for libprep in sample:
                libprep_record = CharonSession().libprep_get(
                    project.project_id, sample.name, libprep.name)
                if libprep_record.get('qc') == "FAILED":
                    # Seqruns of failed libpreps are left untouched
                    continue
                for seqrun in libprep:
                    if seqrun.being_analyzed:
                        CharonSession().seqrun_update(project.project_id,
                                                      sample.name,
                                                      libprep.name,
                                                      seqrun.name,
                                                      alignment_status="RUNNING")
        except Exception as e:
            LOG.error("Could not update Charon for sample {}/{} : {}".format(
                project.project_id, sample.name, e))
def fetch_charon(context, project, threshold, all_samples):
    """Fetch the samples of a project from Charon and print their concordance.

    Samples that have no genotype status or no concordance value (i.e. that
    were not checked yet) are excluded, as are samples whose concordance is 0.
    Unless all_samples is set, only samples with a concordance at or below
    the threshold are printed.

    :param context: Not used in this function
    :param project: The project identifier passed to Charon
    :param threshold: The concordance (in percent) at or below which samples are listed
    :param bool all_samples: Print every checked sample regardless of threshold

    Any error is logged and not propagated.
    """
    try:
        # get result from charon
        charon_session = CharonSession()
        result = charon_session.project_get_samples(project)
        samples = {}
        for sample in result.get('samples'):
            status = sample.get('genotype_status')
            raw_concordance = sample.get('genotype_concordance')
            # Exclude samples which were not yet checked *before* converting:
            # float(None) raises and would abort the listing for every sample
            if status is None or raw_concordance is None:
                continue
            samples[sample.get('sampleid')] = (float(raw_concordance), status)

        # print output
        if not all_samples and samples:
            print('Samples below threshold: {}%'.format(threshold))
        for sample_id in sorted(samples):
            concordance, status = samples[sample_id]
            # if --all, we don't care about threshold
            if all_samples or concordance <= threshold:
                # do not print 0%
                if concordance != 0:
                    print('{} {}% {}'.format(sample_id, concordance, status))
    except Exception as e:
        log.error("Can't fetch Charon. Error says: {}".format(str(e)))
def add_supr_name_delivery_in_charon(self, supr_name_of_delivery):
    '''Adds a delivery project name to delivery_projects in Charon at sample level.

    The sample is fetched from Charon and, if the given name is not already
    listed in its delivery_projects, the name is appended and the sample is
    updated. Any failure is logged and not propagated.

    :param supr_name_of_delivery: The name of the delivery project to record
    '''
    charon_session = CharonSession()
    try:
        # fetch the sample
        sample_charon = charon_session.sample_get(self.projectid, self.sampleid)
        delivery_projects = sample_charon['delivery_projects']
        # Membership has to be tested against the list of delivery projects,
        # not against the keys of the sample document
        if supr_name_of_delivery not in delivery_projects:
            delivery_projects.append(supr_name_of_delivery)
            charon_session.sample_update(self.projectid, self.sampleid,
                                         delivery_projects=delivery_projects)
            logger.info(
                'Charon delivery_projects for sample {} updated with value {}'
                .format(self.sampleid, supr_name_of_delivery))
        else:
            logger.warn(
                'Charon delivery_projects for sample {} not updated with value {} because the value was already present'
                .format(self.sampleid, supr_name_of_delivery))
    except Exception as e:
        logger.error(
            'Failed to update delivery_projects in charon while delivering {}. Error says: {}'
            .format(self.sampleid, e))
        logger.exception(e)
def save_delivery_token_in_charon(self, delivery_token):
    '''Stores the given delivery_token on this project's record in Charon
    '''
    ## TODO: need to update ngi_pipeline.database.classes.project_update
    ## and add field in Charon
    CharonSession().project_update(self.projectid,
                                   delivery_token=delivery_token)
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the alignment_status of every seqrun under the project to "set_status".

    Each seqrun is updated individually; a CharonError for one seqrun is
    logged (and mailed unless the module-level config has 'quiet' set) and
    the remaining seqruns are still processed.

    :param project_obj: The project object (iterable over samples, libpreps, seqruns)
    :param str set_status: The alignment_status value to write
    :param bool update_done: Not used in this function
    """
    session = CharonSession()
    project_id = project_obj.project_id
    # The project usually holds a single sample, but it is exposed as an iterator
    for sample_obj in project_obj:
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id,
                                             libprep_id, seqrun_id)
                LOG.info('Updating status of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(label, set_status))
                update_kwargs = dict(projectid=project_id,
                                     sampleid=sample_id,
                                     libprepid=libprep_id,
                                     seqrunid=seqrun_id,
                                     alignment_status=set_status)
                try:
                    session.seqrun_update(**update_kwargs)
                except CharonError as e:
                    error_text = ('Could not update status of project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(label, set_status, e))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_id,
                                      sample_name=sample_obj.name,
                                      level="ERROR",
                                      info_text=error_text)
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Build an NGIProject object tree from the records stored in Charon.

    Samples, libpreps and seqruns are fetched from Charon and added to the
    project object; each gets the "status" stored in Charon, or "unknown"
    when the record has none.

    :param str analysis_top_dir: Used as the base path of the project
    :param str project_name: The name (and directory name) of the project
    :param str project_id: The id of the project in Charon

    :returns: The populated project object
    :rtype: NGIProject

    :raises RuntimeError: If samples, libpreps or seqruns cannot be retrieved from Charon
    """
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample in samples:
        sample_id = sample.get("sampleid")
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep in libpreps:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id, dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun in seqruns:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
def write_to_charon_NGI_results(job_id, return_code, run_dir):
    """Update the status of a sample in Charon after a workflow has run.

    :param str job_id: The job identifier; the project name and sample id are
                       parsed from it using STHLM_UUSNP_SAMPLE_RE
    :param int return_code: The return code of the workflow process (None is
                            recorded as RUNNING, 0 as DONE and anything else
                            as COMPUTATION_FAILED)
    :param str run_dir: The directory where results are stored (not used here)

    :raises RuntimeError: If the job id cannot be parsed or if the Charon
                          database could not be updated
    """
    charon_session = CharonSession()
    # Consider moving this mapping to the CharonSession object or something
    if return_code is None:
        status = "RUNNING"
    elif return_code == 0:
        status = "DONE"
    else:
        ## TODO we need to differentiate between COMPUTATION_FAILED and DATA_FAILED
        ##      also there is IGNORE?
        status = "COMPUTATION_FAILED"
    try:
        # A failed match returns None, hence the AttributeError handler below
        m_dict = STHLM_UUSNP_SAMPLE_RE.match(job_id).groupdict()
        project_id = get_project_id_from_name(m_dict['project_name'])
        sample_id = m_dict['sample_id']
    except (TypeError, AttributeError):
        error_msg = "Could not parse project/sample ids from job id \"{}\"; cannot update Charon with results!".format(
            job_id)
        raise RuntimeError(error_msg)
    try:
        charon_session.sample_update(project_id, sample_id, status=status)
    except CharonError as e:
        # One placeholder per argument, so that the underlying error is not dropped
        error_msg = ('Failed to update sample status to "{}" for project/sample "{}/{}" '
                     'in Charon database: {}'.format(status, project_id, sample_id, e))
        raise RuntimeError(error_msg)
def update_gtc_for_sample(project_id, sample_id, piper_gtc_path, config=None, config_file_path=None):
    """Find the genotype concordance file for this sample, if it exists,
    and update the sample record in Charon with the value parsed from it.

    If the configuration defines genotyping.lower_bound_cutoff, the sample's
    genotype_status is also set: FAILED when the concordance is below the
    cutoff, PASSED otherwise.

    :param str project_id: The id of the project
    :param str sample_id: The id the sample
    :param str piper_gtc_path: The path to the piper genotype concordance directory
    :param dict config: The configuration (read for genotyping.lower_bound_cutoff)
    :param str config_file_path: Not used in this function

    :raises CharonError: If there is some Error -- with Charon
    :raises IOError: If the path specified is missing or inaccessible
    :raises ValueError: If the specified sample has no data in the gtc file
    """
    gtc_file = os.path.join(piper_gtc_path, "{}.gt_concordance".format(sample_id))
    try:
        concordance_value = parse_genotype_concordance(gtc_file)[sample_id]
    except KeyError:
        raise ValueError('Concordance data for sample "{}" not found in gt '
                         'concordance file "{}"'.format(sample_id, gtc_file))
    gtc_lower_bound = config.get("genotyping", {}).get("lower_bound_cutoff")
    status_dict = {}
    if gtc_lower_bound:
        # Compare against the configured cutoff (comparing the value with
        # itself can never be true, so nothing would ever be marked FAILED)
        if concordance_value < gtc_lower_bound:
            status_dict = {"genotype_status": "FAILED"}
        else:
            status_dict = {"genotype_status": "PASSED"}
    charon_session = CharonSession()
    charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                 genotype_concordance=concordance_value,
                                 **status_dict)
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The best practice analysis is read from the project's Charon record; when
    the field is absent or empty, an error is logged and "whole_genome_reseq"
    is used instead.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed on to load_engine_module
    :param config_file_path: Not used in this function

    :raises RuntimeError: If the engine module cannot be loaded
    """
    session = CharonSession()
    try:
        project_record = session.project_get(project.project_id)
        best_practice_analysis = project_record["best_practice_analysis"]
        if not best_practice_analysis:
            # An empty value is handled exactly like a missing one
            raise KeyError(
                "For once in my life ever can't you just fill in the forms properly"
            )
    except KeyError:
        LOG.error('No best practice analysis specified in Charon for '
                  'project "{}". Using "whole_genome_reseq"'.format(project))
        best_practice_analysis = "whole_genome_reseq"
    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """Refuse to proceed when a seqrun of this sample is already running or done.

    Every seqrun of every libprep of the sample is looked up in Charon and
    its alignment_status is compared with the restart flags.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    session = CharonSession()
    libprep_records = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)['libpreps']
    for libprep in libprep_records:
        libprep_id = libprep['libprepid']
        seqrun_records = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = session.seqrun_get(projectid=project_id,
                                               sampleid=sample_id,
                                               libprepid=libprep_id,
                                               seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            blocked_running = aln_status == "RUNNING" and not restart_running_jobs
            blocked_finished = aln_status == "DONE" and not restart_finished_jobs
            if blocked_running or blocked_finished:
                raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                                   'seqrun "{}" with status "{}"'.format(project_obj,
                                                                         sample_obj,
                                                                         seqrun_id,
                                                                         aln_status))
def update_analysis(project_id, status):
    """Mail the outcome of an analysis and record it in Charon.

    Samples that are UNDER_ANALYSIS become ANALYZED (or FAILED); their RUNNING
    seqruns, in libpreps whose qc is not FAILED, become DONE (or FAILED).

    :param project_id: The id of the project in Charon
    :param status: Truthy when the analysis succeeded, falsy when it failed
    """
    session = CharonSession()
    mail_level = 'INFO' if status else 'ERROR'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)
    if status:
        new_sample_status, new_seqrun_status = 'ANALYZED', 'DONE'
    else:
        new_sample_status, new_seqrun_status = 'FAILED', 'FAILED'
    samples = session.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample.get('sampleid'), new_sample_status))
        session.sample_update(project_id, sample.get('sampleid'),
                              analysis_status=new_sample_status)
        libpreps = session.sample_get_libpreps(
            project_id, sample.get('sampleid')).get('libpreps', {})
        for libprep in libpreps:
            if libprep.get('qc') == 'FAILED':
                continue
            seqruns = session.libprep_get_seqruns(
                project_id, sample.get('sampleid'),
                libprep.get('libprepid')).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample.get('sampleid'),
                    libprep.get('libprepid'), seqrun.get('seqrunid'),
                    new_seqrun_status))
                session.seqrun_update(project_id,
                                      sample.get('sampleid'),
                                      libprep.get('libprepid'),
                                      seqrun.get('seqrunid'),
                                      alignment_status=new_seqrun_status)
def get_finished_seqruns_for_sample(project_id, sample_id, include_failed_libpreps=False):
    """Collect the seqruns of a sample whose alignment_status is DONE.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Also look at libpreps whose qc is FAILED

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    session = CharonSession()
    sample_libpreps = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)
    finished = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_records = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = session.seqrun_get(projectid=project_id,
                                               sampleid=sample_id,
                                               libprepid=libprep_id,
                                               seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            if aln_status != "DONE":
                LOG.debug('Skipping seqrun "{}" due to alignment_status '
                          '"{}"'.format(seqrun_id, aln_status))
                continue
            finished[libprep_id].append(seqrun_id)
    # Hand back a plain dict so that lookups of unknown libpreps raise KeyError
    return dict(finished)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Update Charon with the mean autosomal coverage of a sample's finished seqruns.

    The finished seqruns are obtained from get_finished_seqruns_for_sample;
    the coverage of each one is parsed from the qualimap output under
    piper_qc_dir and written to the seqrun's record in Charon. A CharonError
    for one seqrun is logged (and mailed unless the module-level config has
    'quiet' set) and the remaining seqruns are still processed.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.iteritems():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir,
                                                             sample_id,
                                                             seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
            update_kwargs = dict(projectid=project_id,
                                 sampleid=sample_id,
                                 libprepid=libprep_id,
                                 seqrunid=seqrun_id,
                                 mean_autosomal_coverage=ma_coverage)
            try:
                session.seqrun_update(**update_kwargs)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
def save_delivery_token_in_charon(self, delivery_token):
    '''Stores the given delivery_token on this sample's record in Charon
    '''
    CharonSession().sample_update(self.projectid,
                                  self.sampleid,
                                  delivery_token=delivery_token)
def get_project_id_from_name(project_name):
    """Given the project name ("Y.Mom_14_01") return the project ID ("P123")

    :param str project_name: The human-friendly name of the project (e.g. "J.Doe_14_01")

    :returns: The alphanumeric database-friendly name of the project (e.g. "P123")
    :rtype: str

    :raises ValueError: If the project does not exist in Charon (the raised
                        exception carries status_code 404) or if its database
                        entry has no "projectid" value
    :raises CharonError: For any other Charon error (re-raised unchanged)
    """
    charon_session = CharonSession()
    try:
        project_record = charon_session.project_get(project_name)
    except CharonError as e:
        if e.status_code == 404:
            new_e = ValueError('Project "{}" missing from database: {}'.format(
                project_name, e))
            # Keep the HTTP status available to callers that inspect it
            new_e.status_code = 404
            raise new_e
        else:
            raise
    try:
        return project_record['projectid']
    except KeyError:
        raise ValueError(
            'Couldn\'t retrieve project id for project "{}"; '
            'this project\'s database entry has no "projectid" value.'.format(
                project_name))
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path, config=None, config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage of a sample.

    The values are parsed from the piper_ngi output below
    <project_base_path>/ANALYSIS/<project_id>. When a file cannot be parsed,
    an error is logged and 0 is reported for the corresponding values. A
    CharonError during the update is logged (and mailed unless config has
    'quiet' set) and not propagated.

    :param str project_id: The id of the project
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The configuration (read for the 'quiet' flag)
    :param str config_file_path: Not used in this function
    """
    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(
        project_base_path, 'ANALYSIS', project_id, 'piper_ngi',
        '06_final_alignment_qc', "{}.clean.dedup.qc".format(sample_id),
        "genome_results.txt")
    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort: report 0 instead of failing, but let KeyboardInterrupt
        # and SystemExit through
        dup_pc = 0
        LOG.error(
            "Cannot find {}.metrics file for duplication rate at {}. Continuing."
            .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error(
            "Cannot find genome_results.txt file for sample coverage at {}. Continuing."
            .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info(
            'Updating sample "{}" in '
            'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
            .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # A single format call with one argument per placeholder; the nested
        # call with an undefined name used to raise inside this handler
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate : {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        if not config.get('quiet'):
            mail_analysis(project_name=project_id,
                          sample_name=sample_id,
                          engine_name="piper_ngi",
                          level="ERROR",
                          info_text=error_text)
def main(demux_fcid_dir, restrict_to_projects=None, restrict_to_samples=None):
    """Process a fixed list of flowcells, then poll Charon and launch analyses forever.

    The arguments are ignored: the flowcell directories are hard-coded below.
    Each flowcell is processed with a one minute pause after it. The function
    then enters an endless loop which syncs the local job status with Charon
    and launches the analysis for every Charon project that has a directory
    under the analysis_ready DATA area, sleeping 3800 seconds between rounds.
    """
    flowcell_dirs = (
        # G.Grigelioniene_14_01
        "/proj/a2014205/INBOX/140528_D00415_0049_BC423WACXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106, P1171_108
        "/proj/a2014205/INBOX/140702_D00415_0052_AC41A2ANXX",
        # M.Kaller_14_06 sample P1171_102, P1171_104, P1171_106 ---- rerun
        "/proj/a2014205/INBOX/140905_D00415_0057_BC45KVANXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0222_AC4HA6ACXX",
        # M.Kaller_14_05 sample P1170_101, P1170_103, P1170_105
        # M.Kaller_14_08 sample P1272_101, P1272_104
        "/proj/a2014205/INBOX/140815_SN1025_0223_BC4HAPACXX",
        # M.Kaller_14_05 P1170_103, P1170_105 --- rerun
        "/proj/a2014205/INBOX/140919_SN1018_0203_BHA3THADXX",
        ### UPPSALA
        # uppsala run
        "/proj/a2014205/INBOX/140821_D00458_0029_AC45JGANXX",
        # -- rerun
        "/proj/a2014205/INBOX/140917_D00458_0034_AC4FF3ANXX",
    )
    for demux_fcid_dir in flowcell_dirs:
        process_demultiplexed_flowcell(demux_fcid_dir, None, None)
        time.sleep(60)  # wait for 1 minute

    # and now a loop to update the DB
    time.sleep(3800)
    charon_session = CharonSession()
    while True:
        # this updates local_db and charon accordingly
        update_charon_with_local_jobs_status()
        # grab all projects from Charon
        for project_charon in charon_session.projects_get_all()['projects']:
            project_dir = os.path.join(
                "/proj/a2014205/nobackup/NGI/analysis_ready/DATA",
                project_charon["name"])
            if not os.path.isdir(project_dir):
                continue
            project_obj = recreate_project_from_filesystem(project_dir, None)
            launch_analysis_for_samples([project_obj])
        time.sleep(3800)
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Collect the seqruns of a sample that are still eligible for processing.

    A seqrun is eligible unless its status (read from status_field) is DONE.
    A seqrun whose record lacks the status field is logged and treated as
    eligible.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    allowed_fields = ("alignment_status", "genotype_status",)
    if status_field not in allowed_fields:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(allowed_fields),
                                                          status_field))
    session = CharonSession()
    sample_libpreps = session.sample_get_libpreps(projectid=project_id,
                                                  sampleid=sample_id)
    valid_seqruns = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') == "FAILED" and not include_failed_libpreps:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
            continue
        libprep_id = libprep['libprepid']
        seqrun_records = session.libprep_get_seqruns(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id)['seqruns']
        for seqrun in seqrun_records:
            seqrun_id = seqrun['seqrunid']
            try:
                seqrun_status = session.seqrun_get(projectid=project_id,
                                                   sampleid=sample_id,
                                                   libprepid=libprep_id,
                                                   seqrunid=seqrun_id)[status_field]
            except KeyError:
                LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                          'for project "{}" / sample "{}". Including as '
                          'valid.'.format(status_field, seqrun_id,
                                          project_id, sample_id))
                seqrun_status = None
            if seqrun_status == "DONE" and not include_done_seqruns:
                LOG.info('Skipping seqrun "{}" due to {}'
                         '"{}"'.format(seqrun_id, status_field, seqrun_status))
            else:
                valid_seqruns[libprep_id].append(seqrun_id)
    return dict(valid_seqruns)
def get_delivery_token_in_charon(self):
    '''Returns the project's delivery_token from Charon, or 'NO-TOKEN'
    when the project has no (or an empty) token
    '''
    project_charon = CharonSession().project_get(self.projectid)
    token = project_charon.get('delivery_token')
    return token if token else 'NO-TOKEN'
def __init__(self, config, log, charon_session=None):
    """
    Set up a connector which provides an interface to the Charon sample
    tracking database.

    :param config: dict with configuration options
    :param log: a log handle where the connector will log its output
    :param charon_session: an active database session to use; when none is
        given, a new session is created from the configuration
    """
    self.config = config
    self.log = log
    if not charon_session:
        charon_session = CharonSession(config=self.config)
    self.charon_session = charon_session
def reset_charon_records_by_name(project_id, restrict_to_samples=None,
                                 restrict_to_libpreps=None, restrict_to_seqruns=None):
    """Reset the Charon records of a project and of the entities below it.

    The project record is always reset. Samples, libpreps and seqruns are
    reset as well, unless the matching restrict_to_* list is non-empty and
    does not contain their id; a skipped entity's children are skipped too.

    :param str project_id: The id of the project
    :param list restrict_to_samples: Sample ids to reset (all if empty/None)
    :param list restrict_to_libpreps: Libprep ids to reset (all if empty/None)
    :param list restrict_to_seqruns: Seqrun ids to reset (all if empty/None)
    """
    restrict_to_samples = restrict_to_samples or []
    restrict_to_libpreps = restrict_to_libpreps or []
    restrict_to_seqruns = restrict_to_seqruns or []
    session = CharonSession()

    LOG.info("Resetting Charon record for project {}".format(project_id))
    session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_id))

    samples = session.project_get_samples(projectid=project_id).get('samples', [])
    for sample in samples:
        sample_id = sample['sampleid']
        if restrict_to_samples and sample_id not in restrict_to_samples:
            LOG.info("Skipping project/sample {}/{}: not in list of samples to use "
                     "({})".format(project_id, sample_id,
                                   ", ".join(restrict_to_samples)))
            continue
        LOG.info("Resetting Charon record for project/sample {}/{}".format(
            project_id, sample_id))
        session.sample_reset(projectid=project_id, sampleid=sample_id)
        LOG.info("Charon record for project/sample {}/{} reset".format(
            project_id, sample_id))

        libpreps = session.sample_get_libpreps(projectid=project_id,
                                               sampleid=sample_id).get('libpreps', [])
        for libprep in libpreps:
            libprep_id = libprep['libprepid']
            if restrict_to_libpreps and libprep_id not in restrict_to_libpreps:
                LOG.info("Skipping project/sample/libprep {}/{}/{}: not in list "
                         "of libpreps to use ({})".format(
                             project_id, sample_id, libprep_id,
                             ", ".join(restrict_to_libpreps)))
                continue
            LOG.info("Resetting Charon record for project/sample"
                     "libprep {}/{}/{}".format(project_id, sample_id, libprep_id))
            session.libprep_reset(projectid=project_id,
                                  sampleid=sample_id,
                                  libprepid=libprep_id)
            LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                     "reset".format(project_id, sample_id, libprep_id))

            seqruns = session.libprep_get_seqruns(projectid=project_id,
                                                  sampleid=sample_id,
                                                  libprepid=libprep_id).get('seqruns', [])
            for seqrun in seqruns:
                seqrun_id = seqrun['seqrunid']
                if restrict_to_seqruns and seqrun_id not in restrict_to_seqruns:
                    LOG.info("Skipping project/sample/libprep/seqrun {}/{}/{}/{}: "
                             "not in list of seqruns to use ({})".format(
                                 project_id, sample_id, libprep_id, seqrun_id,
                                 ", ".join(restrict_to_seqruns)))
                    continue
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_id, sample_id,
                                                     libprep_id, seqrun_id))
                session.seqrun_reset(projectid=project_id,
                                     sampleid=sample_id,
                                     libprepid=libprep_id,
                                     seqrunid=seqrun_id)
                LOG.info("Charon record for project/sample/libprep/seqrun "
                         "{}/{}/{}/{} reset".format(project_id, sample_id,
                                                    libprep_id, seqrun_id))
def test_workflows(self):
    """Run the pipeline on the test flowcell of every configured workflow.

    For each entry under test_data.workflows in the NGI configuration, the
    test project is deleted from Charon (if present), created again and the
    workflow's flowcell is processed.

    :raises ValueError: If required test data settings are missing from the
                        configuration
    """
    config_file_path = locate_ngi_config()
    config = load_yaml_config(config_file_path)
    workflows = config.get("test_data", {}).get("workflows", {})
    for workflow_name, workflow_dict in workflows.items():
        # Load and rewrite config file as needed
        # NOTE(review): customizations accumulate in `config` across
        # workflows -- confirm that this carry-over is intended
        customize_config_dict = workflow_dict.get("customize_config")
        if customize_config_dict:
            config = update_dict(config, customize_config_dict)
        #self._install_test_files(workflow_dict)
        LOG.info('Starting test analysis pipeline for workflow "{}"'.format(
            workflow_name))
        try:
            local_files = workflow_dict["local_files"]
        except KeyError:
            raise ValueError(
                "Required paths to input files for testing do not "
                "exist in config file (test_data.workflows."
                "{}.local_files); cannot proceed.".format(workflow_name))
        try:
            flowcell_path = local_files["flowcell"]
        except KeyError:
            raise ValueError(
                "Path to flowcell is required and not specified "
                "in configuration file (test_data.workflows."
                "{}.local_files.flowcell); cannot proceed.".format(
                    workflow_name))
        try:
            test_project = workflow_dict["test_project"]
            test_proj_id = test_project["project_id"]
            test_proj_name = test_project["project_name"]
            test_proj_bpa = test_project["bpa"]
        except KeyError as e:
            # KeyError has no "msg" attribute; format the exception itself,
            # which renders the missing key
            raise ValueError(
                "Test project information is missing from config "
                "file (under test_data.workflows.{}.test_project): "
                "missing key {}; cannot proceed.".format(workflow_name, e))
        charon_session = CharonSession(config=config)
        try:
            charon_session.project_delete(projectid=test_proj_id)
        except CharonError:
            # The project may simply not exist yet
            pass
        charon_session.project_create(projectid=test_proj_id,
                                      name=test_proj_name,
                                      status="OPEN",
                                      best_practice_analysis=test_proj_bpa)
        process_demultiplexed_flowcells([flowcell_path],
                                        fallback_libprep="A",
                                        config=config)
def setUpClass(cls):
    """Create the Charon session and the identifiers shared by the tests."""
    cls.session = CharonSession()
    # Project (the base path is a fresh temporary directory)
    cls.p_id, cls.p_name = "P100000", "Y.Mom_14_01"
    cls.p_bp = tempfile.mkdtemp()
    # Sample, named after the project
    cls.s_id = cls.p_id + "_101"
    # Libprep
    cls.l_id = "A"
    # Seqrun
    cls.sr_id = generate_run_id()
def setUpClass(cls):
    """Create the Charon session and the identifiers shared by the tests."""
    cls.session = CharonSession()
    # Project
    cls.p_id, cls.p_name = "P100000", "Y.Mom_14_01"
    # Sample, named after the project
    cls.s_id = cls.p_id + "_101"
    # Libprep
    cls.l_id = "A"
    # Seqrun, with its total read count and mean autosomal coverage
    cls.sr_id = generate_run_id()
    cls.sr_total_reads = 1000000
    cls.sr_mac = 30
def get_staged_samples_from_charon(self):
    """Return the ids of the project's samples whose delivery_status is STAGED.

    :raises AssertionError: If Charon returns no 'samples' entry for the project
    """
    samples = CharonSession().project_get_samples(self.projectid).get('samples')
    if samples is None:
        raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
    return [sample.get('sampleid')
            for sample in samples
            if sample.get('delivery_status') == 'STAGED']
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a sample-level analysis job locally and flag it in Charon.

    A SampleAnalysis row is added to the local tracking database; the commit
    is attempted up to three times, waiting 15 seconds after each
    OperationalError. The sample is then set to UNDER_ANALYSIS in Charon and
    its seqruns to RUNNING. A CharonError is logged (and mailed unless config
    has 'quiet' set) and not propagated.

    :param project: The project object (project_id, name and base_path are read)
    :param sample: The sample object (name is read)
    :param workflow_subtask: The workflow stored with the job record
    :param analysis_module_name: The engine name stored with the job record
    :param process_id: The id of the local process, if any
    :param slurm_job_id: The id of the slurm job, if any
    :param dict config: The configuration (read for the 'quiet' flag)

    :raises RuntimeError: If the job could not be recorded in the local database
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for attempts in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warn('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                # Only reached when no attempt ended in "break"
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception here: the message below formats it
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample, workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        # config defaults to None; treat a missing config as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project.project_id,
                          sample_name=sample.name,
                          engine_name='piper_ngi',
                          level="ERROR",
                          info_text=error_text)
def get_engine_for_bp(project, config=None, config_file_path=None):
    """Return the analysis engine module for the given project.

    The engine is selected by the "best_practice_analysis" value of the
    project's record in Charon.

    :param NGIProject project: The project to get the engine from.
    :param config: Passed on to load_engine_module
    :param config_file_path: Not used in this function

    :raises RuntimeError: If the engine module cannot be loaded
    """
    project_record = CharonSession().project_get(project.project_id)
    best_practice_analysis = project_record["best_practice_analysis"]
    try:
        return load_engine_module(best_practice_analysis, config)
    except RuntimeError as e:
        raise RuntimeError('Project "{}": {}'.format(project, e))
def find_projects_from_samples(sample_list):
    """Given a list of samples, attempts to determine which projects they
    belong to using Charon records.

    The project id is first guessed from the sample name (a leading
    "P" + four digits + "_") and confirmed against Charon; sample names that
    do not match are looked up across all projects. Samples without an owner,
    or with more than one, are reported in a warning and left out of the
    result.

    :param list sample_list: A list of the samples for which to find projects

    :returns: a dict of {project_id: set(samples)}
    :rtype: dict of sets

    :raises ValueError: If you fail to pass in a list. Nice work!
    """
    # Validate before opening a Charon session so that bad input fails fast
    if not isinstance(sample_list, list):
        raise ValueError("Input should be list.")
    if not sample_list:
        # Nothing to look up
        return {}
    STHLM_SAMPLE_RE = re.compile(r'(P\d{4})_')
    projects_dict = collections.defaultdict(set)
    no_owners_found = set()
    multiple_owners_found = set()
    charon_session = CharonSession()
    for sample_name in sample_list:
        # First see if we can just parse out the project id from the sample name
        m = STHLM_SAMPLE_RE.match(sample_name)
        if m:
            project_id = m.group(1)
            try:
                # Ensure that we guessed right
                charon_session.sample_get(project_id, sample_name)
            except CharonError as e:
                LOG.debug('Project for sample "{}" appears to be "{}" but is not '
                          'present in Charon ({})'.format(sample_name, project_id, e))
                no_owners_found.add(sample_name)
            else:
                projects_dict[project_id].add(sample_name)
        else:
            # Otherwise check all the projects for matching samples (returns list or None)
            owner_projects_list = charon_session.sample_get_projects(sample_name)
            if not owner_projects_list:
                no_owners_found.add(sample_name)
            elif len(owner_projects_list) > 1:
                multiple_owners_found.add(sample_name)
            else:
                projects_dict[owner_projects_list[0]].add(sample_name)
    # Sort the names so that the warnings are reproducible between runs
    if no_owners_found:
        LOG.warn("No projects found for the following samples: {}".format(
            ", ".join(sorted(no_owners_found))))
    if multiple_owners_found:
        LOG.warn('Multiple projects found with the following samples (owner '
                 'could not be unamibugously determined): {}'.format(
                     ", ".join(sorted(multiple_owners_found))))
    return dict(projects_dict)
def get_samples_from_charon(self, delivery_status='STAGED'):
    """Return the ids of the project's samples that have the given delivery status.

    :param delivery_status: The delivery status to select; None selects every sample

    :raises AssertionError: If Charon returns no 'samples' entry for the project
    """
    samples = CharonSession().project_get_samples(self.projectid).get('samples')
    if samples is None:
        raise AssertionError('CharonSession returned no results for project {}'.format(self.projectid))
    return [sample.get('sampleid')
            for sample in samples
            if delivery_status is None
            or sample.get('delivery_status') == delivery_status]