def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Find all the finished seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    A failed Charon update for one seqrun is logged (and mailed, unless the
    "quiet" setting of ``config`` is truthy) instead of being raised, so the
    remaining seqruns are still processed.

    :param str project_id: The project id used for the Charon lookups/updates
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)

    :raises OSError: If the qc path specified is missing or otherwise
                     inaccessible (per the original documentation; would be
                     propagated from the qualimap parsing helper)
    :raises ValueError: If arguments are incorrect (same caveat)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir,
                                                             sample_id,
                                                             seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # NOTE(review): `config` is not a parameter of this function;
                # it must be resolvable from the enclosing module scope.
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
def update_analysis(project_id, status):
    """Mail the outcome of an rna_ngi analysis and record it in Charon.

    Samples of the project whose analysis_status is "UNDER_ANALYSIS" are
    marked "ANALYZED" (truthy status) or "FAILED" (falsy status). For those
    samples, every seqrun with alignment_status "RUNNING" that belongs to a
    libprep whose qc is not "FAILED" is marked "DONE" or "FAILED" accordingly.

    :param str project_id: The Charon project id
    :param status: Truthy if the analysis succeeded, falsy otherwise
    """
    charon = CharonSession()
    if status:
        mail_level, sample_state, seqrun_state = 'INFO', 'ANALYZED', 'DONE'
    else:
        mail_level, sample_state, seqrun_state = 'ERROR', 'FAILED', 'FAILED'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)

    samples = charon.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, sample_state))
        charon.sample_update(project_id, sample_id,
                             analysis_status=sample_state)

        libpreps = charon.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            # Seqruns of libpreps that failed QC are left untouched
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = charon.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample_id, libprep_id, seqrun_id,
                    seqrun_state))
                charon.seqrun_update(project_id, sample_id, libprep_id,
                                     seqrun_id,
                                     alignment_status=seqrun_state)
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the Charon alignment_status of every seqrun under the project
    object's sample(s) to "set_status".

    A failed update is logged (and mailed unless the "quiet" setting of the
    module-level ``config`` is truthy); processing then continues with the
    next seqrun.

    :param project_obj: Iterable project object (samples -> libpreps ->
                        seqruns) exposing ``project_id``; each level exposes
                        ``name``
    :param str set_status: The alignment status to record
    :param bool update_done: Not used by this implementation
    """
    info_template = ('Updating status of project/sample/libprep/seqrun '
                     '"{}" to "{}" in Charon ')
    error_template = ('Could not update status of project/sample/libprep/seqrun '
                      '"{}" in Charon to "{}": {}')
    charon = CharonSession()
    project_id = project_obj.project_id
    # There's only one sample but project_obj is an iterator
    for sample in project_obj:
        sample_id = sample.name
        for libprep in sample:
            libprep_id = libprep.name
            for seqrun in libprep:
                seqrun_id = seqrun.name
                label = "{}/{}/{}/{}".format(project_id, sample_id,
                                             libprep_id, seqrun_id)
                LOG.info(info_template.format(label, set_status))
                try:
                    charon.seqrun_update(projectid=project_id,
                                         sampleid=sample_id,
                                         libprepid=libprep_id,
                                         seqrunid=seqrun_id,
                                         alignment_status=set_status)
                except CharonError as e:
                    error_text = error_template.format(label, set_status, e)
                    LOG.error(error_text)
                    if config.get('quiet'):
                        continue
                    mail_analysis(project_name=project_id,
                                  sample_name=sample.name,
                                  level="ERROR",
                                  info_text=error_text)
def write_to_charon_alignment_results(base_path, project_name, project_id, sample_id, libprep_id, seqrun_id):
    """Update the status of a sequencing run after alignment.

    Fetches the seqrun document from Charon, folds in the qualimap metrics of
    every lane found under the Piper qc directory, and writes the document
    back to Charon.

    :param str base_path: Base directory; results are looked up under
                          <base_path>/ANALYSIS/<project_name>/02_preliminary_alignment_qc
    :param str project_name: The name of the project (e.g. T.Durden_14_01)
    :param str project_id: The id of the project (e.g. P1171)
    :param str sample_id: The id of the sample
    :param str libprep_id: The id of the library prep
    :param str seqrun_id: The id of the sequencing run; its fourth
                          "_"-separated field is used as the Piper run id

    :raises CharonError: If the Charon database could not be read or updated
    :raises ValueError: If the seqrun id or the output data could not be
                        parsed, or the result directory is missing/inaccessible
    """
    charon_session = CharonSession()
    try:
        seqrun_dict = charon_session.seqrun_get(project_id, sample_id, libprep_id, seqrun_id)
    except CharonError as e:
        raise CharonError('Error accessing database for project "{}", sample {}; '
                          'could not update Charon while performing best practice: '
                          '{}'.format(project_name, sample_id, e))
    try:
        piper_run_id = seqrun_id.split("_")[3]
    except IndexError:
        # Surface a malformed id as the documented ValueError, not IndexError
        raise ValueError('Could not determine the Piper run id from seqrun id '
                         '"{}"'.format(seqrun_id))
    seqrun_dict["lanes"] = 0
    if seqrun_dict.get("alignment_status") == "DONE":
        LOG.warn("Sequencing run \"{}\" marked as DONE but writing new alignment results; "
                 "this will overwrite the previous results.".format(seqrun_id))
    # Find all the appropriate files
    piper_result_dir = os.path.join(base_path, "ANALYSIS", project_name,
                                    "02_preliminary_alignment_qc")
    try:
        # os.listdir raises OSError for a missing, non-directory or unreadable
        # path. (os.path.isdir just returns False, which previously let a
        # missing directory slip past this check.)
        os.listdir(piper_result_dir)
    except OSError as e:
        raise ValueError("Piper result directory \"{}\" inaccessible when updating stats to Charon: {}.".format(piper_result_dir, e))
    piper_qc_dir_base = "{}.{}.{}".format(sample_id, piper_run_id, sample_id)
    piper_qc_path = "{}*/".format(os.path.join(piper_result_dir, piper_qc_dir_base))
    piper_qc_dirs = glob.glob(piper_qc_path)
    if not piper_qc_dirs:
        # Something went wrong in the alignment or we can't parse the file format
        raise ValueError("Piper qc directories under \"{}\" are missing or in an unexpected format when updating stats to Charon.".format(piper_qc_path))
    # Examine each lane and update the dict with its alignment metrics
    for qc_lane in piper_qc_dirs:
        genome_result = os.path.join(qc_lane, "genome_results.txt")
        # This means that if any of the lanes are missing results, the
        # sequencing run is marked as a failure.
        # We should flag this somehow and send an email at some point.
        if not os.path.isfile(genome_result):
            raise ValueError("File \"genome_results.txt\" is missing from Piper result directory \"{}\"".format(piper_result_dir))
        # Get the alignment results for this lane
        lane_alignment_metrics = parse_qualimap_results(genome_result)
        # Update the dict for this lane
        update_seq_run_for_lane(seqrun_dict, lane_alignment_metrics)
    try:
        # Update the seqrun in the Charon database
        charon_session.seqrun_update(**seqrun_dict)
    except CharonError as e:
        error_msg = ('Failed to update run alignment status for run "{}" in project {} '
                     'sample {}, library prep {} to Charon database: {}'.format(seqrun_id,
                                                                                project_name,
                                                                                sample_id,
                                                                                libprep_id,
                                                                                e))
        raise CharonError(error_msg)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir, config=None, config_file_path=None):
    """Find all the finished seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and the total number of reads for each.

    Reads are summed over every genome_results.txt matching the sample and
    the last "_"-separated field of the seqrun id; files that cannot be
    parsed are logged and skipped. A failed Charon update is logged (and
    mailed unless config["quiet"] is truthy) instead of being raised.

    :param str project_id: The project id used for the Charon lookups/updates
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    :param dict config: Configuration mapping; only its "quiet" key is read
                        here. None is treated as an empty configuration.
    :param str config_file_path: Not read inside this function
                                 (NOTE(review): presumably consumed by a
                                 config-loading decorator -- confirm)

    :raises OSError: If the qc path specified is missing or otherwise
                     inaccessible (per the original documentation)
    :raises ValueError: If arguments are incorrect (same caveat)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            genome_results_file_paths = glob.glob(
                os.path.join(piper_qc_dir,
                             "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),
                             "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best effort: a bad file must not abort the update, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; don't crash while reporting an error
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir, config=None, config_file_path=None):
    """Find all the finished seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and the total number of reads for each.

    Reads are summed over every genome_results.txt matching the sample and
    the last "_"-separated field of the seqrun id; files that cannot be
    parsed are logged and skipped. A failed Charon update is logged (and
    mailed unless config["quiet"] is truthy) instead of being raised.

    :param str project_id: The project id used for the Charon lookups/updates
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    :param dict config: Configuration mapping; only its "quiet" key is read
                        here. None is treated as an empty configuration.
    :param str config_file_path: Not read inside this function
                                 (NOTE(review): presumably consumed by a
                                 config-loading decorator -- confirm)

    :raises OSError: If the qc path specified is missing or otherwise
                     inaccessible (per the original documentation)
    :raises ValueError: If arguments are incorrect (same caveat)
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            qc_dir_pattern = "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1])
            genome_results_file_paths = glob.glob(
                os.path.join(piper_qc_dir, qc_dir_pattern, "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best effort: a bad file must not abort the update, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; don't crash while reporting an error
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR",
                                  info_text=error_text)
def main(project):
    """Reset the analysis state of a whole project in Charon.

    Every sample is set back to analysis_status "TO_ANALYZE" with its
    genotype status cleared and its coverage/read totals zeroed; every seqrun
    below it gets mean_autosomal_coverage "0" and alignment_status
    "NOT_RUNNING".

    :param str project: The Charon project id
    """
    charon = CharonSession()
    project_samples = charon.project_get_samples(project)
    for sample in project_samples["samples"]:
        sample_id = sample["sampleid"]
        charon.sample_update(project, sample_id,
                             analysis_status="TO_ANALYZE",
                             genotype_status=None,
                             total_autosomal_coverage="0",
                             total_sequenced_reads="0")
        libpreps = charon.sample_get_libpreps(project, sample_id)['libpreps']
        for libprep in libpreps:
            libprep_id = libprep["libprepid"]
            libprep_seqruns = charon.libprep_get_seqruns(project, sample_id,
                                                         libprep_id)['seqruns']
            for seqrun in libprep_seqruns:
                charon.seqrun_update(project, sample_id, libprep_id,
                                     seqrun["seqrunid"],
                                     mean_autosomal_coverage="0",
                                     alignment_status="NOT_RUNNING")
def main(project):
    """Reset all samples and seqruns of a Charon project to their
    not-yet-analyzed state.

    Samples: analysis_status "TO_ANALYZE", genotype_status None, coverage and
    read totals "0". Seqruns: mean_autosomal_coverage "0", alignment_status
    "NOT_RUNNING".

    :param str project: The Charon project id
    """
    # Field values written to every sample / seqrun of the project
    sample_reset = {"analysis_status": "TO_ANALYZE",
                    "genotype_status": None,
                    "total_autosomal_coverage": "0",
                    "total_sequenced_reads": "0"}
    seqrun_reset = {"mean_autosomal_coverage": "0",
                    "alignment_status": "NOT_RUNNING"}

    session = CharonSession()
    for sample_doc in session.project_get_samples(project)["samples"]:
        session.sample_update(project, sample_doc["sampleid"], **sample_reset)
        libprep_docs = session.sample_get_libpreps(
            project, sample_doc["sampleid"])['libpreps']
        for libprep_doc in libprep_docs:
            seqrun_docs = session.libprep_get_seqruns(
                project, sample_doc["sampleid"],
                libprep_doc["libprepid"])['seqruns']
            for seqrun_doc in seqrun_docs:
                session.seqrun_update(project,
                                      sample_doc["sampleid"],
                                      libprep_doc["libprepid"],
                                      seqrun_doc["seqrunid"],
                                      **seqrun_reset)
def recurse_status_for_sample(project_obj, status_field, status_value, update_done=False, extra_args=None, config=None, config_file_path=None):
    """Set seqruns under sample to have status for field <status_field> to
    <status_value>.

    A failed Charon update is logged (and mailed unless config["quiet"] is
    truthy); processing then continues with the next seqrun.

    :param project_obj: Iterable project object (samples -> libpreps ->
                        seqruns) exposing ``project_id``; each level exposes
                        ``name``
    :param str status_field: Name of the seqrun field to set in Charon
    :param status_value: Value to store in that field
    :param bool update_done: Not used by this implementation
    :param dict extra_args: Additional fields passed to the seqrun update;
                            the caller's dict is not modified
    :param dict config: Configuration mapping; only its "quiet" key is read
                        here. None is treated as an empty configuration.
    :param str config_file_path: Not read inside this function
                                 (NOTE(review): presumably consumed by a
                                 config-loading decorator -- confirm)
    """
    # Work on a copy so the status field does not leak into the caller's dict
    update_fields = dict(extra_args) if extra_args else {}
    update_fields[status_field] = status_value
    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator so we iterate
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info('Updating status for field "{}" of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(status_field, label, status_value))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 **update_fields)
                except CharonError as e:
                    error_text = ('Could not update {} for project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(status_field,
                                                                      label,
                                                                      status_value,
                                                                      e))
                    LOG.error(error_text)
                    # config defaults to None; don't crash while reporting an error
                    if not (config or {}).get('quiet'):
                        mail_analysis(project_name=project_id,
                                      sample_name=sample_obj.name,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=status_field)
def create_charon_entries_from_project(project, best_practice_analysis="whole_genome_reseq", sequencing_facility="NGI-S", force_overwrite=False, delete_existing=False, retry_on_fail=True):
    """Given a project object, create the relevant entries in Charon.

    Walks project -> samples -> libpreps -> seqruns and creates each entry.
    A CharonError with status code 400 on creation is treated as "already
    exists": the entry is updated if force_overwrite is set, otherwise left
    alone. Any other CharonError is re-raised at project level; below that it
    is logged, the entry (and everything under it) is skipped, and the run is
    flagged as failed. A flagged run is retried once when retry_on_fail is
    set.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this project
                                       (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing
    :param bool force_overwrite: If true, overwrite existing entries in
                                 Charon (default false)
    :param bool delete_existing: Don't just update existing entries, delete
                                 them and create new ones (default false)
    :param bool retry_on_fail: If true, run the whole procedure once more
                               (with retry_on_fail=False) after a flagged
                               failure (default true)

    :raises CharonError: If the project could not be created for a reason
                         other than status 400, or if updates failed and
                         retry_on_fail is false
    """
    # Initial values written for newly created (or overwritten) entries
    status = "OPEN"
    analysis_status = "TO_ANALYZE"
    qc = "PASSED"
    alignment_status = "NOT_RUNNING"

    charon = CharonSession()
    update_failed = False

    # --- Project ---
    try:
        LOG.info('Creating project "{}" with status "{}", best practice analysis "{}", '
                 'and sequencing_facility {}'.format(project, status,
                                                     best_practice_analysis,
                                                     sequencing_facility))
        charon.project_create(projectid=project.project_id,
                              name=project.name,
                              status=status,
                              best_practice_analysis=best_practice_analysis,
                              sequencing_facility=sequencing_facility)
        LOG.info('Project "{}" created in Charon.'.format(project))
    except CharonError as e:
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warn('Overwriting data for project "{}"'.format(project))
            charon.project_update(projectid=project.project_id,
                                  name=project.name,
                                  status=status,
                                  best_practice_analysis=best_practice_analysis,
                                  sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info('Project "{}" already exists; moving to samples...'.format(project))

    for sample in project:
        # --- Sample ---
        if delete_existing:
            LOG.warn('Deleting existing sample "{}"'.format(sample))
            try:
                charon.sample_delete(projectid=project.project_id,
                                     sampleid=sample.name)
            except CharonError as e:
                update_failed = True
                LOG.error('Could not delete sample "{}": {}'.format(sample, e))
        try:
            LOG.info('Creating sample "{}" with analysis_status "{}"'.format(sample, analysis_status))
            charon.sample_create(projectid=project.project_id,
                                 sampleid=sample.name,
                                 analysis_status=analysis_status)
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(project, sample))
        except CharonError as e:
            if e.status_code != 400:
                update_failed = True
                LOG.error(e)
                continue
            if force_overwrite:
                LOG.warn('Overwriting data for project "{}" / '
                         'sample "{}"'.format(project, sample))
                charon.sample_update(projectid=project.project_id,
                                     sampleid=sample.name,
                                     analysis_status=analysis_status)
                LOG.info('Project/sample "{}/{}" updated in Charon.'.format(project, sample))
            else:
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))

        for libprep in sample:
            # --- Libprep ---
            if delete_existing:
                LOG.warn('Deleting existing libprep "{}"'.format(libprep))
                try:
                    charon.libprep_delete(projectid=project.project_id,
                                          sampleid=sample.name,
                                          libprepid=libprep.name)
                except CharonError as e:
                    # Unlike sample/seqrun deletion, this does not flag a failure
                    LOG.warn('Could not delete libprep "{}": {}'.format(libprep, e))
            try:
                LOG.info('Creating libprep "{}" with qc status "{}"'.format(libprep, qc))
                charon.libprep_create(projectid=project.project_id,
                                      sampleid=sample.name,
                                      libprepid=libprep.name,
                                      qc=qc)
                LOG.info('Project/sample/libprep "{}/{}/{}" created in '
                         'Charon'.format(project, sample, libprep))
            except CharonError as e:
                if e.status_code != 400:
                    update_failed = True
                    LOG.error(e)
                    continue
                if force_overwrite:
                    LOG.warn('Overwriting data for project "{}" / '
                             'sample "{}" / libprep "{}"'.format(project, sample,
                                                                 libprep))
                    charon.libprep_update(projectid=project.project_id,
                                          sampleid=sample.name,
                                          libprepid=libprep.name,
                                          qc=qc)
                    LOG.info('Project/sample/libprep "{}/{}/{}" updated in '
                             'Charon'.format(project, sample, libprep))
                else:
                    LOG.debug(e)
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to libpreps'.format(project, sample, libprep))

            for seqrun in libprep:
                # --- Seqrun ---
                if delete_existing:
                    LOG.warn('Deleting existing seqrun "{}"'.format(seqrun))
                    try:
                        charon.seqrun_delete(projectid=project.project_id,
                                             sampleid=sample.name,
                                             libprepid=libprep.name,
                                             seqrunid=seqrun.name)
                    except CharonError as e:
                        update_failed = True
                        LOG.error('Could not delete seqrun "{}": {}'.format(seqrun, e))
                try:
                    LOG.info('Creating seqrun "{}" with alignment_status "{}"'.format(seqrun, alignment_status))
                    charon.seqrun_create(projectid=project.project_id,
                                         sampleid=sample.name,
                                         libprepid=libprep.name,
                                         seqrunid=seqrun.name,
                                         alignment_status=alignment_status,
                                         total_reads=0,
                                         mean_autosomal_coverage=0)
                    LOG.info('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                             'created in Charon'.format(project, sample,
                                                        libprep, seqrun))
                except CharonError as e:
                    if e.status_code != 400:
                        update_failed = True
                        LOG.error(e)
                        continue
                    if force_overwrite:
                        LOG.warn('Overwriting data for project "{}" / '
                                 'sample "{}" / libprep "{}" / '
                                 'seqrun "{}"'.format(project, sample,
                                                      libprep, seqrun))
                        charon.seqrun_update(projectid=project.project_id,
                                             sampleid=sample.name,
                                             libprepid=libprep.name,
                                             seqrunid=seqrun.name,
                                             alignment_status=alignment_status,
                                             total_reads=0,
                                             mean_autosomal_coverage=0)
                        LOG.info('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                                 'updated in Charon'.format(project, sample,
                                                            libprep, seqrun))
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(project, sample,
                                                                              libprep, seqrun))

    if not update_failed:
        return
    if not retry_on_fail:
        raise CharonError("A network error blocks Charon updating.")
    # One more attempt with identical settings, but no further retries
    create_charon_entries_from_project(project,
                                       best_practice_analysis=best_practice_analysis,
                                       sequencing_facility=sequencing_facility,
                                       force_overwrite=force_overwrite,
                                       delete_existing=delete_existing,
                                       retry_on_fail=False)
def update_charon_with_local_jobs_status():
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    For every tracked seqrun-level and sample-level analysis the exit code is
    looked up:

    * 0 -> the job finished; the result is recorded in Charon and the
      local entry is deleted.
    * 1, or no exit code while the pid no longer exists -> the job
      failed; the failure is recorded and the local entry is deleted.
    * anything else -> the job is treated as still running; Charon is set
      to RUNNING if it says otherwise.

    A CharonError while handling one entry is logged and the entry is kept,
    so it is examined again on the next invocation. The local database
    session is committed once after all entries have been processed.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()

        # Sequencing Run Analyses
        for seqrun_entry in session.query(SeqrunAnalysis).all():
            # Local names
            workflow = seqrun_entry.workflow
            project_name = seqrun_entry.project_name
            project_id = seqrun_entry.project_id
            project_base_path = seqrun_entry.project_base_path
            sample_id = seqrun_entry.sample_id
            libprep_id = seqrun_entry.libprep_id
            seqrun_id = seqrun_entry.seqrun_id
            pid = seqrun_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id,
                                      libprep_id=libprep_id,
                                      seqrun_id=seqrun_id)
            label = "project/sample/libprep/seqrun {}/{}/{}/{}".format(project_name,
                                                                       sample_id,
                                                                       libprep_id,
                                                                       seqrun_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_alignment_status = "DONE"
                    try:
                        write_to_charon_alignment_results(base_path=project_base_path,
                                                          project_name=project_name,
                                                          project_id=project_id,
                                                          sample_id=sample_id,
                                                          libprep_id=libprep_id,
                                                          seqrun_id=seqrun_id)
                    except (RuntimeError, ValueError) as e:
                        # Results could not be recorded: downgrade to FAILED
                        LOG.error(e)
                        set_alignment_status = "FAILED"
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_alignment_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(seqrun_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code (process no longer running)
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"FAILED", inspect manually'.format(label, pid))
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status="FAILED")
                    # Job is only deleted if the Charon update succeeds
                    LOG.debug("Deleting local entry {}".format(seqrun_entry))
                    session.delete(seqrun_entry)
                else:
                    # None -> Job still running
                    charon_status = charon_session.seqrun_get(projectid=project_id,
                                                              sampleid=sample_id,
                                                              libprepid=libprep_id,
                                                              seqrunid=seqrun_id)['alignment_status']
                    if charon_status != "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.seqrun_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id,
                                                     seqrunid=seqrun_id,
                                                     alignment_status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))

        # Sample Analyses
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            pid = sample_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id)
            # Sample-level label: only project and sample are known here
            label = "project/sample {}/{}".format(project_name, sample_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_status = "DONE"
                    ## TODO implement sample-level analysis results parsing / reporting to Charon?
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status=set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                elif exit_code == 1 or (not psutil.pid_exists(pid) and not exit_code):
                    if exit_code == 1:
                        # 1 -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"COMPUTATION_FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"COMPUTATION_FAILED", inspect manually'.format(label, pid))
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status="COMPUTATION_FAILED")
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running
                    try:
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['status']
                    except (CharonError, KeyError) as e:
                        LOG.warn('Unable to get required information from Charon for '
                                 'sample "{}" / project "{}" -- forcing it to RUNNING: {}'.format(sample_id, project_id, e))
                        # Any value other than "RUNNING" triggers the update below
                        charon_status = "NEW"
                    if charon_status != "RUNNING":
                        LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                 'local process tracking database indicates it is running. '
                                 'Setting value in Charon to RUNNING.'.format(label, charon_status))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))
        session.commit()
def create_charon_entries_from_project(project, best_practice_analysis="whole_genome_reseq", sequencing_facility="NGI-S", force_overwrite=False, delete_existing=False):
    """Given a project object, create the relevant entries in Charon.

    Walks project -> samples -> libpreps -> seqruns and creates each entry.
    A CharonError with status code 400 on creation is treated as "already
    exists": the entry is updated if force_overwrite is set, otherwise left
    alone. Any other CharonError is re-raised at project level; below that it
    is logged and the entry (with everything under it) is skipped.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this project
                                       (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing
    :param bool force_overwrite: If true, overwrite existing entries in
                                 Charon (default false)
    :param bool delete_existing: Don't just update existing entries, delete
                                 them and create new ones (default false)

    :raises CharonError: If the project could not be created for a reason
                         other than status 400
    """
    # Initial values written for newly created (or overwritten) entries
    status = "OPEN"
    analysis_status = "TO_ANALYZE"
    qc = "PASSED"
    alignment_status = "NOT_RUNNING"

    charon = CharonSession()

    # --- Project ---
    try:
        LOG.info('Creating project "{}" with status "{}", best practice analysis "{}", '
                 'and sequencing_facility {}'.format(project, status,
                                                     best_practice_analysis,
                                                     sequencing_facility))
        charon.project_create(projectid=project.project_id,
                              name=project.name,
                              status=status,
                              best_practice_analysis=best_practice_analysis,
                              sequencing_facility=sequencing_facility)
        LOG.info('Project "{}" created in Charon.'.format(project))
    except CharonError as e:
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warn('Overwriting data for project "{}"'.format(project))
            charon.project_update(projectid=project.project_id,
                                  name=project.name,
                                  status=status,
                                  best_practice_analysis=best_practice_analysis,
                                  sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info('Project "{}" already exists; moving to samples...'.format(project))

    for sample in project:
        # --- Sample ---
        if delete_existing:
            LOG.warn('Deleting existing sample "{}"'.format(sample))
            try:
                charon.sample_delete(projectid=project.project_id,
                                     sampleid=sample.name)
            except CharonError as e:
                LOG.error('Could not delete sample "{}": {}'.format(sample, e))
        try:
            LOG.info('Creating sample "{}" with analysis_status "{}"'.format(sample, analysis_status))
            charon.sample_create(projectid=project.project_id,
                                 sampleid=sample.name,
                                 analysis_status=analysis_status)
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(project, sample))
        except CharonError as e:
            if e.status_code != 400:
                LOG.error(e)
                continue
            if force_overwrite:
                LOG.warn('Overwriting data for project "{}" / '
                         'sample "{}"'.format(project, sample))
                charon.sample_update(projectid=project.project_id,
                                     sampleid=sample.name,
                                     analysis_status=analysis_status)
                LOG.info('Project/sample "{}/{}" updated in Charon.'.format(project, sample))
            else:
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))

        for libprep in sample:
            # --- Libprep ---
            if delete_existing:
                LOG.warn('Deleting existing libprep "{}"'.format(libprep))
                try:
                    charon.libprep_delete(projectid=project.project_id,
                                          sampleid=sample.name,
                                          libprepid=libprep.name)
                except CharonError as e:
                    LOG.warn('Could not delete libprep "{}": {}'.format(libprep, e))
            try:
                LOG.info('Creating libprep "{}" with qc status "{}"'.format(libprep, qc))
                charon.libprep_create(projectid=project.project_id,
                                      sampleid=sample.name,
                                      libprepid=libprep.name,
                                      qc=qc)
                LOG.info('Project/sample/libprep "{}/{}/{}" created in '
                         'Charon'.format(project, sample, libprep))
            except CharonError as e:
                if e.status_code != 400:
                    LOG.error(e)
                    continue
                if force_overwrite:
                    LOG.warn('Overwriting data for project "{}" / '
                             'sample "{}" / libprep "{}"'.format(project, sample,
                                                                 libprep))
                    charon.libprep_update(projectid=project.project_id,
                                          sampleid=sample.name,
                                          libprepid=libprep.name,
                                          qc=qc)
                    LOG.info('Project/sample/libprep "{}/{}/{}" updated in '
                             'Charon'.format(project, sample, libprep))
                else:
                    LOG.info(e)
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to libpreps'.format(project, sample, libprep))

            for seqrun in libprep:
                # --- Seqrun ---
                if delete_existing:
                    LOG.warn('Deleting existing seqrun "{}"'.format(seqrun))
                    try:
                        charon.seqrun_delete(projectid=project.project_id,
                                             sampleid=sample.name,
                                             libprepid=libprep.name,
                                             seqrunid=seqrun.name)
                    except CharonError as e:
                        LOG.error('Could not delete seqrun "{}": {}'.format(seqrun, e))
                try:
                    LOG.info('Creating seqrun "{}" with alignment_status "{}"'.format(seqrun, alignment_status))
                    charon.seqrun_create(projectid=project.project_id,
                                         sampleid=sample.name,
                                         libprepid=libprep.name,
                                         seqrunid=seqrun.name,
                                         alignment_status=alignment_status,
                                         total_reads=0,
                                         mean_autosomal_coverage=0)
                    LOG.info('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                             'created in Charon'.format(project, sample,
                                                        libprep, seqrun))
                except CharonError as e:
                    if e.status_code != 400:
                        LOG.error(e)
                        continue
                    if force_overwrite:
                        LOG.warn('Overwriting data for project "{}" / '
                                 'sample "{}" / libprep "{}" / '
                                 'seqrun "{}"'.format(project, sample,
                                                      libprep, seqrun))
                        charon.seqrun_update(projectid=project.project_id,
                                             sampleid=sample.name,
                                             libprepid=libprep.name,
                                             seqrunid=seqrun.name,
                                             alignment_status=alignment_status,
                                             total_reads=0,
                                             mean_autosomal_coverage=0)
                        LOG.info('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                                 'updated in Charon'.format(project, sample,
                                                            libprep, seqrun))
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(project, sample,
                                                                              libprep, seqrun))
def create_charon_entries_from_project(project, workflow="NGI", force_overwrite=False):
    """Create Charon entries for a project and its samples, libpreps and seqruns.

    The hierarchy is walked top-down. Any CharonError raised by a ``*_create``
    call is treated as "the entry already exists": the entry is then either
    overwritten via the matching ``*_update`` call (``force_overwrite``) or
    left untouched, and processing continues with its children.

    :param NGIProject project: The NGIProject object
    :param str workflow: The workflow to assign for this project (default NGI)
    :param bool force_overwrite: If this is set to true, overwrite existing
                                 entries in Charon (default false)
    """
    charon_session = CharonSession()
    project_id = project.project_id

    # --- project ---------------------------------------------------------
    status = "SEQUENCED"
    LOG.info('Creating project "{}" with status "{}" and workflow '
             '"{}"'.format(project, status, workflow))
    try:
        charon_session.project_create(projectid=project_id, name=project.name,
                                      status=status, pipeline=workflow)
    except CharonError:
        if force_overwrite:
            LOG.warning('Overwriting data for project "{}"'.format(project))
            charon_session.project_update(projectid=project_id,
                                          name=project.name,
                                          status=status, pipeline=workflow)
        else:
            LOG.info('Project "{}" already exists; moving to '
                     'samples...'.format(project))

    # --- samples ---------------------------------------------------------
    for sample in project:
        # Keyword arguments identifying this sample in every Charon call
        sample_key = {"projectid": project_id, "sampleid": sample.name}
        LOG.info('Creating sample "{}"'.format(sample))
        try:
            charon_session.sample_create(status="NEW", **sample_key)
        except CharonError:
            if force_overwrite:
                LOG.warning('Overwriting data for project "{}" / '
                            'sample "{}"'.format(project, sample))
                charon_session.sample_update(status="NEW", **sample_key)
            else:
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))

        # --- libpreps ----------------------------------------------------
        for libprep in sample:
            libprep_key = dict(sample_key, libprepid=libprep.name)
            LOG.info('Creating libprep "{}"'.format(libprep))
            try:
                charon_session.libprep_create(status="NEW", **libprep_key)
            except CharonError:
                if force_overwrite:
                    LOG.warning('Overwriting data for project "{}" / '
                                'sample "{}" / libprep "{}"'.format(
                                    project, sample, libprep))
                    charon_session.libprep_update(status="NEW", **libprep_key)
                else:
                    # The next level down is seqruns (the message used to
                    # say "libpreps", copied from the sample level).
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to seqruns'.format(
                                 project, sample, libprep))

            # --- seqruns -------------------------------------------------
            for seqrun in libprep:
                seqrun_key = dict(libprep_key, seqrunid=seqrun.name)
                LOG.info('Creating seqrun "{}"'.format(seqrun))
                try:
                    charon_session.seqrun_create(total_reads=0,
                                                 mean_autosomal_coverage=0,
                                                 sequencing_status="DONE",
                                                 alignment_status="NEW",
                                                 **seqrun_key)
                except CharonError:
                    if force_overwrite:
                        LOG.warning('Overwriting data for project "{}" / '
                                    'sample "{}" / libprep "{}" / '
                                    'seqrun "{}"'.format(project, sample,
                                                         libprep, seqrun))
                        # NOTE(review): the create call above sets
                        # alignment_status/sequencing_status/total_reads, but
                        # the overwrite only sends status="NEW" -- confirm
                        # whether the update should mirror the create fields.
                        charon_session.seqrun_update(status="NEW", **seqrun_key)
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(
                                     project, sample, libprep, seqrun))
def launch_analysis(level, projects_to_analyze, restart_failed_jobs=False,
                    config=None, config_file_path=None):
    """Launch the appropriate analysis for every sample or seqrun of each project.

    For each project the workflow name is read from Charon, mapped to an
    analysis engine module through the configuration, and that module's
    ``analyze_seqrun`` / ``analyze_sample`` is called for every unit whose
    status in Charon does not mark it as already handled.

    NOTE(review): ``config`` is indexed directly although it defaults to None;
    presumably it is filled in before this body runs (e.g. by a decorator not
    visible here) -- confirm.

    :param str level: "seqrun" to process every seqrun, "sample" to process
                      every sample; any other value processes nothing
    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: If true, units that Charon reports as
                                     FAILED are launched again instead of skipped
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.

    :raises RuntimeError: If the configuration names no analysis engine for
                          a project's workflow
    """
    # Update Charon with the local state of all the jobs we're running
    update_charon_with_local_jobs_status()
    charon_session = CharonSession()
    for project in projects_to_analyze:
        # Get information from Charon regarding which workflows to run
        try:
            # E.g. "NGI" for NGI DNA Samples
            workflow = charon_session.project_get(project.project_id)["pipeline"]
        except (KeyError, CharonError) as e:
            # Workflow missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        try:
            analysis_engine_module_name = \
                config["analysis"]["workflows"][workflow]["analysis_engine"]
        except KeyError:
            error_msg = ("No analysis engine for workflow \"{}\" specified "
                         "in configuration file. Skipping this workflow "
                         "for project {}".format(workflow, project))
            LOG.error(error_msg)
            raise RuntimeError(error_msg)
        # Import the adapter module specified in the config file (e.g. piper_ngi)
        try:
            analysis_module = importlib.import_module(analysis_engine_module_name)
        except ImportError as e:
            error_msg = ('Skipping project "{}" workflow "{}": couldn\'t import '
                         'module "{}": {}'.format(project, workflow,
                                                  analysis_engine_module_name, e))
            LOG.error(error_msg)
            # Next project
            continue

        # Collect the units to process up front as (sample, libprep, seqrun);
        # libprep and seqrun are None for sample-level analysis.
        units = []
        if level == "sample":
            for sample in project:
                units.append((sample, None, None))
        elif level == "seqrun":
            for sample in project:
                for libprep in sample:
                    for seqrun in libprep:
                        units.append((sample, libprep, seqrun))

        for sample, libprep, seqrun in units:
            # Ask Charon for the current status. Identifiers are passed as
            # .name, matching the *_update calls in the fallback below.
            try:
                if level == "seqrun":
                    charon_reported_status = charon_session.seqrun_get(
                        project.project_id, sample.name, libprep.name,
                        seqrun.name)['alignment_status']
                else:  # sample-level
                    charon_reported_status = charon_session.sample_get(
                        project.project_id, sample.name)['status']
            except (CharonError, KeyError) as e:
                LOG.warning('Unable to get required information from Charon for '
                            'sample "{}" / project "{}" -- forcing it to new: '
                            '{}'.format(sample, project, e))
                if level == "seqrun":
                    charon_session.seqrun_update(project.project_id, sample.name,
                                                 libprep.name, seqrun.name,
                                                 alignment_status="NEW")
                    charon_reported_status = charon_session.seqrun_get(
                        project.project_id, sample.name, libprep.name,
                        seqrun.name)['alignment_status']
                else:
                    charon_session.sample_update(project.project_id, sample.name,
                                                 status="NEW")
                    charon_reported_status = charon_session.sample_get(
                        project.project_id, sample.name)['status']

            # Check Charon to ensure this hasn't already been processed
            if charon_reported_status in ("RUNNING", "DONE"):
                if level == "seqrun":
                    LOG.info('Charon reports seqrun analysis for project "{}" / sample "{}" '
                             '/ libprep "{}" / seqrun "{}" does not need processing '
                             ' (already "{}")'.format(project, sample, libprep, seqrun,
                                                      charon_reported_status))
                else:  # Sample
                    LOG.info('Charon reports sample analysis for project "{}" / sample "{}" '
                             'does not need processing '
                             ' (already "{}")'.format(project, sample,
                                                      charon_reported_status))
                continue
            if charon_reported_status == "FAILED" and not restart_failed_jobs:
                if level == "seqrun":
                    LOG.error('FAILED: Project "{}" / sample "{}" / library "{}" '
                              '/ flowcell "{}": Charon reports FAILURE, manual '
                              'investigation needed!'.format(project, sample,
                                                             libprep, seqrun))
                else:  # Sample
                    LOG.error('FAILED: Project "{}" / sample "{}" Charon reports FAILURE, manual '
                              'investigation needed!'.format(project, sample))
                continue

            try:
                # The engines themselves know which sub-workflows
                # they need to execute for a given level. For example,
                # with DNA Variant Calling on the sequencing run
                # level, we need to execute basic alignment and QC.
                if level == "seqrun":
                    LOG.info('Attempting to launch seqrun analysis for '
                             'project "{}" / sample "{}" / libprep "{}" '
                             '/ seqrun "{}", workflow "{}"'.format(project, sample,
                                                                   libprep, seqrun,
                                                                   workflow))
                    analysis_module.analyze_seqrun(project=project,
                                                   sample=sample,
                                                   libprep=libprep,
                                                   seqrun=seqrun)
                else:  # sample level
                    LOG.info('Attempting to launch sample analysis for '
                             'project "{}" / sample "{}" / workflow '
                             '"{}"'.format(project, sample, workflow))
                    analysis_module.analyze_sample(project=project,
                                                   sample=sample)
            except Exception as e:
                # The failure still propagates to the caller exactly as
                # before; it is now logged first (the log call used to sit
                # unreachable after the bare raise).
                LOG.error('Cannot process project "{}" / sample "{}" / '
                          'libprep "{}" / seqrun "{}" / workflow '
                          '"{}" : {}'.format(project, sample, libprep, seqrun,
                                             workflow, e))
                raise
def create_charon_entries_from_project(
        project, best_practice_analysis="whole_genome_reseq",
        sequencing_facility="NGI-S", force_overwrite=False,
        retry_on_fail=True):
    """Given a project object, creates the relevant entries in Charon.

    The project, its samples, their libpreps and their seqruns are created in
    turn. A CharonError with status code 400 from a ``*_create`` call is
    taken to mean the entry already exists: it is overwritten when
    ``force_overwrite`` is set, otherwise it is left alone (an existing
    sample additionally gets its ``status`` set to "STALE"). Any other
    CharonError below project level is logged, the entry and its children
    are skipped, and the whole run is repeated once if ``retry_on_fail`` is
    set.

    NOTE: this was originally written in a hurry and later became part of
    the pipeline; treat it with care.

    :param NGIProject project: The NGIProject object
    :param str best_practice_analysis: The workflow to assign for this
                                       project (default "whole_genome_reseq")
    :param str sequencing_facility: The facility that did the sequencing
                                    (default "NGI-S")
    :param bool force_overwrite: If this is set to true, overwrite existing
                                 entries in Charon (default false)
    :param bool retry_on_fail: If true, run the whole function one more time
                               when any sample/libprep/seqrun call failed
                               with a non-400 error (default true)

    :raises CharonError: If creating the project fails with a status code
                         other than 400, or if non-400 failures occurred and
                         retry_on_fail is false
    """
    charon_session = CharonSession()
    project_id = project.project_id
    update_failed = False

    # Values written to Charon at each level
    status = "OPEN"
    analysis_status = "TO_ANALYZE"
    sample_data_status_value = "STALE"
    qc = "PASSED"
    alignment_status = "NOT_RUNNING"

    # --- project ---------------------------------------------------------
    LOG.info(
        'Creating project "{}" with status "{}", best practice analysis "{}", '
        'and sequencing_facility {}'.format(project, status,
                                            best_practice_analysis,
                                            sequencing_facility))
    try:
        charon_session.project_create(
            projectid=project_id, name=project.name, status=status,
            best_practice_analysis=best_practice_analysis,
            sequencing_facility=sequencing_facility)
    except CharonError as e:
        if e.status_code != 400:
            raise
        if force_overwrite:
            LOG.warning('Overwriting data for project "{}"'.format(project))
            charon_session.project_update(
                projectid=project_id, name=project.name, status=status,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility)
            LOG.info('Project "{}" updated in Charon.'.format(project))
        else:
            LOG.info('Project "{}" already exists; moving to '
                     'samples...'.format(project))
    else:
        LOG.info('Project "{}" created in Charon.'.format(project))

    # --- samples ---------------------------------------------------------
    for sample in project:
        # Keyword arguments identifying this sample in every Charon call
        sample_key = {"projectid": project_id, "sampleid": sample.name}
        LOG.info('Creating sample "{}" with analysis_status "{}"'.format(
            sample, analysis_status))
        try:
            charon_session.sample_create(analysis_status=analysis_status,
                                         **sample_key)
        except CharonError as e:
            if e.status_code != 400:
                update_failed = True
                LOG.error(e)
                continue
            if force_overwrite:
                LOG.warning('Overwriting data for project "{}" / '
                            'sample "{}"'.format(project, sample))
                charon_session.sample_update(
                    analysis_status=analysis_status,
                    status=sample_data_status_value, **sample_key)
                LOG.info('Project/sample "{}/{}" updated in Charon.'.format(
                    project, sample))
            else:
                # Sample already exists: only mark its status as STALE
                charon_session.sample_update(status=sample_data_status_value,
                                             **sample_key)
                LOG.info('Project "{}" / sample "{}" already exists; moving '
                         'to libpreps'.format(project, sample))
        else:
            LOG.info('Project/sample "{}/{}" created in Charon.'.format(
                project, sample))

        # --- libpreps ----------------------------------------------------
        for libprep in sample:
            libprep_key = dict(sample_key, libprepid=libprep.name)
            LOG.info('Creating libprep "{}" with qc status "{}"'.format(
                libprep, qc))
            try:
                charon_session.libprep_create(qc=qc, **libprep_key)
            except CharonError as e:
                if e.status_code != 400:
                    update_failed = True
                    LOG.error(e)
                    continue
                if force_overwrite:
                    LOG.warning('Overwriting data for project "{}" / '
                                'sample "{}" / libprep "{}"'.format(
                                    project, sample, libprep))
                    charon_session.libprep_update(qc=qc, **libprep_key)
                    LOG.info(('Project/sample/libprep "{}/{}/{}" updated in '
                              'Charon').format(project, sample, libprep))
                else:
                    LOG.debug(e)
                    # The next level down is seqruns (the message used to
                    # say "libpreps", copied from the sample level).
                    LOG.info('Project "{}" / sample "{}" / libprep "{}" already '
                             'exists; moving to seqruns'.format(
                                 project, sample, libprep))
            else:
                LOG.info(('Project/sample/libprep "{}/{}/{}" created in '
                          'Charon').format(project, sample, libprep))

            # --- seqruns -------------------------------------------------
            for seqrun in libprep:
                seqrun_key = dict(libprep_key, seqrunid=seqrun.name)
                LOG.info('Creating seqrun "{}" with alignment_status "{}"'.
                         format(seqrun, alignment_status))
                try:
                    charon_session.seqrun_create(
                        alignment_status=alignment_status, total_reads=0,
                        mean_autosomal_coverage=0, **seqrun_key)
                except CharonError as e:
                    if e.status_code != 400:
                        update_failed = True
                        LOG.error(e)
                        continue
                    if force_overwrite:
                        LOG.warning('Overwriting data for project "{}" / '
                                    'sample "{}" / libprep "{}" / '
                                    'seqrun "{}"'.format(project, sample,
                                                         libprep, seqrun))
                        charon_session.seqrun_update(
                            alignment_status=alignment_status, total_reads=0,
                            mean_autosomal_coverage=0, **seqrun_key)
                        LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                                  'updated in Charon').format(
                                      project, sample, libprep, seqrun))
                    else:
                        LOG.info('Project "{}" / sample "{}" / libprep "{}" / '
                                 'seqrun "{}" already exists; next...'.format(
                                     project, sample, libprep, seqrun))
                else:
                    LOG.info(('Project/sample/libprep/seqrun "{}/{}/{}/{}" '
                              'created in Charon').format(
                                  project, sample, libprep, seqrun))

    if update_failed:
        if retry_on_fail:
            # One more full pass; the retry itself will not retry again.
            create_charon_entries_from_project(
                project, best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_overwrite, retry_on_fail=False)
        else:
            raise CharonError("A network error blocks Charon updating.")