def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          analysis_dir, pid, config=None):
    """Record a launched sample analysis in the local process tracking database.

    A ``SampleAnalysis`` row is built from the project/sample attributes and
    committed. If the commit raises ``sqlalchemy.exc.OperationalError`` the
    transaction is rolled back and the commit is retried, up to three
    attempts in total.

    :param project: object providing ``project_id``, ``name`` and ``base_path``
    :param sample: object providing ``name``
    :param workflow_subtask: stored in the ``workflow`` column
    :param analysis_module_name: stored in the ``engine`` column
    :param analysis_dir: stored in the ``analysis_dir`` column
    :param pid: stored in the ``process_id`` column
    :param config: not used by this function; kept for interface compatibility

    :raises RuntimeError: if the row could not be committed after three attempts
    """
    max_attempts = 3
    retry_wait_seconds = 15
    LOG.info('Recording process id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(pid, project, sample, workflow_subtask))
    with get_db_session() as session:
        seqrun_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       analysis_dir=analysis_dir,
                                       process_id=pid)
        ## FIXME We must make sure that an entry for this doesn't already exist!
        for attempt in range(1, max_attempts + 1):
            # (Re-)add on every attempt: rolling back a failed transaction
            # detaches objects that were pending in it.
            session.add(seqrun_db_obj)
            try:
                session.commit()
            except sqlalchemy.exc.OperationalError:
                # The session cannot be reused until the failed transaction
                # has been rolled back.
                session.rollback()
                LOG.warning("Database locked. Waiting...")
                if attempt < max_attempts:
                    # No point in sleeping when we are about to give up
                    time.sleep(retry_wait_seconds)
            else:
                LOG.info('Successfully recorded process id "{}" for project "{}", sample "{}", '
                         'workflow "{}"'.format(pid, project, sample, workflow_subtask))
                break
        else:
            raise RuntimeError('Could not record process id "{}" for project "{}", sample "{}", '
                               'workflow "{}"'.format(pid, project, sample, workflow_subtask))
def setUpClass(cls):
    """Populate class-level attributes: a scratch database session plus the
    project/sample/libprep/seqrun identifiers and engine name."""
    # Database file lives in a freshly created temporary directory
    scratch_dir = tempfile.mkdtemp()
    db_path = os.path.join(scratch_dir, "temporary_database")
    cls.tmp_dir = scratch_dir
    cls.database_path = db_path
    cls.session = sql_db.get_db_session(database_path=db_path)

    # The sample id is derived from the project id
    project_id = "P123"
    cls.project_name = "Y.Mom_14_01"
    cls.project_id = project_id
    cls.sample_id = "{}_456".format(project_id)
    cls.libprep_id = "A"
    cls.seqrun_id = gtd.generate_run_id()
    cls.engine = "piper_ngi"
def setUpClass(cls):
    """Set up the shared class attributes: temporary database location and
    session, followed by the identifiers used for project, sample, libprep,
    sequencing run and engine."""
    # Temporary directory that holds the database file
    tmp_dir = tempfile.mkdtemp()
    database_path = os.path.join(tmp_dir, "temporary_database")
    cls.tmp_dir = tmp_dir
    cls.database_path = database_path
    cls.session = sql_db.get_db_session(database_path=database_path)

    # Identifiers; the sample id is prefixed with the project id
    cls.project_name = "Y.Mom_14_01"
    cls.project_id = "P123"
    cls.sample_id = "{}_456".format("P123")
    cls.libprep_id = "A"
    cls.seqrun_id = gtd.generate_run_id()
    cls.engine = "piper_ngi"
def db_session(self):
    """
    Context manager for the database session.

    Yields ``self.tracking_session`` if one is already set; otherwise opens a
    session via ``get_db_session(config=self.config)``, stores it on
    ``self.tracking_session`` and yields it from inside that context.

    :return: a database session
    """
    if self.tracking_session is None:
        # No session stored yet: open one and remember it on the instance
        with get_db_session(config=self.config) as opened_session:
            self.tracking_session = opened_session
            yield self.tracking_session
    else:
        yield self.tracking_session
def db_session(self):
    """
    Context manager for the database session.

    An already-stored ``self.tracking_session`` is yielded as is. When none is
    stored, a session is obtained from ``get_db_session(config=self.config)``,
    saved on the instance and yielded while that context is open.

    :return: a database session
    """
    existing_session = self.tracking_session
    if existing_session is not None:
        yield existing_session
        return
    with get_db_session(config=self.config) as fresh_session:
        self.tracking_session = fresh_session
        yield self.tracking_session
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a launched sample analysis locally and flag it in Charon.

    First a ``SampleAnalysis`` row is committed to the local process tracking
    database (retrying up to three times on ``OperationalError``). Then the
    sample's ``analysis_status`` in Charon is set to "UNDER_ANALYSIS" and
    "RUNNING" is recursed onto the project object built from the analysis log.
    A ``CharonError`` during the second step is logged (and mailed unless
    ``config['quiet']`` is set) but not raised.

    :param project: object providing ``project_id``, ``name`` and ``base_path``
    :param sample: object providing ``name``
    :param workflow_subtask: stored in the ``workflow`` column
    :param analysis_module_name: stored in the ``engine`` column
    :param process_id: stored in the ``process_id`` column
    :param slurm_job_id: stored in the ``slurm_job_id`` column
    :param config: mapping consulted for the ``quiet`` key; None is treated
        as an empty mapping

    :raises RuntimeError: if the row could not be written to the local database
    """
    max_attempts = 3
    retry_wait_seconds = 15
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            for attempt in range(1, max_attempts + 1):
                # (Re-)add on every attempt: rolling back a failed transaction
                # detaches objects that were pending in it.
                session.add(sample_db_obj)
                try:
                    session.commit()
                except OperationalError as e:
                    # The session cannot be reused until the failed
                    # transaction has been rolled back.
                    session.rollback()
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    if attempt < max_attempts:
                        time.sleep(retry_wait_seconds)
                else:
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception explicitly so the message always has a cause
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample,
                                                          workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        if not (config or {}).get('quiet'):
            # Identifiers come from the project/sample objects; there are no
            # bare project_id/sample_id names in this scope.
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR", info_text=error_text)
def is_sample_analysis_running_local(workflow_subtask, project_id, sample_id):
    """Check the local process tracking database for an entry matching this
    workflow, project and sample.

    :returns: True if a matching SampleAnalysis entry exists, False otherwise
    """
    sample_run_name = "{}/{}".format(project_id, sample_id)
    LOG.info('Checking if sample run "{}" is currently being analyzed '
             '(workflow "{}")...'.format(sample_run_name, workflow_subtask))
    with get_db_session() as session:
        tracked_runs = session.query(SampleAnalysis).filter_by(workflow=workflow_subtask,
                                                               project_id=project_id,
                                                               sample_id=sample_id)
        if not session.query(tracked_runs.exists()).scalar():
            LOG.info('...sample run "{}" is not currently under analysis.'.format(sample_run_name))
            return False
        LOG.info('...sample run "{}" is currently being analyzed.'.format(sample_run_name))
        return True
def is_seqrun_analysis_running_local(workflow_subtask, project_id, sample_id,
                                     libprep_id, seqrun_id):
    """Check the local process tracking database for an entry matching this
    workflow, project, sample, libprep and sequencing run.

    :returns: True if under analysis, False otherwise
    """
    sequencing_run = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
    LOG.info('Checking if sequencing run "{}" is currently '
             'being analyzed (workflow "{}")...'.format(sequencing_run, workflow_subtask))
    with get_db_session() as session:
        tracked_runs = session.query(SeqrunAnalysis).filter_by(workflow=workflow_subtask,
                                                               project_id=project_id,
                                                               sample_id=sample_id,
                                                               libprep_id=libprep_id,
                                                               seqrun_id=seqrun_id)
        if not session.query(tracked_runs.exists()).scalar():
            LOG.info('...sequencing run "{}" is not currently under analysis.'.format(sequencing_run))
            return False
        LOG.info('...sequencing run "{}" is currently being analyzed.'.format(sequencing_run))
        return True
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    For every ``SampleAnalysis`` row in the local database the workflow exit
    code is looked up and then:

    * exit code 0: the sample is marked ANALYZED/DONE in Charon, the local
      row is deleted, MultiQC is run and workflow-specific results are
      pushed to Charon;
    * positive integer exit code: the sample is marked FAILED and the local
      row is deleted;
    * anything else: the slurm job (or the local pid) is checked. If it is no
      longer running the sample is marked FAILED and the row deleted;
      otherwise Charon is set to UNDER_ANALYSIS if it says something else.

    Rows with an unrecognised workflow or a missing analysis log are skipped.
    Charon and OS errors are logged (and mailed unless quiet) and the loop
    carries on with the next row.

    :param bool quiet: if True, ``config['quiet']`` is switched on
    :param dict config: configuration mapping; ``config.get`` is called
        unconditionally, so it must not be None when this body runs
    :param config_file_path: not used in this function body
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            # The Charon fields depend only on the workflow, which is one of
            # the two accepted values from here on.
            is_variantcall = workflow == "merge_process_variantcall"
            if is_variantcall:
                sample_status_field = "analysis_status"
                seqrun_status_field = "alignment_status"
            else:
                sample_status_field = seqrun_status_field = "genotype_status"
            # Keyword arguments shared by every notification mail for this row
            mail_kwargs = dict(project_name=project_name, sample_name=sample_id,
                               engine_name=engine, workflow=workflow)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow, project_id,
                                                                   sample_id, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text, **mail_kwargs)
                continue

            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED" if is_variantcall else "DONE"  # sample level
                    recurse_status = "DONE"  # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(level="INFO", info_text=info_text, **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # run MultiQC
                    LOG.info("Running MultiQC on project {}".format(project_name))
                    try:
                        run_multiqc(project_base_path, project_id, project_name)
                    except Exception as e:
                        LOG.error(e)
                    analysis_root = os.path.join(project_base_path, "ANALYSIS",
                                                 project_id, "piper_ngi")
                    if is_variantcall:
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(analysis_root,
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                               project_base_path)
                    else:
                        piper_gt_dir = os.path.join(analysis_root,
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(level="ERROR", info_text=error_text, **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=set_status,
                                              config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            slurm_exit_code = 1
                        # "None" indicates job is still running
                        job_failed = slurm_exit_code is not None
                    else:
                        # Job did not write an exit code; failed if also not running
                        job_failed = not psutil.pid_exists(process_id)

                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(level="ERROR", info_text=error_text, **mail_kwargs)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        set_status = "UNDER_ANALYSIS"
                        recurse_status = "RUNNING" if is_variantcall else "UNDER_ANALYSIS"
                        try:
                            charon_sample = charon_session.sample_get(projectid=project_id,
                                                                      sampleid=sample_id)
                            charon_status = charon_sample.get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warn('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(level="ERROR", info_text=error_text,
                                              **mail_kwargs)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text, **mail_kwargs)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text, **mail_kwargs)
        session.commit()
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    For every ``SampleAnalysis`` row in the local database the workflow exit
    code is looked up and then:

    * exit code 0: the sample is marked ANALYZED in Charon, the local row is
      deleted and per-seqrun coverage is pushed to Charon;
    * positive exit code: the sample is marked FAILED and the row deleted;
    * no exit code: the slurm job (or the local pid) is checked. If it is no
      longer running the sample is marked FAILED and the row deleted;
      otherwise Charon is set to UNDER_ANALYSIS if it says something else.

    Rows whose analysis log cannot be read are skipped. Charon and OS errors
    are logged (and mailed unless ``config['quiet']``) and the loop carries
    on with the next row.

    :param dict config: configuration mapping; ``config.get`` is called
        unconditionally, so it must not be None when this body runs
    :param config_file_path: not used in this function body
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id, sample_id, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR", info_text=error_text)
                continue
            try:
                # Compare against 0 directly: 0 is falsy, so a truthiness
                # guard in front of this test would make it unreachable.
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="INFO", info_text=info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status = "DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id, "piper_ngi",
                                                "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id,
                                                       piper_qc_dir)
                elif piper_exit_code and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="ERROR", info_text=error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None:  # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(label, set_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name, sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['analysis_status']
                        if not charon_status == "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warning('Tracking inconsistency for {}: Charon status is "{}" but '
                                        'local process tracking database indicates it is running. '
                                        'Setting value in Charon to {}.'.format(label, charon_status,
                                                                                set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR", info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR", info_text=error_text)
        session.commit()
#!/bin/env python
# Refresh Charon from the local job tracking database, then list the
# sample-level analysis jobs that are still tracked locally.
from __future__ import print_function

import argparse
import importlib

from ngi_pipeline.engines.piper_ngi.local_process_tracking import update_charon_with_local_jobs_status
from ngi_pipeline.engines.piper_ngi.database import SampleAnalysis, get_db_session

if __name__=="__main__":
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name in usage).
    parser = argparse.ArgumentParser(
        description="Show all the jobs currently running (currently just for Piper).")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Don't send notification emails on status changes.")
    args = parser.parse_args()

    # Bring Charon up to date first so finished jobs are dropped from the list
    update_charon_with_local_jobs_status(quiet=args.quiet)

    with get_db_session() as session:
        sample_jobs = session.query(SampleAnalysis).all()
        print("\nSample-level analysis jobs:")
        if sample_jobs:
            for sample_job in sample_jobs:
                print("\t{}".format(sample_job))
        else:
            print("\tNone")
        print()
def update_charon_with_local_jobs_status():
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Both the ``SeqrunAnalysis`` and the ``SampleAnalysis`` tables of the local
    tracking database are walked. For each row the workflow exit code is
    looked up and then:

    * exit code 0: the job is recorded as finished in Charon and the local
      row is deleted (for sequencing runs the alignment results are written
      to Charon first; if that fails the status recorded is "FAILED");
    * any other exit code, or no exit code while the pid no longer exists:
      the job is recorded as failed in Charon and the local row is deleted;
    * no exit code and the pid still exists: Charon is set to "RUNNING" if it
      says something else.

    A ``CharonError`` for one row is logged and the next row is processed.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()

        # Sequencing Run Analyses
        for seqrun_entry in session.query(SeqrunAnalysis).all():
            # Local names
            workflow = seqrun_entry.workflow
            project_name = seqrun_entry.project_name
            project_id = seqrun_entry.project_id
            project_base_path = seqrun_entry.project_base_path
            sample_id = seqrun_entry.sample_id
            libprep_id = seqrun_entry.libprep_id
            seqrun_id = seqrun_entry.seqrun_id
            pid = seqrun_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id,
                                      libprep_id=libprep_id,
                                      seqrun_id=seqrun_id)
            label = "project/sample/libprep/seqrun {}/{}/{}/{}".format(project_name,
                                                                       sample_id,
                                                                       libprep_id,
                                                                       seqrun_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_alignment_status = "DONE"
                    try:
                        write_to_charon_alignment_results(base_path=project_base_path,
                                                          project_name=project_name,
                                                          project_id=project_id,
                                                          sample_id=sample_id,
                                                          libprep_id=libprep_id,
                                                          seqrun_id=seqrun_id)
                    except (RuntimeError, ValueError) as e:
                        LOG.error(e)
                        set_alignment_status = "FAILED"
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status=set_alignment_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(seqrun_entry)
                elif exit_code or not psutil.pid_exists(pid):
                    # Every non-zero exit code is a failure, not only 1;
                    # otherwise such jobs would be reported as RUNNING forever.
                    if exit_code:
                        # non-zero -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code (process no longer running)
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"FAILED", inspect manually'.format(label, pid))
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 alignment_status="FAILED")
                    # Job is only deleted if the Charon update succeeds
                    LOG.debug("Deleting local entry {}".format(seqrun_entry))
                    session.delete(seqrun_entry)
                else:
                    # None -> Job still running
                    charon_status = charon_session.seqrun_get(projectid=project_id,
                                                              sampleid=sample_id,
                                                              libprepid=libprep_id,
                                                              seqrunid=seqrun_id)['alignment_status']
                    if not charon_status == "RUNNING":
                        LOG.warning('Tracking inconsistency for {}: Charon status is "{}" but '
                                    'local process tracking database indicates it is running. '
                                    'Setting value in Charon to RUNNING.'.format(label,
                                                                                 charon_status))
                        charon_session.seqrun_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     libprepid=libprep_id,
                                                     seqrunid=seqrun_id,
                                                     alignment_status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))

        # Sample Analyses
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            pid = sample_entry.process_id

            exit_code = get_exit_code(workflow_name=workflow,
                                      project_base_path=project_base_path,
                                      project_name=project_name,
                                      sample_id=sample_id)
            # Only project and sample are available at this level
            label = "project/sample {}/{}".format(project_name, sample_id)
            try:
                if exit_code == 0:
                    # 0 -> Job finished successfully
                    LOG.info('Workflow "{}" for {} finished succesfully. '
                             'Recording status "DONE" in Charon'.format(workflow, label))
                    set_status = "DONE"
                    ## TODO implement sample-level analysis results parsing / reporting to Charon?
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status=set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                elif exit_code or not psutil.pid_exists(pid):
                    # Every non-zero exit code is a failure, not only 1
                    if exit_code:
                        # non-zero -> Job failed (DATA_FAILURE / COMPUTATION_FAILURE ?)
                        LOG.info('Workflow "{}" for {} failed. Recording status '
                                 '"COMPUTATION_FAILED" in Charon.'.format(workflow, label))
                    else:
                        # Job failed without writing an exit code
                        LOG.error('ERROR: No exit code found for process {} '
                                  'but it does not appear to be running '
                                  '(pid {} does not exist). Setting status to '
                                  '"COMPUTATION_FAILED", inspect manually'.format(label, pid))
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 status="COMPUTATION_FAILED")
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running
                    try:
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['status']
                    except (CharonError, KeyError) as e:
                        LOG.warning('Unable to get required information from Charon for '
                                    'sample "{}" / project "{}" -- forcing it to RUNNING: {}'.format(
                                        sample_id, project_id, e))
                        # Any value other than "RUNNING" triggers the update below
                        charon_status = "NEW"
                    if not charon_status == "RUNNING":
                        LOG.warning('Tracking inconsistency for {}: Charon status is "{}" but '
                                    'local process tracking database indicates it is running. '
                                    'Setting value in Charon to RUNNING.'.format(label,
                                                                                 charon_status))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     status="RUNNING")
            except CharonError as e:
                LOG.error('Unable to update Charon status for "{}": {}'.format(label, e))
        session.commit()
def kill_running_sample_analysis(workflow_subtask, project_id, sample_id):
    """Kill a locally-tracked sample analysis and clean up after it.

    The local process tracking database is searched for an entry matching the
    workflow, project and sample. If one is found, its slurm job is killed,
    the corresponding Charon status fields are set to "FAILED" (sample level
    and, via recursion, below it) and the entry is removed from the local
    database. Problems with the analysis log, Charon or the local database
    are logged but do not abort the clean-up.

    :param workflow_subtask: workflow name of the tracked analysis
    :param project_id: project id of the tracked analysis
    :param sample_id: sample id of the tracked analysis

    :returns: False if the slurm job could not be killed, True otherwise
        (including when no matching entry is tracked)
    """
    # (sample-level field, seqrun-level field) in Charon for each workflow
    charon_status_fields = {
        "genotype_concordance": ("genotype_status", "genotype_status"),
        "merge_process_variantcall": ("analysis_status", "alignment_status"),
    }
    sample_run_name = "{}/{}".format(project_id, sample_id)
    LOG.info('Attempting to kill sample analysis run "{}"'.format(sample_run_name))
    LOG.info('Checking if sample run "{}" is currently being analyzed '
             '(workflow "{}")...'.format(sample_run_name, workflow_subtask))
    with get_db_session() as session:
        db_q = session.query(SampleAnalysis).filter_by(workflow=workflow_subtask,
                                                       project_id=project_id,
                                                       sample_id=sample_id)
        sample_run = db_q.first()
        if sample_run:
            try:
                slurm_job_id = sample_run.slurm_job_id
                LOG.info('...sample run "{}" is currently being analyzed '
                         '(workflow subtask "{}") and has slurm job id "{}"; '
                         'trying to kill it...'.format(sample_run_name,
                                                       workflow_subtask,
                                                       slurm_job_id))
                kill_slurm_job_by_id(slurm_job_id)
            except Exception as e:
                LOG.error('Could not kill sample run "{}": {}'.format(sample_run_name, e))
                return False
            try:
                project_obj = create_project_obj_from_analysis_log(sample_run.project_name,
                                                                   sample_run.project_id,
                                                                   sample_run.project_base_path,
                                                                   sample_run.sample_id,
                                                                   sample_run.workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(sample_run.workflow,
                                                                   sample_run.project_id,
                                                                   sample_run.sample_id,
                                                                   e))
                LOG.error(error_text)
            else:
                set_status = "FAILED"
                status_fields = charon_status_fields.get(workflow_subtask)
                if status_fields is None:
                    # Without a known field there is nothing to update in Charon
                    LOG.error('Charon field for workflow "{}" unknown; '
                              'cannot update Charon.'.format(workflow_subtask))
                else:
                    sample_status_field, seqrun_status_field = status_fields
                    try:
                        charon_session = CharonSession()
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status)
                    except CharonError as e:
                        LOG.error('Couldn\'t update Charon field "{}" to "{}" for '
                                  'project/sample "{}/{}": {}'.format(sample_status_field,
                                                                      set_status,
                                                                      project_id,
                                                                      sample_id,
                                                                      e))
            try:
                LOG.info('Removing sample run "{}" from local jobs database...'.format(sample_run_name))
                # Remove from local jobs database
                session.delete(sample_run)
                session.commit()
                LOG.info("Deleted.")
            except Exception as e:
                LOG.error('Failed to remove entry for sample run "{}" from '
                          'local jobs database: {}'.format(sample_run_name, e))
        else:
            LOG.info('...sample run "{}" is not currently under analysis.'.format(sample_run_name))
    return True
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None,
                          config_file_path=None):
    """Record a launched sample analysis locally and flag it in Charon.

    First a ``SampleAnalysis`` row is committed to the local process tracking
    database (retrying up to three times on ``OperationalError``). Then the
    Charon status fields that belong to the workflow are updated at sample
    level and recursed below it. A ``CharonError`` during the second step is
    logged (and mailed unless ``config['quiet']`` is set) but not raised.

    :param project: object providing ``project_id``, ``name`` and ``base_path``
    :param sample: object providing ``name``
    :param workflow_subtask: "merge_process_variantcall" or
        "genotype_concordance"
    :param analysis_module_name: stored in the ``engine`` column
    :param process_id: stored in the ``process_id`` column
    :param slurm_job_id: stored in the ``slurm_job_id`` column
    :param config: mapping consulted for the ``quiet`` key and passed on to
        ``recurse_status_for_sample``; None is treated as "not quiet"
    :param config_file_path: not used in this function body

    :raises RuntimeError: if the row could not be written to the local database
    :raises ValueError: if ``workflow_subtask`` is not a known workflow
    """
    max_attempts = 3
    retry_wait_seconds = 15
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            for attempt in range(1, max_attempts + 1):
                # (Re-)add on every attempt: rolling back a failed transaction
                # detaches objects that were pending in it.
                session.add(sample_db_obj)
                try:
                    session.commit()
                except OperationalError as e:
                    # The session cannot be reused until the failed
                    # transaction has been rolled back.
                    session.rollback()
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    if attempt < max_attempts:
                        time.sleep(retry_wait_seconds)
                else:
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Format the exception itself: ``.message`` does not exist on
            # Python 3 exceptions.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id, project,
                                                                       sample, workflow_subtask,
                                                                       e))
    extra_args = None
    if workflow_subtask == "merge_process_variantcall":
        sample_status_field = "analysis_status"
        sample_status_value = "UNDER_ANALYSIS"
        sample_data_status_field = "status"
        sample_data_status_value = ''  # in this way it will not be updated
        seqrun_status_field = "alignment_status"
        seqrun_status_value = "RUNNING"
        extra_args = {"mean_autosomal_coverage": 0}
    elif workflow_subtask == "genotype_concordance":
        sample_status_field = seqrun_status_field = "genotype_status"
        sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
        sample_data_status_field = "status"
        sample_data_status_value = "STALE"
    else:
        raise ValueError('Charon field for workflow "{}" unknown; '
                         'cannot update Charon.'.format(workflow_subtask))
    try:
        LOG.info('Updating Charon status for project/sample '
                 '{}/{} key : {} value : {}'.format(project, sample, sample_status_field,
                                                    sample_status_value))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      **{sample_status_field: sample_status_value,
                                         sample_data_status_field: sample_data_status_value})
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj,
                                  status_field=seqrun_status_field,
                                  status_value=seqrun_status_value,
                                  extra_args=extra_args,
                                  config=config)
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR", info_text=error_text,
                          workflow=workflow_subtask)
def _mail_analysis_unless_quiet(quiet, **mail_kwargs):
    """Forward mail_kwargs to mail_analysis unless quiet is truthy."""
    if not quiet:
        mail_analysis(**mail_kwargs)


def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SampleAnalysis row in the local tracking database is inspected:

    * exit code 0: the sample is marked ANALYZED/DONE in Charon, the row is
      deleted, the project is queued for MultiQC and the workflow-specific
      result values are pushed to Charon;
    * positive exit code: the sample is marked FAILED and the row is deleted;
    * anything else: the slurm job (or, without a slurm job id, the process
      id) is checked. A job that is no longer running is marked FAILED and its
      row is deleted; a running job has its Charon status corrected if it
      differs from UNDER_ANALYSIS.

    Row deletions are committed once after all rows have been processed, and
    MultiQC is then run once per project that had a successful sample.

    :param bool quiet: Suppress notification e-mails; if a config mapping is
                       given its "quiet" key is set as well
    :param config: Optional mapping; its "quiet" key is read and it is
                   forwarded to recurse_status_for_sample
    :param config_file_path: Not used in the body of this function
    """
    # config defaults to None, so only touch it when it was actually supplied
    if config is not None:
        if quiet and not config.get("quiet"):
            config['quiet'] = True
        quiet = bool(config.get("quiet"))
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    multiqc_projects = set()
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            # Arguments shared by every notification mail for this sample
            mail_kwargs = dict(project_name=project_name, sample_name=sample_id,
                               engine_name=engine, workflow=workflow)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue
            # The Charon field names depend only on the workflow
            if workflow == "merge_process_variantcall":
                sample_status_field = "analysis_status"
                seqrun_status_field = "alignment_status"
            else:  # "genotype_concordance"
                sample_status_field = seqrun_status_field = "genotype_status"

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow, project_id,
                                                                   sample_id, e))
                LOG.error(error_text)
                _mail_analysis_unless_quiet(quiet, level="ERROR",
                                            info_text=error_text, **mail_kwargs)
                continue

            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        set_status = "ANALYZED"  # sample level
                    else:
                        set_status = "DONE"  # sample level
                    recurse_status = "DONE"  # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    _mail_analysis_unless_quiet(quiet, level="INFO",
                                                info_text=info_text, **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # add project to MultiQC
                    multiqc_projects.add((project_base_path, project_id, project_name))

                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                               project_base_path)
                    else:
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif isinstance(piper_exit_code, int) and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    _mail_analysis_unless_quiet(quiet, level="ERROR",
                                                info_text=error_text, **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=set_status,
                                              config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    job_failed = False
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None:
                            # "None" indicates job is still running
                            job_failed = True
                    elif process_id is None or not psutil.pid_exists(process_id):
                        # Job did not write an exit code and is also not running.
                        # A missing pid is treated the same way instead of
                        # letting psutil raise on None.
                        job_failed = True

                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        _mail_analysis_unless_quiet(quiet, level="ERROR",
                                                    info_text=error_text, **mail_kwargs)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            recurse_status = "RUNNING"
                        else:
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            remote_sample = charon_session.sample_get(projectid=project_id,
                                                                      sampleid=sample_id)
                            charon_status = remote_sample.get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warning('Tracking inconsistency for {}: Charon status '
                                            'for field "{}" is "{}" but local process tracking '
                                            'database indicates it is running. Setting value '
                                            'in Charon to {}.'.format(label, sample_status_field,
                                                                      charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            _mail_analysis_unless_quiet(quiet, level="ERROR",
                                                        info_text=error_text, **mail_kwargs)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                _mail_analysis_unless_quiet(quiet, level="ERROR",
                                            info_text=error_text, **mail_kwargs)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                _mail_analysis_unless_quiet(quiet, level="ERROR",
                                            info_text=error_text, **mail_kwargs)
        # Persist all row deletions made while iterating
        session.commit()
    # Run Multiqc
    for pj_tuple in multiqc_projects:
        LOG.info("Running MultiQC on project {}".format(pj_tuple[1]))
        run_multiqc(pj_tuple[0], pj_tuple[1], pj_tuple[2])
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None,
                          config=None, config_file_path=None):
    """Record a started sample analysis locally and flag it as running in Charon.

    A SampleAnalysis row is added to the local tracking database; the commit
    is attempted up to three times, sleeping 15 seconds after every
    OperationalError. Afterwards the Charon sample status is updated and the
    seqrun-level status is pushed through recurse_status_for_sample.

    :param project: Object providing project_id, name and base_path
    :param sample: Object providing name
    :param str workflow_subtask: "merge_process_variantcall" or "genotype_concordance"
    :param str analysis_module_name: Stored as the "engine" of the database row
    :param process_id: Stored as the process_id of the database row
    :param slurm_job_id: Stored as the slurm_job_id of the database row
    :param config: Optional mapping; its "quiet" key suppresses the error e-mail.
                   It is also forwarded to recurse_status_for_sample.
    :param config_file_path: Not used in the body of this function

    :raises RuntimeError: If the row could not be committed to the database
    :raises ValueError: If workflow_subtask is not one of the known workflows
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _ in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    # Logger.warn is a deprecated alias of Logger.warning
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                # The loop ran out of attempts without a successful commit
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Format the exception itself: the ".message" attribute does not
            # exist on Python 3 exceptions and would hide the real error.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id, project, sample,
                                                                       workflow_subtask, e))
    extra_args = None
    if workflow_subtask == "merge_process_variantcall":
        sample_status_field = "analysis_status"
        sample_status_value = "UNDER_ANALYSIS"
        seqrun_status_field = "alignment_status"
        seqrun_status_value = "RUNNING"
        extra_args = {"mean_autosomal_coverage": 0}
    elif workflow_subtask == "genotype_concordance":
        sample_status_field = seqrun_status_field = "genotype_status"
        sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
    else:
        raise ValueError('Charon field for workflow "{}" unknown; '
                         'cannot update Charon.'.format(workflow_subtask))
    try:
        LOG.info('Updating Charon status for project/sample '
                 '{}/{} key : {} value : {}'.format(project, sample, sample_status_field,
                                                    sample_status_value))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      **{sample_status_field: sample_status_value})
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj,
                                  status_field=seqrun_status_field,
                                  status_value=seqrun_status_value,
                                  extra_args=extra_args,
                                  config=config)
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        # config defaults to None; treat a missing config as "not quiet"
        if not (config and config.get('quiet')):
            # NOTE(review): project_id is passed as project_name here -- confirm intended
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR",
                          info_text=error_text, workflow=workflow_subtask)
def kill_running_sample_analysis(workflow_subtask, project_id, sample_id):
    """Kill the locally-tracked analysis of a sample, if there is one.

    The local process tracking database is searched for a SampleAnalysis row
    matching the workflow, project and sample. If one is found, its slurm job
    is killed, the sample and its seqruns are set to FAILED in Charon
    (best effort: failures are logged only) and the row is removed from the
    local database.

    :param str workflow_subtask: Workflow of the tracked analysis
    :param str project_id: Project id of the tracked analysis
    :param str sample_id: Sample id of the tracked analysis

    :returns: False if the slurm job could not be killed, True otherwise
              (including when no matching analysis is tracked)
    :rtype: bool
    """
    # workflow -> (sample-level Charon field, seqrun-level Charon field)
    charon_fields_by_workflow = {
        "genotype_concordance": ("genotype_status", "genotype_status"),
        "merge_process_variantcall": ("analysis_status", "alignment_status"),
    }
    sample_run_name = "{}/{}".format(project_id, sample_id)
    LOG.info('Attempting to kill sample analysis run "{}"'.format(sample_run_name))
    LOG.info('Checking if sample run "{}" is currently being analyzed '
             '(workflow "{}")...'.format(sample_run_name, workflow_subtask))
    with get_db_session() as session:
        db_q = session.query(SampleAnalysis).filter_by(workflow=workflow_subtask,
                                                       project_id=project_id,
                                                       sample_id=sample_id)
        sample_run = db_q.first()
        if sample_run:
            try:
                slurm_job_id = sample_run.slurm_job_id
                LOG.info('...sample run "{}" is currently being analyzed '
                         '(workflow subtask "{}") and has slurm job id "{}"; '
                         'trying to kill it...'.format(sample_run_name, workflow_subtask,
                                                       slurm_job_id))
                kill_slurm_job_by_id(slurm_job_id)
            except Exception as e:
                LOG.error('Could not kill sample run "{}": {}'.format(sample_run_name, e))
                return False
            try:
                project_obj = create_project_obj_from_analysis_log(sample_run.project_name,
                                                                   sample_run.project_id,
                                                                   sample_run.project_base_path,
                                                                   sample_run.sample_id,
                                                                   sample_run.workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(sample_run.workflow,
                                                                   sample_run.project_id,
                                                                   sample_run.sample_id,
                                                                   e))
                LOG.error(error_text)
            else:
                set_status = "FAILED"
                status_fields = charon_fields_by_workflow.get(workflow_subtask)
                if status_fields is None:
                    # The job is already killed at this point, so report the
                    # problem and still clean up the local entry below.
                    LOG.error('Charon field for workflow "{}" unknown; '
                              'cannot update Charon.'.format(workflow_subtask))
                else:
                    sample_status_field, seqrun_status_field = status_fields
                    try:
                        charon_session = CharonSession()
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status)
                    except CharonError as e:
                        LOG.error('Couldn\'t update Charon field "{}" to "{}" for '
                                  'project/sample "{}/{}": {}'.format(sample_status_field,
                                                                      set_status,
                                                                      project_id,
                                                                      sample_id, e))
            try:
                LOG.info('Removing sample run "{}" from local jobs database...'.format(sample_run_name))
                # Remove from local jobs database
                session.delete(sample_run)
                session.commit()
                LOG.info("Deleted.")
            except Exception as e:
                LOG.error('Failed to remove entry for sample run "{}" from '
                          'local jobs database: {}'.format(sample_run_name, e))
        else:
            LOG.info('...sample run "{}" is not currently under analysis.'.format(sample_run_name))
    return True
#!/bin/env python from __future__ import print_function import argparse import importlib from ngi_pipeline.engines.piper_ngi.local_process_tracking import update_charon_with_local_jobs_status from ngi_pipeline.engines.piper_ngi.database import SampleAnalysis, get_db_session if __name__ == "__main__": parser = argparse.ArgumentParser( "Show all the jobs currently running (currently just for Piper).") update_charon_with_local_jobs_status() with get_db_session() as session: sample_jobs = session.query(SampleAnalysis).all() print("\nSample-level analysis jobs:") if sample_jobs: for sample_job in sample_jobs: print("\t{}".format(sample_job)) else: print("\tNone") print()