def post_qc(self, qc_file, status, log_file, rcp):
    """ Checks whether a run has passed the final qc.

    Scans *qc_file* for the run; if not already recorded, appends a
    "<run>\tPASSED" line on success, or mails a failure notice to *rcp*
    and appends a "<run>\tFAILED" line.

    :param str qc_file: Path to file with information about transferred runs
    :param bool status: True if the run passed QC, False otherwise
    :param str log_file: Path to the log file
    :param str rcp: destinatary (mail recipient(s))
    """
    already_seen=False
    runname=self.id
    # Short flowcell name built from first and last '_'-separated fields of the run id
    shortrun=runname.split('_')[0] + '_' +runname.split('_')[-1]
    # 'ab+' opens for append + read; seek(0) rewinds so existing rows can be scanned
    with open(qc_file, 'ab+') as f:
        f.seek(0)
        for row in f:
            #Rows have two columns: run and transfer date
            if row.split('\t')[0] == runname:
                already_seen=True
        if not already_seen:
            if status:
                f.write("{}\tPASSED\n".format(runname))
            else:
                sj="{} failed QC".format(runname)
                cnt="""The run {run} has failed qc and will NOT be transfered to Nestor.
The run might be available at : https://genomics-status.scilifelab.se/flowcells/{shortfc}
To read the logs, run the following command on {server}
grep -A30 "Checking run {run}" {log}
To force the transfer : taca analysis transfer {rundir}
""".format(run=runname, shortfc=shortrun, log=log_file, server=os.uname()[1], rundir=self.id)
                misc.send_mail(sj, cnt, rcp)
                f.write("{}\tFAILED\n".format(os.path.basename(self.id)))
def cleanup_processing(seconds):
    """Cleanup runs in processing server.

    Deletes finished runs from the configured archive directories once their
    finished-run indicator file is older than the given threshold.

    :param int seconds: Days/hours converted as second to consider a run to be old
    """
    try:
        # Remove old runs from archiving dirs
        for archive_dir in CONFIG.get('storage').get('archive_dirs').values():
            logger.info('Removing old runs in {}'.format(archive_dir))
            with filesystem.chdir(archive_dir):
                for run in [r for r in os.listdir(archive_dir) if re.match(filesystem.RUN_RE, r)]:
                    rta_file = os.path.join(run, finished_run_indicator)
                    if os.path.exists(rta_file):
                        # Only delete runs whose finished marker is older than the threshold
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            # BUGFIX: message used to claim the run was moved "to nosync
                            # directory", but shutil.rmtree deletes it outright
                            logger.info('Removing run {}'.format(os.path.basename(run)))
                            shutil.rmtree(run)
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'.format(
                                finished_run_indicator, run))
    except IOError:
        # NOTE(review): this handler's message mentions transfer.tsv, but nothing in
        # this variant opens that file — likely inherited from an older version; verify.
        sbj = "Cannot archive old runs in processing server"
        msg = ("Could not find transfer.tsv file, so I cannot decide if I should "
               "archive any run or not.")
        cnt = CONFIG.get('contact', None)
        if not cnt:
            cnt = "{}@localhost".format(getpass.getuser())
        logger.error(msg)
        misc.send_mail(sbj, msg, cnt)
def cleanup_nas(seconds):
    """Will move the finished runs in NASes to nosync directory.

    A run is considered finished when its finished-run indicator file exists
    and (optionally) when statusdb confirms it has been demultiplexed.

    :param int seconds: Days/hours converted as second to consider a run to be old
    """
    couch_info = CONFIG.get('statusdb')
    mail_recipients = CONFIG.get('mail', {}).get('recipients')
    # When True, require statusdb confirmation of demultiplexing before moving
    check_demux = CONFIG.get('storage', {}).get('check_demux', False)
    # Short host name (first label of FQDN), used in the notification mail
    host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]
    for data_dir in CONFIG.get('storage').get('data_dirs'):
        logger.info('Moving old runs in {}'.format(data_dir))
        with filesystem.chdir(data_dir):
            for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                rta_file = os.path.join(run, finished_run_indicator)
                if os.path.exists(rta_file):
                    if check_demux:
                        if misc.run_is_demuxed(run, couch_info):
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        elif os.stat(rta_file).st_mtime < time.time() - seconds:
                            # Old enough to archive but statusdb says not demuxed: warn operator
                            logger.warn('Run {} is older than given time, but it is not demultiplexed yet'
                                        .format(run))
                            sbt = "Run not demultiplexed - {}".format(run)
                            msg = ("Run '{}' in '{}' is older then given threshold, but seems like it is not "
                                   "yet demultiplexed".format(os.path.join(data_dir, run), host_name))
                            misc.send_mail(sbt, msg, mail_recipients)
                    else:
                        # No demux check: move purely on the age of the indicator file
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'
                                        .format(finished_run_indicator, run))
def _check_status(self, cmd, status, err_msg, mail_failed, files_to_remove=None):
    """Check if a subprocess status is success and log error if failed.

    On failure, removes the given temporary files, optionally mails the
    operator, and logs the command plus its error message.

    :param list cmd: argv list of the command that was executed (for reporting)
    :param int status: exit status of the subprocess; 0 means success
    :param str err_msg: error output captured from the subprocess
    :param bool mail_failed: if True, mail the error to self.mail_recipients
    :param list files_to_remove: temporary files to clean up on failure
    :returns: True when status == 0, False otherwise
    """
    if status != 0:
        # BUGFIX: default was a shared mutable list ([]); use a None sentinel so
        # each call gets a fresh (empty) list.
        if files_to_remove is None:
            files_to_remove = []
        self._clean_tmp_files(files_to_remove)
        if mail_failed:
            subjt = "Command call failed - {}".format(self.host_name)
            e_msg = "Called cmd: {}\n\nError msg: {}".format(" ".join(cmd), err_msg)
            misc.send_mail(subjt, e_msg, self.mail_recipients)
        logger.error("Command '{}' failed with the error '{}'".format(" ".join(cmd), err_msg))
        return False
    return True
def cleanup_processing(days):
    """Cleanup runs in processing server.

    First moves runs recorded in transfer.tsv to the nosync directory, then
    deletes archived runs that are older than *days* and already in Swestore.

    :param int days: Number of days to consider a run to be old
    """
    transfer_file = os.path.join(CONFIG.get("preprocessing", {}).get("status_dir"), "transfer.tsv")
    if not days:
        # Fall back to the configured threshold (default 10 days)
        days = CONFIG.get("cleanup", {}).get("processing-server", {}).get("days", 10)
    try:
        # Move finished runs to nosync
        for data_dir in CONFIG.get("storage").get("data_dirs"):
            logger.info("Moving old runs in {}".format(data_dir))
            with filesystem.chdir(data_dir):
                for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                    # Only runs already listed in transfer.tsv are safe to move
                    if filesystem.is_in_file(transfer_file, run):
                        logger.info("Moving run {} to nosync directory".format(os.path.basename(run)))
                        shutil.move(run, "nosync")
                    else:
                        logger.info(
                            ("Run {} has not been transferred to the analysis "
                             "server yet, not archiving".format(run))
                        )
        # Remove old runs from archiving dirs
        for archive_dir in CONFIG.get("storage").get("archive_dirs").values():
            logger.info("Removing old runs in {}".format(archive_dir))
            with filesystem.chdir(archive_dir):
                for run in [r for r in os.listdir(archive_dir) if re.match(filesystem.RUN_RE, r)]:
                    rta_file = os.path.join(run, "RTAComplete.txt")
                    if os.path.exists(rta_file):
                        # 1 day == 60*60*24 seconds --> 86400
                        # Delete only if old enough AND a backup tarball exists in Swestore
                        if os.stat(rta_file).st_mtime < time.time() - (86400 * days) and filesystem.is_in_swestore(
                            "{}.tar.bz2".format(run)
                        ):
                            logger.info("Removing run {} to nosync directory".format(os.path.basename(run)))
                            shutil.rmtree(run)
                        else:
                            logger.info(
                                "RTAComplete.txt file exists but is not older than {} day(s), skipping run {}".format(
                                    str(days), run
                                )
                            )
    except IOError:
        sbj = "Cannot archive old runs in processing server"
        msg = "Could not find transfer.tsv file, so I cannot decide if I should " "archive any run or not."
        cnt = CONFIG.get("contact", None)
        if not cnt:
            # No configured contact: fall back to the local user's mailbox
            cnt = "{}@localhost".format(getpass.getuser())
        logger.error(msg)
        misc.send_mail(sbj, msg, cnt)
def error_emailer(flag, info):
    """Mail the configured recipients about a known error condition.

    :param str flag: one of 'no_samplesheet', 'failed_run', 'weird_samplesheet'
    :param str info: description of the offending entry, embedded in the mail body

    Mails are throttled: they are only sent when the current hour is 7, 12 or 16.
    """
    recipients = CONFIG['mail']['recipients']
    #failed_run: Samplesheet for a given project couldn't be found
    body='TACA has encountered an issue that might be worth investigating\n'
    body+='The offending entry is: '
    body+= info
    body+='\n\nSincerely, TACA'
    # BUGFIX: an unrecognized flag used to leave `subject` unbound, raising
    # NameError at send time; initialize it and skip sending instead.
    subject=None
    if (flag == 'no_samplesheet'):
        subject='ERROR, Samplesheet error'
    elif (flag == "failed_run"):
        subject='WARNING, Reinitialization of partially failed FC'
    elif (flag == 'weird_samplesheet'):
        subject='ERROR, Incorrectly formatted samplesheet'
    hourNow = datetime.datetime.now().hour
    if subject and (hourNow == 7 or hourNow == 12 or hourNow == 16):
        send_mail(subject, body, recipients)
def error_emailer(flag, info):
    """Mail the configured recipients about a known error condition.

    :param str flag: one of 'no_samplesheet', 'failed_run', 'weird_samplesheet'
    :param str info: description of the offending entry, embedded in the mail body

    Mails are throttled: they are only sent when the current hour is 7, 12 or 16.
    """
    recipients = CONFIG['mail']['recipients']
    #failed_run: Samplesheet for a given project couldn't be found
    body = 'TACA has encountered an issue that might be worth investigating\n'
    body += 'The offending entry is: '
    body += info
    body += '\n\nSincerely, TACA'
    # BUGFIX: an unrecognized flag used to leave `subject` unbound, raising
    # NameError at send time; initialize it and skip sending instead.
    subject = None
    if (flag == 'no_samplesheet'):
        subject = 'ERROR, Samplesheet error'
    elif (flag == "failed_run"):
        subject = 'WARNING, Reinitialization of partially failed FC'
    elif (flag == 'weird_samplesheet'):
        subject = 'ERROR, Incorrectly formatted samplesheet'
    hourNow = datetime.datetime.now().hour
    if subject and (hourNow == 7 or hourNow == 12 or hourNow == 16):
        send_mail(subject, body, recipients)
def cleanup_nas(seconds):
    """Will move the finished runs in NASes to nosync directory.

    Like the basic NAS cleanup, but additionally moves MiSeq runs flagged as
    NON-NGI-RUN and skips data directories that do not exist.

    :param int seconds: Days/hours converted as second to consider a run to be old
    """
    couch_info = CONFIG.get('statusdb')
    mail_recipients = CONFIG.get('mail', {}).get('recipients')
    # When True, require statusdb confirmation of demultiplexing before moving
    check_demux = CONFIG.get('storage', {}).get('check_demux', False)
    # Short host name (first label of FQDN), used in the notification mail
    host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]
    for data_dir in CONFIG.get('storage').get('data_dirs'):
        # Guard against stale/missing entries in the configured data_dirs list
        if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
            logger.warn('Data directory "{}" does not exist or not a directory'.format(data_dir))
            continue
        logger.info('Moving old runs in {}'.format(data_dir))
        with filesystem.chdir(data_dir):
            for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                rta_file = os.path.join(run, finished_run_indicator)
                if os.path.exists(rta_file):
                    if check_demux:
                        if misc.run_is_demuxed(run, couch_info):
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        elif 'miseq' in data_dir:
                            # MiSeq runs that are not platform (NGI) runs never get
                            # demultiplexed here, so they can be moved right away
                            miseq_run = MiSeq_Run(run, CONFIG)
                            if miseq_run.get_run_type() == 'NON-NGI-RUN':
                                logger.info('Run {} is a non-platform run, so moving it to nosync directory'.format(os.path.basename(run)))
                                shutil.move(run, 'nosync')
                        elif os.stat(rta_file).st_mtime < time.time() - seconds:
                            # Old enough to archive but not demultiplexed: warn operator
                            logger.warn('Run {} is older than given time, but it is not demultiplexed yet'
                                        .format(run))
                            sbt = 'Run not demultiplexed - {}'.format(run)
                            msg = ('Run "{}" in "{}" is older then given threshold, but seems like it is not '
                                   'yet demultiplexed'.format(os.path.join(data_dir, run), host_name))
                            misc.send_mail(sbt, msg, mail_recipients)
                    else:
                        # No demux check: move purely on the age of the indicator file
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'
                                        .format(finished_run_indicator, run))
def avail_disk_space(self, path, run): """Check the space on file system based on parent directory of the run.""" # not able to fetch runtype use the max size as precaution, size units in GB illumina_run_sizes = { 'hiseq': 500, 'hiseqx': 900, 'novaseq': 1800, 'miseq': 20, 'nextseq': 250 } required_size = illumina_run_sizes.get(self._get_run_type(run), 900) * 2 # check for any ongoing runs and add up the required size accrdingly for ddir in self.data_dirs.values(): if not os.path.isdir(ddir): continue for item in os.listdir(ddir): if not re.match(filesystem.RUN_RE, item): continue if not os.path.exists( os.path.join(ddir, item, 'RTAComplete.txt')): required_size += illumina_run_sizes.get( self._get_run_type(run), 900) # get available free space from the file system try: df_proc = sp.Popen(['df', path], stdout=sp.PIPE, stderr=sp.PIPE) df_out, df_err = df_proc.communicate() available_size = int(df_out.strip().split('\n') [-1].strip().split()[3]) / 1024 / 1024 except Exception as e: logger.error( 'Evaluation of disk space failed with error {}'.format(e)) raise SystemExit if available_size < required_size: e_msg = 'Required space for encryption is {}GB, but only {}GB available'.format( required_size, available_size) subjt = 'Low space for encryption - {}'.format(self.host_name) logger.error(e_msg) misc.send_mail(subjt, e_msg, self.mail_recipients) raise SystemExit
def post_qc(self, qc_file, status, log_file, rcp):
    """ Checks whether a run has passed the final qc.

    Records the QC verdict ("<run>\tPASSED|FAILED") in *qc_file* unless the
    run is already listed there, and mails a demultiplexing summary to *rcp*.

    :param str qc_file: Path to file with information about transferred runs
    :param bool status: True if the run passed QC, False otherwise
    :param str log_file: Path to the log file
    :param str rcp: destinatary (mail recipient(s))
    """
    already_seen = False
    runname = self.id
    # Short flowcell name built from first and last '_'-separated fields of the run id
    shortrun = runname.split('_')[0] + '_' + runname.split('_')[-1]
    QC_result = ""
    # 'ab+' opens for append + read; seek(0) rewinds so existing rows can be scanned
    with open(qc_file, 'ab+') as f:
        f.seek(0)
        for row in f:
            # Rows have two columns: run and transfer date
            if row.split('\t')[0] == runname:
                already_seen = True
        if status:
            QC_result = "PASSED"
        else:
            QC_result = "FAILED"
        if not already_seen:
            f.write("{}\t{}\n".format(runname, QC_result))
    sj = "{} Demultiplexed".format(runname)
    # BUGFIX: operator-facing mail used to misspell "Autmatic"
    cnt = """The run {run} has been demultiplexed and automatic QC took place.
The Run will be transferred to Nestor for further analysis.

Automatic QC defines the runs as: {QC}

The run is available at : https://genomics-status.scilifelab.se/flowcells/{shortfc}

To read the logs, run the following command on {server}
grep -A30 "Checking run {run}" {log}
""".format(run=runname, QC=QC_result, shortfc=shortrun, log=log_file, server=os.uname()[1])
    misc.send_mail(sj, cnt, rcp)
def _exec_fn(obj, fn):
    """Execute the bound processing callable *fn* for *obj* and report the outcome.

    Logs success/partial success; on an exception, tries to mail the configured
    operator and logs whether that notification itself succeeded.

    :param obj: processed entity; must expose projectid, sampleid and config
    :param fn: zero-argument callable returning truthy on full success
    """
    try:
        if fn():
            logger.info("{} processed successfully".format(str(obj)))
        else:
            # fn completed but signalled partial failure
            logger.info("{} processed with some errors, check log".format(
                str(obj)))
    except Exception as e:
        logger.exception(e)
        # Best-effort operator notification: sending the mail may itself fail
        try:
            send_mail(
                subject="[ERROR] processing failed: {}".format(str(obj)),
                content=
                "Project: {}\nSample: {}\nCommand: {}\n\nAdditional information:{}\n"
                .format(obj.projectid, obj.sampleid, str(fn), str(e)),
                receiver=obj.config.get('operator'))
        except Exception as me:
            logger.error(
                "processing {} failed - reason: {}, but operator {} could not be notified - reason: {}"
                .format(str(obj), e, obj.config.get('operator'), me))
        else:
            # Mail went out: record that the operator has been notified
            logger.error(
                "processing {} failed - reason: {}, operator {} has been notified"
                .format(str(obj), str(e), obj.config.get('operator')))
def process_minion_run(minion_run, sequencing_ongoing=False, nanoseq_ongoing=False):
    """Process MinION QC runs.

    Will not start nanoseq if a sequencing run is ongoing, to limit memory usage.
    Will also maximum start one nanoseq run at once, for the same reason.

    :param minion_run: run object exposing run_dir, run_id, sample sheets,
        nanoseq/anglerfish dirs and exit-status files, plus the processing steps
    :param bool sequencing_ongoing: True when a sequencing run is in progress
    :param bool nanoseq_ongoing: True when a nanoseq analysis is already running
    :returns: the (possibly updated) nanoseq_ongoing flag
    """
    logger.info('Processing QC run: {}'.format(minion_run.run_dir))
    email_recipients = CONFIG.get('mail').get('recipients')
    # Stage 1: sequencing finished (summary file exists) but nanoseq not started
    if len(minion_run.summary_file) and os.path.isfile(
            minion_run.summary_file[0]) and not os.path.isdir(
            minion_run.nanoseq_dir):
        logger.info(
            'Sequencing done for run {}. Attempting to start analysis.'.format(
                minion_run.run_dir))
        if not minion_run.nanoseq_sample_sheet:
            minion_run.parse_lims_sample_sheet()
        if os.path.isfile(minion_run.nanoseq_sample_sheet):
            if nanoseq_ongoing:
                logger.warn(
                    'Nanoseq already started, will not attempt to start for {}'
                    .format(minion_run.run_dir))
            elif sequencing_ongoing:
                logger.warn(
                    'Sequencing ongoing, will not attempt to start Nanoseq for {}'
                    .format(minion_run.run_dir))
            else:
                minion_run.start_nanoseq()
                nanoseq_ongoing = True
        else:
            logger.warn(
                'Samplesheet not found for run {}. Operator notified. Skipping.'
                .format(minion_run.run_dir))
            email_subject = ('Samplesheet missing for run {}'.format(
                os.path.basename(minion_run.run_dir)))
            email_message = 'There was an issue locating the samplesheet for run {}.'.format(
                minion_run.run_dir)
            send_mail(email_subject, email_message, email_recipients)
    # Stage 2: nanoseq running (dir exists, no exit-status file yet)
    elif os.path.isdir(minion_run.nanoseq_dir) and not os.path.isfile(
            minion_run.nanoseq_exit_status_file):
        logger.info(
            'Nanoseq has started for run {} but is not yet done. Skipping.'.
            format(minion_run.run_dir))
    # Stage 3: nanoseq finished -> anglerfish, transfer, archive
    elif os.path.isdir(minion_run.nanoseq_dir) and os.path.isfile(
            minion_run.nanoseq_exit_status_file):
        nanoseq_successful = minion_run.check_exit_status(
            minion_run.nanoseq_exit_status_file)
        if nanoseq_successful:
            if not os.path.isdir(minion_run.anglerfish_dir):
                logger.info(
                    'Nanoseq done for run {}. Attempting to start Anglerfish.'.
                    format(minion_run.run_id))
                if not minion_run.anglerfish_sample_sheet:
                    # For cronjob, AF sample sheet was generated at previous run
                    minion_run.anglerfish_sample_sheet = os.path.join(
                        minion_run.run_dir, 'anglerfish_sample_sheet.csv')
                if os.path.isfile(minion_run.anglerfish_sample_sheet):
                    minion_run.start_anglerfish()
                else:
                    logger.warn(
                        'Anglerfish sample sheet missing for run {}. '
                        'Please provide one using --anglerfish_sample_sheet '
                        'if running TACA manually.'.format(minion_run.run_id))
            elif not os.path.isfile(minion_run.anglerfish_exit_status_file):
                logger.info(
                    'Anglerfish has started for run {} but is not yet done. Skipping.'
                    .format(minion_run.run_id))
            elif os.path.isfile(minion_run.anglerfish_exit_status_file):
                anglerfish_successful = minion_run.check_exit_status(
                    minion_run.anglerfish_exit_status_file)
                if anglerfish_successful:
                    if minion_run.copy_results_for_lims():
                        logger.info(
                            'Anglerfish finished OK for run {}. Notifying operator.'
                            .format(minion_run.run_id))
                        email_subject = (
                            'Anglerfish successfully processed run {}'.format(
                                minion_run.run_id))
                        email_message = (
                            'Anglerfish has successfully finished for run {}. Please '
                            'finish the QC step in lims.').format(
                                minion_run.run_id)
                        send_mail(email_subject, email_message, email_recipients)
                    else:
                        # Results produced but could not be copied to lims
                        email_subject = (
                            'Run processed with errors: {}'.format(
                                minion_run.run_id))
                        email_message = (
                            'Anglerfish has successfully finished for run {} but an error '
                            'occurred while transferring the results to lims.'
                        ).format(minion_run.run_id)
                        send_mail(email_subject, email_message, email_recipients)
                    # Regardless of the lims copy outcome, try to sync + archive
                    if minion_run.is_not_transferred():
                        if minion_run.transfer_run():
                            if minion_run.update_transfer_log():
                                logger.info(
                                    'Run {} has been synced to the analysis cluster.'
                                    .format(minion_run.run_id))
                            else:
                                email_subject = (
                                    'Run processed with errors: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been transferred, but an error occurred while updating '
                                    'the transfer log').format(
                                        minion_run.run_id)
                                send_mail(email_subject, email_message, email_recipients)
                            if minion_run.archive_run():
                                logger.info(
                                    'Run {} is finished and has been archived. Notifying operator.'
                                    .format(minion_run.run_id))
                                email_subject = (
                                    'Run successfully processed: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been analysed, transferred and archived '
                                    'successfully.').format(minion_run.run_id)
                                send_mail(email_subject, email_message, email_recipients)
                            else:
                                email_subject = (
                                    'Run processed with errors: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been analysed, but an error occurred during '
                                    'archiving').format(minion_run.run_id)
                                send_mail(email_subject, email_message, email_recipients)
                        else:
                            logger.warn(
                                'An error occurred during transfer of run {} '
                                'to Irma. Notifying operator.'.format(
                                    minion_run.run_id))
                            email_subject = (
                                'Run processed with errors: {}'.format(
                                    minion_run.run_id))
                            email_message = (
                                'Run {} has been analysed, but an error occurred during '
                                'transfer.').format(minion_run.run_id)
                            send_mail(email_subject, email_message, email_recipients)
                    else:
                        logger.warn(
                            'The following run has already been transferred, '
                            'skipping: {}'.format(minion_run.run_id))
                else:
                    logger.warn(
                        'Anglerfish exited with a non-zero exit status for run {}. '
                        'Notifying operator.'.format(minion_run.run_id))
                    email_subject = ('Run processed with errors: {}'.format(
                        minion_run.run_id))
                    email_message = (
                        'Anglerfish exited with errors for run {}. Please '
                        'check the log files and restart.').format(
                            minion_run.run_id)
                    send_mail(email_subject, email_message, email_recipients)
        else:
            logger.warn(
                'Nanoseq exited with a non-zero exit status for run {}. '
                'Notifying operator.'.format(minion_run.run_id))
            email_subject = ('Analysis failed for run {}'.format(
                minion_run.run_id))
            email_message = 'The nanoseq analysis failed for run {}.'.format(
                minion_run.run_id)
            send_mail(email_subject, email_message, email_recipients)
    else:
        # Summary file not written yet: sequencing still in progress
        logger.info('Run {} not finished sequencing yet. Skipping.'.format(
            minion_run.run_id))
    return nanoseq_ongoing
def transfer_run(self, t_file, analysis, mail_recipients=None):
    """ Transfer a run to the analysis server. Will add group R/W permissions to
    the run directory in the destination server so that the run can be processed
    by any user/account in that group (i.e a functional account...).

    A 'transferring' marker file is created in the run dir for the duration of
    the rsync, the run is appended to *t_file* on success, and the operator is
    mailed on both success and failure (if *mail_recipients* is given).

    :param str t_file: File where to put the transfer information
    :param bool analysis: Trigger analysis on remote server
    :param mail_recipients: recipient(s) for the notification mails (optional)
    """
    # TODO: check the run type and build the correct rsync command
    # The option -a implies -o and -g which is not the desired behaviour
    command_line = ['rsync', '-Lav', '--no-o', '--no-g']
    # Add R/W permissions to the group
    command_line.append('--chmod=g+rw')
    # This horrible thing here avoids data dup when we use multiple indexes in a lane/FC
    command_line.append("--exclude=Demultiplexing_*/*_*")
    command_line.append("--include=*/")
    for to_include in self.CONFIG['analysis_server']['sync']['include']:
        command_line.append("--include={}".format(to_include))
    # Exclude everything not explicitly included, and drop empty directories
    command_line.extend(["--exclude=*", "--prune-empty-dirs"])
    r_user = self.CONFIG['analysis_server']['user']
    r_host = self.CONFIG['analysis_server']['host']
    r_dir = self.CONFIG['analysis_server']['sync']['data_archive']
    remote = "{}@{}:{}".format(r_user, r_host, r_dir)
    command_line.extend([self.run_dir, remote])
    # Create temp file indicating that the run is being transferred
    try:
        open(os.path.join(self.run_dir, 'transferring'), 'w').close()
    except IOError as e:
        logger.error("Cannot create a file in {}. "
                     "Check the run name, and the permissions.".format(self.id))
        raise e
    started = ("Started transfer of run {} on {}".format(self.id, datetime.now()))
    logger.info(started)
    # In this particular case we want to capture the exception because we want
    # to delete the transfer file
    try:
        msge_text = "I am about to transfer with this command \n{}".format(
            command_line)
        logger.info(msge_text)
        misc.call_external_command(command_line,
                                   with_log_files=True,
                                   prefix="",
                                   log_dir=self.run_dir)
    except subprocess.CalledProcessError as exception:
        # Remove the marker so a later invocation can retry the transfer
        os.remove(os.path.join(self.run_dir, 'transferring'))
        #Send an email notifying that the transfer failed
        runname = self.id
        sbt = ("Rsync of run {} failed".format(runname))
        msg = """ Rsync of data for run {run} has failed!
        Raised the following exception:     {e}
        """.format(run=runname, e=exception)
        if mail_recipients:
            send_mail(sbt, msg, mail_recipients)
        raise exception
    # Record the successful transfer (run id + timestamp) in the transfer file
    logger.info('Adding run {} to {}'.format(self.id, t_file))
    with open(t_file, 'a') as tranfer_file:
        tsv_writer = csv.writer(tranfer_file, delimiter='\t')
        tsv_writer.writerow([self.id, str(datetime.now())])
    os.remove(os.path.join(self.run_dir, 'transferring'))
    #Send an email notifying that the transfer was successful
    runname = self.id
    sbt = ("Rsync of data for run {} to Irma has finished".format(runname))
    msg = """ Rsync of data for run {run} to Irma has finished!

    The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
    """.format(run=runname)
    if mail_recipients:
        send_mail(sbt, msg, mail_recipients)
    if analysis:
        # This needs to pass the runtype (i.e., Xten or HiSeq) and start the correct pipeline
        self.trigger_analysis()
class backup_utils(object):
    """A class object with main utility methods related to backing up"""

    def __init__(self, run=None):
        self.run = run
        self.fetch_config_info()
        # Short host name (first label of FQDN), used in notification mails
        self.host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]

    def fetch_config_info(self):
        """Try to fetch required info from the config file. Log and exit if any necessary info is missing"""
        try:
            self.data_dirs = CONFIG['backup']['data_dirs']
            self.archive_dirs = CONFIG['backup']['archive_dirs']
            self.keys_path = CONFIG['backup']['keys_path']
            self.gpg_receiver = CONFIG['backup']['gpg_receiver']
            self.mail_recipients = CONFIG['mail']['recipients']
            self.check_demux = CONFIG.get('backup', {}).get('check_demux', False)
            self.couch_info = CONFIG.get('statusdb')
        except KeyError as e:
            logger.error(
                "Config file is missing the key {}, make sure it have all required information"
                .format(str(e)))
            raise SystemExit

    def collect_runs(self, ext=None, filter_by_ext=False):
        """Collect runs from archive directories.

        :param str ext: file extension to match/strip (e.g. '.tar.gz')
        :param bool filter_by_ext: when True, only items ending in *ext* are considered
        """
        self.runs = []
        if self.run:
            # A single run was given explicitly; validate its name
            run = run_vars(self.run)
            if not re.match(filesystem.RUN_RE, run.name):
                logger.error("Given run {} did not match a FC pattern".format(
                    self.run))
                raise SystemExit
            self.runs.append(run)
        else:
            for adir in self.archive_dirs.values():
                if not os.path.isdir(adir):
                    logger.warn(
                        "Path {} does not exist or it is not a directory".format(adir))
                    continue
                for item in os.listdir(adir):
                    if filter_by_ext and not item.endswith(ext):
                        continue
                    elif item.endswith(ext):
                        # Strip the extension so the bare run name is matched below
                        item = item.replace(ext, '')
                    elif not os.path.isdir(os.path.join(adir, item)):
                        continue
                    if re.match(filesystem.RUN_RE, item) and item not in self.runs:
                        self.runs.append(run_vars(os.path.join(adir, item)))

    def avail_disk_space(self, path, run):
        """Check the space on file system based on parent directory of the run.

        Aborts with a mail to the operators when the free space reported by
        `df` is below the estimated requirement for encrypting *run*.
        """
        # not able to fetch runtype use the max size as precaution, size units in GB
        illumina_run_sizes = {
            'hiseq': 500,
            'hiseqx': 900,
            'novaseq': 1800,
            'miseq': 20
        }
        required_size = illumina_run_sizes.get(self._get_run_type(run), 900) * 2
        # check for any ongoing runs and add up the required size accordingly
        for ddir in self.data_dirs.values():
            if not os.path.isdir(ddir):
                continue
            for item in os.listdir(ddir):
                if not re.match(filesystem.RUN_RE, item):
                    continue
                # Missing RTAComplete.txt means the run is still sequencing
                if not os.path.exists(
                        os.path.join(ddir, item, "RTAComplete.txt")):
                    required_size += illumina_run_sizes.get(
                        self._get_run_type(run), 900)
        # get available free space from the file system
        try:
            df_proc = sp.Popen(['df', path], stdout=sp.PIPE, stderr=sp.PIPE)
            df_out, df_err = df_proc.communicate()
            # BUGFIX: field [2] of the last `df` line is 'Used'; field [3] is
            # 'Available' (as in the sibling implementation of this method).
            # Divide twice by 1024 to convert 1K blocks to GB.
            available_size = int(df_out.strip().split('\n')
                                 [-1].strip().split()[3]) / 1024 / 1024
        except Exception as e:  # was Python-2-only `except Exception, e`
            logger.error(
                "Evaluation of disk space failed with error {}".format(e))
            raise SystemExit
        if available_size < required_size:
            e_msg = "Required space for encryption is {}GB, but only {}GB available".format(
                required_size, available_size)
            subjt = "Low space for encryption - {}".format(self.host_name)
            logger.error(e_msg)
            misc.send_mail(subjt, e_msg, self.mail_recipients)
            raise SystemExit
def process_run(run_dir, nanoseq_sample_sheet, anglerfish_sample_sheet):
    """Proceess nanopore runs.

    Drives a run through the pipeline stages: nanoseq analysis, (for QC runs)
    anglerfish, then transfer to the analysis cluster and archiving, mailing
    the operator at each terminal outcome.

    :param str run_dir: path to the nanopore run directory
    :param nanoseq_sample_sheet: user-supplied nanoseq sample sheet, or falsy
    :param anglerfish_sample_sheet: user-supplied anglerfish sample sheet, or falsy
    """
    # A run counts as a QC run unless only a nanoseq sample sheet was provided
    qc_run = True
    if nanoseq_sample_sheet and not anglerfish_sample_sheet:
        qc_run = False
    logger.info('Processing run: {} as a {}'.format(run_dir,
                                                    'QC run' if qc_run else 'non-QC run'))
    summary_file = glob.glob(run_dir + '/final_summary*.txt')[0]
    nanoseq_dir = os.path.join(run_dir, 'nanoseq_output')
    anglerfish_dir = os.path.join(run_dir, 'anglerfish_output')
    # NOTE(review): this overwrites any anglerfish_sample_sheet argument with the
    # default path inside the run dir — confirm that is intended.
    anglerfish_sample_sheet = os.path.join(run_dir, 'anglerfish_sample_sheet.csv')
    nanoseq_exit_status_file = os.path.join(run_dir, '.exitcode_for_nanoseq')
    anglerfish_exit_status_file = os.path.join(run_dir, '.exitcode_for_anglerfish')
    email_recipients = CONFIG.get('mail').get('recipients')
    # Stage 1: sequencing finished (summary file exists) but nanoseq not started
    if os.path.isfile(summary_file) and not os.path.isdir(nanoseq_dir):
        logger.info('Sequencing done for run {}. Attempting to start analysis.'.format(run_dir))
        if not nanoseq_sample_sheet:
            nanoseq_sample_sheet = parse_lims_sample_sheet(run_dir)
        if os.path.isfile(nanoseq_sample_sheet):
            start_nanoseq(run_dir, nanoseq_sample_sheet)
        else:
            logger.warn('Samplesheet not found for run {}. Operator notified. Skipping.'.format(run_dir))
            email_subject = ('Samplesheet missing for run {}'.format(os.path.basename(run_dir)))
            email_message = 'There was an issue locating the samplesheet for run {}.'.format(run_dir)
            send_mail(email_subject, email_message, email_recipients)
    # Stage 2: nanoseq running (dir exists, no exit-status file yet)
    elif os.path.isdir(nanoseq_dir) and not os.path.isfile(nanoseq_exit_status_file):
        logger.info('Nanoseq has started for run {} but is not yet done. Skipping.'.format(run_dir))
    # Stage 3: nanoseq finished -> anglerfish (QC runs), transfer, archive
    elif os.path.isdir(nanoseq_dir) and os.path.isfile(nanoseq_exit_status_file):
        nanoseq_successful = check_exit_status(nanoseq_exit_status_file)
        if nanoseq_successful:
            run_id = os.path.basename(run_dir)
            transfer_log = CONFIG.get('nanopore_analysis').get('transfer').get('transfer_file')
            if qc_run and not os.path.isdir(anglerfish_dir):
                logger.info('Nanoseq done for run {}. Attempting to start Anglerfish.'.format(run_id))
                start_anglerfish(run_dir, anglerfish_sample_sheet, anglerfish_dir)
            elif qc_run and not os.path.isfile(anglerfish_exit_status_file):
                logger.info('Anglerfish has started for run {} but is not yet done. Skipping.'.format(run_id))
            elif qc_run and os.path.isfile(anglerfish_exit_status_file):
                anglerfish_successful = check_exit_status(anglerfish_exit_status_file)
                if anglerfish_successful:
                    copy_results_for_lims(run_dir, anglerfish_dir)
                    logger.info('Anglerfish finished OK for run {}. Notifying operator.'.format(run_id))
                    email_subject = ('Anglerfish successfully processed run {}'.format(os.path.basename(run_id)))
                    email_message = ('Anglerfish has successfully finished for run {}. Please '
                                     'finish the QC step in lims.').format(run_id)
                    send_mail(email_subject, email_message, email_recipients)
                    if is_not_transferred(run_id, transfer_log):
                        if transfer_run(run_dir):
                            update_transfer_log(run_id, transfer_log)
                            logger.info('Run {} has been synced to the analysis cluster.'.format(run_id))
                            archive_run(run_dir)
                            logger.info('Run {} is finished and has been archived. Notifying operator.'.format(run_id))
                            email_subject = ('Run successfully processed: {}'.format(os.path.basename(run_id)))
                            email_message = ('Run {} has been analysed, transferred and archived '
                                             'successfully.').format(run_id)
                            send_mail(email_subject, email_message, email_recipients)
                        else:
                            logger.warn('An error occurred during transfer of run {} '
                                        'to Irma. Notifying operator.'.format(run_dir))
                            email_subject = ('Run processed with errors: {}'.format(os.path.basename(run_id)))
                            email_message = ('Run {} has been analysed, but an error occurred during '
                                             'transfer.').format(run_id)
                            send_mail(email_subject, email_message, email_recipients)
                    else:
                        logger.warn('The following run has already been transferred, '
                                    'skipping: {}'.format(run_dir))
                else:
                    logger.warn('Anglerfish exited with a non-zero exit status for run {}. '
                                'Notifying operator.'.format(run_dir))
                    email_subject = ('Run processed with errors: {}'.format(os.path.basename(run_id)))
                    email_message = ('Anglerfish exited with errors for run {}. Please '
                                     'check the log files and restart.').format(run_id)
                    send_mail(email_subject, email_message, email_recipients)
            elif not qc_run:
                # Non-QC runs skip anglerfish and go straight to transfer + archive
                if is_not_transferred(run_id, transfer_log):
                    if transfer_run(run_dir):
                        update_transfer_log(run_id, transfer_log)
                        logger.info('Run {} has been synced to the analysis cluster.'.format(run_id))
                        archive_run(run_dir)
                        logger.info('Run {} is finished and has been archived. Notifying operator.'.format(run_id))
                        email_subject = ('Run successfully processed: {}'.format(run_id))
                        email_message = ('Run {} has been analysed, transferred and archived '
                                         'successfully.').format(run_id)
                        send_mail(email_subject, email_message, email_recipients)
                    else:
                        logger.warn('An error occurred during transfer of run {} '
                                    'to Irma. Notifying operator.'.format(run_dir))
                        email_subject = ('Run processed with errors: {}'.format(run_id))
                        email_message = ('Run {} has been analysed, but an error occurred during '
                                         'transfer.').format(run_id)
                        send_mail(email_subject, email_message, email_recipients)
                else:
                    logger.warn('The following run has already been transferred, '
                                'skipping: {}'.format(run_id))
        else:
            logger.warn('Nanoseq exited with a non-zero exit status for run {}. '
                        'Notifying operator.'.format(run_dir))
            email_subject = ('Analysis failed for run {}'.format(os.path.basename(run_dir)))
            email_message = 'The nanoseq analysis failed for run {}.'.format(run_dir)
            send_mail(email_subject, email_message, email_recipients)
    else:
        # Summary file not written yet: sequencing still in progress
        logger.info('Run {} not finished sequencing yet. Skipping.'.format(run_dir))
    return