def _refresh_access_token(self):
    """Renew the kerberos ticket and the AFS token and log the lifetime change.

    Runs the configured kinit executable with '-R' (renew) followed by the
    configured aklog executable, then reports how the remaining token
    lifetime changed.
    """
    lifetime_before = str_time_long(self._get_timeleft(cached=False))
    # 'kinit -R' renews the existing ticket; aklog afterwards refreshes the AFS token
    LocalProcess(self._kinit_exec, '-R').finish(timeout=10)
    LocalProcess(self._aklog_exec).finish(timeout=10)
    lifetime_after = str_time_long(self._get_timeleft(cached=False))
    self._log.log(logging.INFO2, 'Time left for access token "%s" changed from %s to %s',
        self.get_object_name(), lifetime_before, lifetime_after)
def can_submit(self, needed_time, can_currently_submit):
    """Decide whether the access token lifetime permits job submission.

    needed_time           -- walltime (seconds) the submitted job requires; negative
                             values disable the walltime check
    can_currently_submit  -- current submission state (the warning is only
                             emitted while submission is still enabled)
    Raises UserError when the token is expired or shorter than the configured
    minimum lifetime; returns False to disable submission, True otherwise.
    """
    remaining = self._get_timeleft(cached=True)
    if remaining < 0:
        raise UserError(
            'Your access token (%s) expired %s ago! (Required lifetime: %s)' % (
                self.get_object_name(), str_time_long(-remaining),
                str_time_long(self._min_life_time)))
    if not self._check_time_left(self._min_life_time):
        # _check_time_left may have re-queried the token - read the cached value again
        raise UserError(
            'Your access token (%s) only has %d seconds left! (Required are %s)' % (
                self.get_object_name(), self._get_timeleft(cached=True),
                str_time_long(self._min_life_time)))
    if self._ignore_time or (needed_time < 0):
        return True
    if self._check_time_left(self._min_life_time + needed_time) or not can_currently_submit:
        return True
    self._log.log_time(logging.WARNING,
        'Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
        self.get_object_name(), str_time_long(self._get_timeleft(cached=False)),
        str_time_long(self._min_life_time + needed_time))
    self._log.log_time(logging.WARNING, 'Disabling job submission')
    return False
def can_submit(self, needed_time, can_currently_submit):
    """Decide whether the access token lifetime permits job submission.

    needed_time           -- walltime (seconds) the submitted job requires; negative
                             values disable the walltime check
    can_currently_submit  -- current submission state (the warning is only
                             emitted while submission is still enabled)
    Raises UserError when the token is expired or shorter than the configured
    minimum lifetime; returns False to disable submission, True otherwise.
    """
    # An already expired token (negative time left) would otherwise produce a
    # confusing "only has -N seconds left" message below - report expiry explicitly
    # (consistent with the sibling revision of this method)
    if self._get_timeleft(cached=True) < 0:
        raise UserError(
            'Your access token (%s) expired %s ago! (Required lifetime: %s)' % (
                self.get_object_name(), str_time_long(-self._get_timeleft(cached=True)),
                str_time_long(self._min_life_time)))
    if not self._check_time_left(self._min_life_time):
        raise UserError('Your access token (%s) only has %d seconds left! (Required are %s)' % (
            self.get_object_name(), self._get_timeleft(cached=True),
            str_time_long(self._min_life_time)))
    if self._ignore_time or (needed_time < 0):
        return True
    if not self._check_time_left(self._min_life_time + needed_time) and can_currently_submit:
        self._log.log_time(logging.WARNING,
            'Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
            self.get_object_name(), str_time_long(self._get_timeleft(cached=False)),
            str_time_long(self._min_life_time + needed_time))
        self._log.log_time(logging.WARNING, 'Disabling job submission')
        return False
    return True
def __init__(self, config, datasource_name, repository, keep_old=True): BaseDataParameterSource.__init__(self, config, datasource_name, repository) # hide provider property set by __new__ self._provider = self.provider del self.provider if self._provider.need_init_query(): self._provider.get_block_list_cached(show_stats=False) data_src_text = 'Dataset source %r' % datasource_name # Select dataset refresh rate data_refresh = config.get_time('%s refresh' % datasource_name, -1, on_change=None) if data_refresh >= 0: data_refresh = max(data_refresh, self._provider.get_query_interval()) self._log.info('%s will be queried every %s', data_src_text, str_time_long(data_refresh)) self.setup_resync(interval=data_refresh, force=config.get_state('resync', detail='datasets')) splitter_name = config.get('%s splitter' % datasource_name, 'FileBoundarySplitter') splitter_cls = self._provider.check_splitter(DataSplitter.get_class(splitter_name)) self._splitter = splitter_cls(config, datasource_name) # Settings: (self._dn, self._keep_old) = (config.get_work_path(), keep_old) ensure_dir_exists(self._dn, 'partition map directory', DatasetError) self._set_reader(self._init_reader()) if not self.get_parameter_len(): if data_refresh < 0: raise UserError('%s does not provide jobs to process' % data_src_text) self._log.warning('%s does not provide jobs to process', data_src_text)
def _check_time_left(self, needed_time): # check for time left delta = time.time() - self._last_update timeleft = max(0, self._get_timeleft(cached=True) - delta) # recheck token => after > 30min have passed or when time is running out (max every 5 minutes) if (delta > self._min_query_time) or (timeleft < needed_time and delta > self._max_query_time): self._last_update = time.time() timeleft = self._get_timeleft(cached=False) self._log.log_time(logging.INFO, 'Time left for access token "%s": %s', self.get_object_name(), str_time_long(timeleft)) return timeleft >= needed_time
def _explain_failure(self, task, job_obj): map_error_code2msg = dict(task.map_error_code2msg) msg_list = [] exit_code = job_obj.get('retcode') if exit_code: msg_list.append('error code: %d' % exit_code) if self._log_status.isEnabledFor(logging.DEBUG) and (exit_code in map_error_code2msg): msg_list.append(map_error_code2msg[exit_code]) job_location = job_obj.get_job_location() if job_location: msg_list.append(job_location) if (job_obj.get('runtime') is not None) and ((job_obj.get('runtime') or 0) >= 0): msg_list.append('runtime %s' % str_time_long(job_obj.get('runtime') or 0)) return str.join(' - ', msg_list)
def __init__(self, config, datasource_name, repository, keep_old=True): BaseDataParameterSource.__init__(self, config, datasource_name, repository) # hide provider property set by __new__ self._provider = self.provider del self.provider if self._provider.need_init_query(): self._provider.get_block_list_cached(show_stats=False) data_src_text = 'Dataset source %r' % datasource_name # Select dataset refresh rate data_refresh = config.get_time('%s refresh' % datasource_name, -1, on_change=None) if data_refresh >= 0: data_refresh = max(data_refresh, self._provider.get_query_interval()) self._log.info('%s will be queried every %s', data_src_text, str_time_long(data_refresh)) self.setup_resync(interval=data_refresh, force=config.get_state('resync', detail='datasets')) splitter_name = config.get('%s splitter' % datasource_name, 'FileBoundarySplitter') splitter_cls = self._provider.check_splitter( DataSplitter.get_class(splitter_name)) self._splitter = splitter_cls(config, datasource_name) # Settings: (self._dn, self._keep_old) = (config.get_work_path(), keep_old) ensure_dir_exists(self._dn, 'partition map directory', DatasetError) self._set_reader(self._init_reader()) if not self.get_parameter_len(): if data_refresh < 0: raise UserError('%s does not provide jobs to process' % data_src_text) self._log.warning('%s does not provide jobs to process', data_src_text)
def on_job_state_change(self, job_db_len, jobnum, job_obj, old_state, new_state, reason=None):
    """Log a single line describing a job state transition.

    job_db_len -- total number of jobs (used to pad the job number column)
    jobnum     -- number of the job that changed state
    job_obj    -- job object (queried for gc_id, attempt, location, 'reason', 'runtime')
    old_state / new_state -- Job enum values of the transition
    reason     -- optional explanation appended to the message
    """
    # pad job numbers to the width needed for the largest job number
    jobnum_len = int(math.log10(max(1, job_db_len)) + 1)
    job_status_str_list = [
        'Job %s state changed from %s to %s' % (str(jobnum).ljust(jobnum_len),
            Job.enum2str(old_state), Job.enum2str(new_state))
    ]
    if reason:
        job_status_str_list.append('(%s)' % reason)
    if self._show_wms and job_obj.gc_id:
        # gc_id is dot separated - the second component is shown as the WMS id
        job_status_str_list.append('(WMS:%s)' % job_obj.gc_id.split('.')[1])
    # append one state specific detail to the message
    if (new_state == Job.SUBMITTED) and (job_obj.attempt > 1):
        job_status_str_list.append('(retry #%s)' % (job_obj.attempt - 1))
    elif (new_state == Job.QUEUED) and (job_obj.get_job_location() != 'N/A'):
        job_status_str_list.append('(%s)' % job_obj.get_job_location())
    elif (new_state in [Job.WAITING, Job.ABORTED, Job.DISABLED]) and job_obj.get('reason'):
        job_status_str_list.append('(%s)' % job_obj.get('reason'))
    elif (new_state == Job.SUCCESS) and (job_obj.get('runtime') is not None):
        if (job_obj.get('runtime') or 0) >= 0:
            job_status_str_list.append(
                '(runtime %s)' % str_time_long(job_obj.get('runtime') or 0))
    elif new_state == Job.FAILED:
        # NOTE(review): a sibling revision passes task to _explain_failure - confirm
        # which signature this class' _explain_failure actually has
        fail_msg = self._explain_failure(job_obj)
        if fail_msg:
            job_status_str_list.append('(%s)' % fail_msg)
    self._log_status.log_time(logging.INFO, str.join(' ', job_status_str_list))
def on_job_state_change(self, task, job_db_len, jobnum, job_obj, old_state, new_state, reason=None):
    """Log a single line describing a job state transition.

    task       -- task object, forwarded to _explain_failure for failed jobs
    job_db_len -- total number of jobs (used to pad the job number column)
    jobnum     -- number of the job that changed state
    job_obj    -- job object (queried for gc_id, attempt, location, 'reason', 'runtime')
    old_state / new_state -- Job enum values of the transition
    reason     -- optional explanation appended to the message
    """
    # pad job numbers to the width needed for the largest job number
    width = int(math.log10(max(1, job_db_len)) + 1)
    parts = ['Job %s state changed from %s to %s' % (
        str(jobnum).ljust(width), Job.enum2str(old_state), Job.enum2str(new_state))]
    if reason:
        parts.append('(%s)' % reason)
    if self._show_wms and job_obj.gc_id:
        # gc_id is dot separated - the second component is shown as the WMS id
        parts.append('(WMS:%s)' % job_obj.gc_id.split('.')[1])
    # append one state specific detail to the message
    if (new_state == Job.SUBMITTED) and (job_obj.attempt > 1):
        parts.append('(retry #%s)' % (job_obj.attempt - 1))
    elif (new_state == Job.QUEUED) and (job_obj.get_job_location() != 'N/A'):
        parts.append('(%s)' % job_obj.get_job_location())
    elif (new_state in [Job.WAITING, Job.ABORTED, Job.DISABLED]) and job_obj.get('reason'):
        parts.append('(%s)' % job_obj.get('reason'))
    elif (new_state == Job.SUCCESS) and (job_obj.get('runtime') is not None):
        if (job_obj.get('runtime') or 0) >= 0:
            parts.append('(runtime %s)' % str_time_long(job_obj.get('runtime') or 0))
    elif new_state == Job.FAILED:
        fail_msg = self._explain_failure(task, job_obj)
        if fail_msg:
            parts.append('(%s)' % fail_msg)
    self._log_status.log_time(logging.INFO, ' '.join(parts))
def show_report(self, job_db, jobnum_list):
    """Print the total consumed wall time and its estimated monetary cost.

    job_db      -- job database, queried for the transient job information
    jobnum_list -- job numbers to include in the summary
    """
    # sum up the positive runtimes of the selected jobs
    runtime_iter = (job_db.get_job_transient(jobnum).get('runtime', 0) for jobnum in jobnum_list)
    total_time = sum(rt for rt in runtime_iter if rt > 0)
    time_msg = 'Consumed wall time: %-20s' % str_time_long(total_time)
    cost_msg = 'Estimated cost: $%.2f' % ((total_time / 60. / 60.) * self._dollar_per_hour)
    self._show_line(time_msg + cost_msg.rjust(65 - len(time_msg)))