def items(section, issuer=None):
    """
    Return a list of (option, value) pairs for each option in the given section.
    Values are auto-coerced as in get().

    :param section: The name of the section.
    :param issuer: The issuer account.
    :returns: [('option', auto-coerced value), ...]
    :raises AccessDenied: If the issuer lacks the 'config_items' permission.
    """
    # FIX: the original docstring documented a ':param value:' that does not
    # exist in the signature; removed.
    kwargs = {'issuer': issuer, 'section': section}
    if not permission.has_permission(issuer=issuer, action='config_items', kwargs=kwargs):
        raise exception.AccessDenied('%s cannot retrieve options and values from section %s' % (issuer, section))
    return config.items(section)
def finisher(once=False, sleep_time=60, activities=None, bulk=100, db_bulk=1000, partition_wait_time=10):
    """
    Main loop to update the replicas and rules based on finished requests.

    Reads the 'conveyor' config section, pre-compiles the configured
    suspicious patterns, then hands the per-cycle work to run_conveyor_daemon.
    """
    # Load the conveyor section; fall back to an empty config if absent.
    try:
        conveyor_config = dict(items('conveyor'))
    except ConfigNotFound:
        logging.log(logging.INFO, 'No configuration found for conveyor')
        conveyor_config = {}

    # Compile the comma-separated suspicious patterns, if any are configured.
    suspicious_patterns = conveyor_config.get('suspicious_pattern', [])
    if suspicious_patterns:
        suspicious_patterns = [re.compile(raw.strip()) for raw in str(suspicious_patterns).split(",")]
    logging.log(logging.DEBUG, "Suspicious patterns: %s" % [compiled.pattern for compiled in suspicious_patterns])

    retry_protocol_mismatches = conveyor_config.get('retry_protocol_mismatches', False)

    logger_prefix = executable = 'conveyor-finisher'
    if activities:
        activities.sort()
        executable += '--activities ' + str(activities)

    # Bind the per-cycle parameters once; the daemon runner calls this each cycle.
    run_once_fnc = functools.partial(
        run_once,
        bulk=bulk,
        db_bulk=db_bulk,
        suspicious_patterns=suspicious_patterns,
        retry_protocol_mismatches=retry_protocol_mismatches,
    )
    run_conveyor_daemon(
        once=once,
        graceful_stop=graceful_stop,
        executable=executable,
        logger_prefix=logger_prefix,
        partition_wait_time=partition_wait_time,
        sleep_time=sleep_time,
        run_once_fnc=run_once_fnc,
        activities=activities,
    )
def get_config_limits():
    """
    Get config limits.

    Reads every option of the 'throttler' config section. Option names are
    expected to look like '<activity>,<rse_name>'; the special RSE name
    'all_rses' is kept verbatim instead of being resolved to an RSE id.

    :returns: Dictionary of limits: {activity: {rse_id: int limit}}.
    """
    config_limits = {}
    items = config_core.items('throttler')
    for opt, value in items:
        try:
            activity, rsename = opt.split(',')
            if rsename == 'all_rses':
                rse_id = 'all_rses'
            else:
                rse_id = get_rse_id(rsename)
            if activity not in config_limits:
                config_limits[activity] = {}
            config_limits[activity][rse_id] = int(value)
        except Exception:
            # FIX: was a bare 'except:', which also swallows SystemExit and
            # KeyboardInterrupt. A malformed entry is logged and skipped so
            # one bad option does not break the rest.
            logging.warning("Failed to parse throttler config %s:%s, error: %s", opt, value, traceback.format_exc())
    return config_limits
def get_config_limits(logger=logging.log):
    """
    Get config limits.

    Reads every option of the 'throttler' config section. Option names are
    '<activity>,<rse>' pairs; in multi-VO mode the RSE part is taken as an
    RSE id directly, in single-VO mode it is an RSE name resolved to an id.

    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    :returns: Dictionary of limits: {activity: {rse_id: int limit}}.
    """
    config_limits = {}
    items = config_core.items('throttler', use_cache=using_memcache)
    for opt, value in items:
        try:
            if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
                # In multi VO mode, require config to be set using RSE IDs
                activity, rse_id = opt.split(',')
            else:
                # In single VO mode, expect config to be set using RSE names
                activity, rse_name = opt.split(',')
                if rse_name == 'all_rses':
                    rse_id = 'all_rses'
                else:
                    # In single VO mode, VO should always be def
                    rse_id = get_rse_id(rse_name, vo='def')
            if activity not in config_limits:
                config_limits[activity] = {}
            config_limits[activity][rse_id] = int(value)
        except Exception:
            # FIX: was a bare 'except:', which also swallows SystemExit and
            # KeyboardInterrupt. A malformed entry is logged and skipped so
            # one bad option does not break the rest.
            logger(logging.WARNING, "Failed to parse throttler config %s:%s, error: %s" % (opt, value, traceback.format_exc()))
    return config_limits
def finisher(once=False, sleep_time=60, activities=None, bulk=100, db_bulk=1000):
    """
    Main loop to update the replicas and rules based on finished requests.

    :param once: Run a single cycle and return instead of looping.
    :param sleep_time: Target seconds per cycle; the thread sleeps the remainder.
    :param activities: Optional list of activities to restrict the work to.
    :param bulk: Number of requests handled per chunk.
    :param db_bulk: Maximum number of requests fetched from the DB per activity.
    """
    try:
        conveyor_config = {item[0]: item[1] for item in items('conveyor')}
    except ConfigNotFound:
        logging.info('No configuration found for conveyor')
        conveyor_config = {}

    # Get suspicious patterns (comma-separated regexes in the config value).
    suspicious_patterns = conveyor_config.get('suspicious_pattern', [])
    if suspicious_patterns:
        pattern = str(suspicious_patterns)
        patterns = pattern.split(",")
        suspicious_patterns = [re.compile(pat.strip()) for pat in patterns]
    logging.debug("Suspicious patterns: %s" % [pat.pattern for pat in suspicious_patterns])

    retry_protocol_mismatches = conveyor_config.get('retry_protocol_mismatches', False)

    executable = ' '.join(sys.argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    # Make an initial heartbeat so that all finishers have the correct worker number on the next try
    heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
    logging.info('%s Finisher starting - db_bulk(%i) bulk (%i)', prepend_str, db_bulk, bulk)

    graceful_stop.wait(10)

    while not graceful_stop.is_set():
        start_time = time.time()
        try:
            # Refresh the heartbeat each cycle so the worker number stays current.
            heart_beat = heartbeat.live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
            logging.debug('%s Starting new cycle', prepend_str)
            if activities is None:
                activities = [None]

            for activity in activities:
                logging.debug('%s Working on activity %s', prepend_str, activity)
                time1 = time.time()
                reqs = request_core.get_next(request_type=[RequestType.TRANSFER, RequestType.STAGEIN, RequestType.STAGEOUT],
                                             state=[RequestState.DONE, RequestState.FAILED, RequestState.LOST,
                                                    RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED,
                                                    RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES],
                                             limit=db_bulk,
                                             older_than=datetime.datetime.utcnow(),
                                             total_workers=heart_beat['nr_threads'] - 1,
                                             worker_number=heart_beat['assign_thread'],
                                             mode_all=True,
                                             hash_variable='rule_id')
                record_timer('daemons.conveyor.finisher.000-get_next', (time.time() - time1) * 1000)
                time2 = time.time()
                if reqs:
                    logging.debug('%s Updating %i requests for activity %s', prepend_str, len(reqs), activity)

                for chunk in chunks(reqs, bulk):
                    try:
                        time3 = time.time()
                        __handle_requests(chunk, suspicious_patterns, retry_protocol_mismatches, prepend_str)
                        record_timer('daemons.conveyor.finisher.handle_requests', (time.time() - time3) * 1000 / (len(chunk) if chunk else 1))
                        record_counter('daemons.conveyor.finisher.handle_requests', len(chunk))
                    except Exception as error:
                        # FIX: logging.warn is a deprecated alias of logging.warning
                        # (removed in Python 3.13); use the documented method.
                        logging.warning('%s %s', prepend_str, str(error))
                if reqs:
                    logging.debug('%s Finish to update %s finished requests for activity %s in %s seconds', prepend_str, len(reqs), activity, time.time() - time2)
        except (DatabaseException, DatabaseError) as error:
            # Lock/deadlock errors (Oracle ORA-00054/ORA-00060, MySQL 1205) are
            # expected under contention: warn and retry on the next cycle.
            if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                # FIX: logging.warn -> logging.warning (deprecated alias).
                logging.warning('%s Lock detected when handling request - skipping: %s', prepend_str, str(error))
            else:
                logging.error('%s %s', prepend_str, traceback.format_exc())
        except Exception as error:
            logging.critical('%s %s', prepend_str, str(error))

        if once:
            return

        # Sleep the remainder of the cycle, waking early on graceful stop.
        end_time = time.time()
        time_diff = end_time - start_time
        if time_diff < sleep_time:
            logging.info('%s Sleeping for a while : %s seconds', prepend_str, (sleep_time - time_diff))
            graceful_stop.wait(sleep_time - time_diff)

    logging.info('%s Graceful stop requests', prepend_str)
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info('%s Graceful stop done', prepend_str)
def finisher(once=False, sleep_time=60, activities=None, bulk=100, db_bulk=1000, partition_wait_time=10):
    """
    Main loop to update the replicas and rules based on finished requests.

    :param once: Run a single cycle and stop (a fatal exception is re-raised).
    :param sleep_time: Target seconds per cycle; daemon_sleep waits the remainder.
    :param activities: Optional list of activities to restrict the work to.
    :param bulk: Number of requests handled per chunk.
    :param db_bulk: Maximum number of requests fetched from the DB per activity.
    :param partition_wait_time: Seconds to wait before the first cycle so all
        workers register a heartbeat and partitioning stabilises.
    """
    try:
        conveyor_config = {item[0]: item[1] for item in items('conveyor')}
    except ConfigNotFound:
        logging.log(logging.INFO, 'No configuration found for conveyor')
        conveyor_config = {}

    # Get suspicious patterns (comma-separated regexes in the config value).
    suspicious_patterns = conveyor_config.get('suspicious_pattern', [])
    if suspicious_patterns:
        pattern = str(suspicious_patterns)
        patterns = pattern.split(",")
        suspicious_patterns = [re.compile(pat.strip()) for pat in patterns]
    logging.log(logging.DEBUG, "Suspicious patterns: %s" % [pat.pattern for pat in suspicious_patterns])

    retry_protocol_mismatches = conveyor_config.get('retry_protocol_mismatches', False)

    logger_prefix = executable = 'conveyor-finisher'
    if activities:
        activities.sort()
        # NOTE(review): there is no separator before '--activities', so the
        # executable string becomes 'conveyor-finisher--activities [...]' —
        # confirm before relying on the exact heartbeat executable name.
        executable += '--activities ' + str(activities)

    with HeartbeatHandler(executable=executable, logger_prefix=logger_prefix) as heartbeat_handler:
        logger = heartbeat_handler.logger
        logger(logging.INFO, 'Finisher starting - db_bulk(%i) bulk (%i)', db_bulk, bulk)

        if partition_wait_time:
            graceful_stop.wait(partition_wait_time)
        while not graceful_stop.is_set():
            start_time = time.time()
            try:
                # Refresh the heartbeat (and the thread-aware logger) each cycle.
                heart_beat, logger = heartbeat_handler.live(older_than=3600)
                if activities is None:
                    activities = [None]
                for activity in activities:
                    logger(logging.DEBUG, 'Working on activity %s', activity)
                    time1 = time.time()
                    # Fetch the next batch of terminally-stated requests assigned
                    # to this worker (partitioned by rule_id).
                    reqs = request_core.get_next(request_type=[RequestType.TRANSFER, RequestType.STAGEIN, RequestType.STAGEOUT],
                                                 state=[RequestState.DONE, RequestState.FAILED, RequestState.LOST,
                                                        RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED,
                                                        RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES,
                                                        RequestState.MISMATCH_SCHEME],
                                                 limit=db_bulk,
                                                 older_than=datetime.datetime.utcnow(),
                                                 total_workers=heart_beat['nr_threads'],
                                                 worker_number=heart_beat['assign_thread'],
                                                 mode_all=True,
                                                 hash_variable='rule_id')
                    record_timer('daemons.conveyor.finisher.get_next', (time.time() - time1) * 1000)
                    time2 = time.time()
                    if reqs:
                        logger(logging.DEBUG, 'Updating %i requests for activity %s', len(reqs), activity)
                    for chunk in chunks(reqs, bulk):
                        try:
                            time3 = time.time()
                            __handle_requests(chunk, suspicious_patterns, retry_protocol_mismatches, logger=logger)
                            record_timer('daemons.conveyor.finisher.handle_requests_time', (time.time() - time3) * 1000 / (len(chunk) if chunk else 1))
                            record_counter('daemons.conveyor.finisher.handle_requests', delta=len(chunk))
                        except Exception as error:
                            # A failing chunk is logged and skipped; other chunks proceed.
                            logger(logging.WARNING, '%s', str(error))
                    if reqs:
                        logger(logging.DEBUG, 'Finish to update %s finished requests for activity %s in %s seconds', len(reqs), activity, time.time() - time2)
            except (DatabaseException, DatabaseError) as error:
                # Lock/deadlock errors (Oracle ORA-00054/ORA-00060, MySQL 1205)
                # are expected under contention: warn and retry next cycle.
                if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                    logger(logging.WARNING, 'Lock detected when handling request - skipping: %s', str(error))
                else:
                    logger(logging.ERROR, 'Exception', exc_info=True)
            except Exception:
                logger(logging.CRITICAL, 'Exception', exc_info=True)
                # In single-shot mode a fatal error must surface to the caller.
                if once:
                    raise
            if once:
                break

            daemon_sleep(start_time=start_time, sleep_time=sleep_time, graceful_stop=graceful_stop, logger=logger)