def init_runners():
    """Register every existing AnarchyRunner, then refresh all runner pods.

    Lists the 'anarchyrunners' custom objects in the operator namespace,
    passes each one to AnarchyRunner.register, and finally calls
    AnarchyRunner.refresh_all_runner_pods once.
    """
    runner_list = runtime.custom_objects_api.list_namespaced_custom_object(
        runtime.operator_domain, 'v1', runtime.operator_namespace, 'anarchyrunners'
    )
    # A response without an 'items' key is treated as an empty list.
    for definition in runner_list.get('items', []):
        AnarchyRunner.register(definition)
    AnarchyRunner.refresh_all_runner_pods(runtime)
def main_loop():
    """Run the operator's main processing loop forever.

    Each pass (roughly once per second):
      1. starts pending actions while holding ``action_cache_lock``;
      2. retries failed subjects;
      3. refreshes all runner pods if more than ``runner_check_interval``
         seconds have elapsed since the last successful refresh.

    Errors from any step are logged and never terminate the loop.
    """
    last_runner_check = 0
    while True:
        try:
            # ``with`` only releases the lock if it was actually acquired;
            # the previous acquire-inside-try/finally form could release an
            # unheld lock when acquire() itself failed.
            with action_cache_lock:
                AnarchyAction.start_actions(runtime)
        except Exception:
            operator_logger.exception("Error in start_actions!")

        try:
            AnarchySubject.retry_failures(runtime)
        except Exception:
            operator_logger.exception("Error in retry_failures!")

        if runner_check_interval < time.time() - last_runner_check:
            try:
                AnarchyRunner.refresh_all_runner_pods(runtime)
                # Only advance the timestamp on success so that a failed
                # refresh is retried on the next pass.
                last_runner_check = time.time()
            except Exception:
                # Not a bare ``except:`` so SystemExit/KeyboardInterrupt
                # are not swallowed.
                operator_logger.exception(
                    'Error checking runner pods in main loop')

        time.sleep(1)
def handle_runner_event(event, **_):
    """React to a watch event for an AnarchyRunner resource.

    'DELETED' events unregister the runner by name. 'ADDED', 'MODIFIED'
    and events whose type is None register the object, then manage its
    deployment and refresh its pods. Any other event type is ignored.
    Extra keyword arguments are accepted and discarded.
    """
    event_type = event['type']

    if event_type == 'DELETED':
        AnarchyRunner.unregister(event['object']['metadata']['name'])
        return

    if event_type not in ('ADDED', 'MODIFIED', None):
        return

    registered = AnarchyRunner.register(event['object'])
    registered.manage_runner_deployment(runtime)
    registered.refresh_runner_pods(runtime)
def cleanup_loop():
    """Run periodic maintenance tasks while the runtime is active.

    Waits on ``runtime.is_active_condition`` until ``runtime.is_active`` is
    true, then loops (sleeping 5 seconds per pass) running each task whose
    interval, in seconds, has elapsed since its last successful run:

      * AnarchyRunner.manage_runners   (``runner_check_interval``)
      * AnarchyGovernor.cleanup        (``cleanup_interval``)
      * AnarchyRun.manage_active_runs  (``run_check_interval``)

    A failing task is logged and retried on the next pass because its
    timestamp is only updated on success. When the runtime stops being
    active the function goes back to waiting on the condition.
    """
    last_cleanup = 0
    last_run_check = 0
    last_runner_check = 0
    while True:
        # Block until the runtime is flagged active.
        with runtime.is_active_condition:
            while not runtime.is_active:
                runtime.is_active_condition.wait()

        while runtime.is_active:
            if runner_check_interval < time.time() - last_runner_check:
                try:
                    AnarchyRunner.manage_runners(runtime)
                    last_runner_check = time.time()
                except Exception:
                    # ``except Exception`` rather than a bare ``except:`` so
                    # interpreter-exit signals are not swallowed.
                    operator_logger.exception(
                        'Error in AnarchyRunner.manage_runners!')

            if cleanup_interval < time.time() - last_cleanup:
                try:
                    AnarchyGovernor.cleanup(runtime)
                    last_cleanup = time.time()
                except Exception:
                    operator_logger.exception(
                        'Error in AnarchyGovernor.cleanup!')

            if run_check_interval < time.time() - last_run_check:
                try:
                    AnarchyRun.manage_active_runs(runtime)
                    last_run_check = time.time()
                except Exception:
                    operator_logger.exception(
                        'Error in AnarchyRun.manage_active_runs!')

            time.sleep(5)
def init():
    """Initialize governors and runners before the management loops start.

    Calls AnarchyGovernor.init and AnarchyRunner.init with the shared
    runtime, then sets the module-level ``init_complete`` flag to True and
    logs completion at debug level.
    """
    global init_complete

    AnarchyGovernor.init(runtime)
    AnarchyRunner.init(runtime)

    # Flag is flipped only after both init calls returned without raising.
    init_complete = True
    operator_logger.debug("Completed init")
def watch_runner_pods():
    '''
    Keep AnarchyRunner pod state in sync by running the pod watch forever.

    AnarchyRunner.watch_pods is restarted whenever it returns. If it
    raises, the error is logged and the loop pauses 5 seconds before
    retrying.
    '''
    while True:
        try:
            AnarchyRunner.watch_pods(runtime)
        except Exception:
            operator_logger.exception("Error in AnarchyRunner watch_pods")
            # Back off briefly so a persistent failure does not spin.
            time.sleep(5)
def start_runner_process():
    '''
    Launch the anarchy-runner as a child process for all-in-one pods.

    Looks up the 'default' AnarchyRunner, registering the default runner
    definition if none is known, then spawns the s2i run script with a copy
    of the current environment extended by the runner settings.
    '''
    operator_logger.info('Starting all-in-one runner')

    # Fall back to registering the default definition when lookup is falsy.
    default_runner = AnarchyRunner.get('default') or AnarchyRunner.register(
        AnarchyRunner.default_runner_definition(runtime)
    )

    child_env = os.environ.copy()
    child_env.update({
        'ANARCHY_COMPONENT': 'runner',
        'ANARCHY_URL': 'http://{}:5000'.format(runtime.anarchy_service_name),
        'RUNNER_NAME': 'default',
        'RUNNER_TOKEN': default_runner.runner_token,
    })

    subprocess.Popen(['/opt/app-root/src/.s2i/bin/run'], env=child_env)
def main_loop():
    """Run the operator's main loop while the runtime is active.

    Waits on ``runtime.is_active_condition`` until ``runtime.is_active`` is
    true. On activation it either starts the all-in-one runner process or,
    if no 'default' AnarchyRunner is known, calls ``init_default_runner``.
    Then, once per second while still active, it starts pending actions and
    runs each periodic task whose interval (seconds) has elapsed since its
    last successful run:

      * AnarchyGovernor.cleanup        (``cleanup_interval``)
      * AnarchyRun.manage_active_runs  (``run_check_interval``)
      * AnarchyRunner.manage_runners   (``runner_check_interval``)

    Failures of the periodic tasks are logged and retried on a later pass.
    An exception from ``AnarchyAction.start_actions`` is not caught here and
    propagates to the caller.
    """
    last_cleanup = 0
    last_run_check = 0
    last_runner_check = 0
    while True:
        # Block until the runtime is flagged active.
        with runtime.is_active_condition:
            while not runtime.is_active:
                runtime.is_active_condition.wait()

        if runtime.running_all_in_one:
            start_runner_process()
        elif not AnarchyRunner.get('default'):
            init_default_runner()

        while runtime.is_active:
            AnarchyAction.start_actions(runtime)

            if cleanup_interval < time.time() - last_cleanup:
                try:
                    AnarchyGovernor.cleanup(runtime)
                    last_cleanup = time.time()
                except Exception:
                    # ``except Exception`` rather than a bare ``except:`` so
                    # interpreter-exit signals are not swallowed.
                    operator_logger.exception(
                        'Error in AnarchyGovernor.cleanup!')

            if run_check_interval < time.time() - last_run_check:
                try:
                    AnarchyRun.manage_active_runs(runtime)
                    last_run_check = time.time()
                except Exception:
                    operator_logger.exception(
                        'Error in AnarchyRun.manage_active_runs!')

            if runner_check_interval < time.time() - last_runner_check:
                try:
                    AnarchyRunner.manage_runners(runtime)
                    last_runner_check = time.time()
                except Exception:
                    operator_logger.exception(
                        'Error in AnarchyRunner.manage_runners!')

            time.sleep(1)
def init():
    """Prepare operator state before the management loops run.

    Runs init_runners, init_governors and init_runs in that order. Then, in
    all-in-one mode, starts the runner process; otherwise calls
    init_default_runner when no 'default' AnarchyRunner is known. Finally
    sets the module-level ``init_complete`` flag and logs at debug level.
    """
    global init_complete

    init_runners()
    init_governors()
    init_runs()

    if not runtime.running_all_in_one:
        if not AnarchyRunner.get('default'):
            init_default_runner()
    else:
        start_runner_process()

    init_complete = True
    operator_logger.debug("Completed init")
def main_loop():
    """Start pending actions once per second while the runtime is active.

    Waits on ``runtime.is_active_condition`` until ``runtime.is_active`` is
    true. On each activation it either starts the all-in-one runner process
    or, when no 'default' AnarchyRunner is known, calls
    ``init_default_runner``. It then calls AnarchyAction.start_actions every
    second until the runtime is no longer active, and goes back to waiting.
    """
    while True:
        # Park here until the runtime is flagged active.
        with runtime.is_active_condition:
            while not runtime.is_active:
                runtime.is_active_condition.wait()

        if not runtime.running_all_in_one:
            if not AnarchyRunner.get('default'):
                init_default_runner()
        else:
            start_runner_process()

        while runtime.is_active:
            AnarchyAction.start_actions(runtime)
            time.sleep(1)
def init_default_runner():
    """
    Ensure the 'default' AnarchyRunner custom object exists.

    Fetches the object from the operator namespace. If the API answers with
    status 404 the object is created from
    AnarchyRunner.default_runner_definition(runtime). Any other
    ApiException is re-raised unchanged.
    """
    api = runtime.custom_objects_api
    try:
        api.get_namespaced_custom_object(
            runtime.operator_domain, 'v1', runtime.operator_namespace,
            'anarchyrunners', 'default'
        )
    except kubernetes.client.rest.ApiException as api_error:
        if api_error.status != 404:
            raise
        # Not found: create it from the default definition.
        api.create_namespaced_custom_object(
            runtime.operator_domain, 'v1', runtime.operator_namespace,
            'anarchyrunners', AnarchyRunner.default_runner_definition(runtime)
        )
def check_runner_auth(auth_header):
    """
    Verify bearer token sent by anarchy runner in API call.

    The header must have the form
    ``Bearer <runner name>:<pod name>:<runner token>``. The runner and pod
    must both be known, and the presented token must equal the expected
    token: the AnarchyRunner's ``runner_token`` in all-in-one mode,
    otherwise the ``RUNNER_TOKEN`` env value of the pod's first container.

    Args:
        auth_header: Value of the Authorization header; may be None or
            empty when the client sent no header.

    Returns:
        ``(anarchy_runner, runner_pod)`` on success, ``(None, None)`` on
        any failure. Failures after header parsing are logged as warnings.
    """
    import hmac

    # A missing header would otherwise make re.match raise TypeError.
    if not auth_header:
        return None, None

    match = re.match(r'Bearer ([^:]+):([^:]+):(.*)', auth_header)
    if not match:
        return None, None
    runner_name, pod_name, runner_token = match.groups()

    anarchy_runner = AnarchyRunner.get(runner_name)
    if not anarchy_runner:
        operator_logger.warning('Failed auth for unknown AnarchyRunner %s %s', runner_name, pod_name)
        return None, None

    runner_pod = anarchy_runner.pods.get(pod_name)
    if not runner_pod:
        operator_logger.warning(
            'Failed auth for AnarchyRunner %s %s, unknown pod',
            runner_name, pod_name)
        return None, None

    pod_runner_token = None
    if runtime.running_all_in_one:
        pod_runner_token = anarchy_runner.runner_token
    else:
        # ``env`` can be None when the container declares no variables.
        for env_var in runner_pod.spec.containers[0].env or []:
            if env_var.name == 'RUNNER_TOKEN':
                pod_runner_token = env_var.value
                break

    if not pod_runner_token:
        operator_logger.warning(
            'Failed auth for AnarchyRunner %s %s, cannot find RUNNER_TOKEN',
            runner_name, pod_name)
        return None, None

    # Constant-time comparison; encode to bytes because compare_digest
    # rejects non-ASCII str. Assumes the expected token is a str.
    if hmac.compare_digest(
        pod_runner_token.encode('utf-8'), runner_token.encode('utf-8')
    ):
        return anarchy_runner, runner_pod

    # Log the pod *name*: the pod object itself carries RUNNER_TOKEN in its
    # env and must not be written to the log.
    operator_logger.warning('Invalid auth token for AnarchyRunner %s %s', runner_name, pod_name)
    return None, None
def check_runner_auth(auth_header):
    """
    Verify bearer token sent by anarchy runner in API call.

    The header must have the form
    ``Bearer <runner name>:<pod name>:<runner token>``. The named
    AnarchyRunner must be known and the presented token must equal its
    ``runner_token``.

    Args:
        auth_header: Value of the Authorization header; may be None or
            empty when the client sent no header.

    Returns:
        ``(anarchy_runner, runner_pod)`` on success, where ``runner_pod`` is
        the pod name string taken from the header; ``(None, None)`` on any
        failure.
    """
    import hmac

    # A missing header would otherwise make re.match raise TypeError.
    if not auth_header:
        return None, None

    match = re.match(r'Bearer ([^:]+):([^:]+):(.*)', auth_header)
    if not match:
        return None, None
    runner_name, runner_pod, runner_token = match.groups()

    anarchy_runner = AnarchyRunner.get(runner_name)
    if not anarchy_runner:
        operator_logger.warning('Failed auth for unknown AnarchyRunner %s %s', runner_name, runner_pod)
        return None, None

    expected_token = anarchy_runner.runner_token
    # Fail closed when no token is configured, and compare in constant time.
    # Encode to bytes because compare_digest rejects non-ASCII str.
    # Assumes the configured token is a str.
    if not expected_token or not hmac.compare_digest(
        expected_token.encode('utf-8'), runner_token.encode('utf-8')
    ):
        # Deliberately do not log either token value: secrets must not
        # end up in the operator log.
        operator_logger.warning('Invalid auth token for AnarchyRunner %s %s', runner_name, runner_pod)
        return None, None

    return anarchy_runner, runner_pod
def manage(self, runtime):
    """Act on this run according to its current runner label value.

    * 'pending' / 'queued': nothing to do.
    * 'failed': set back to pending once ``retry_after_datetime`` is past.
    * '<runner>.<pod>': the run is assigned to a runner pod. If the runner
      is unknown a warning is logged; if the runner is known but the pod is
      not, ``handle_lost_runner`` is called.
    * anything else without a '.': ignored.
    """
    label = self.get_runner_label_value(runtime)

    if label in ('pending', 'queued'):
        return

    if label == 'failed':
        if self.retry_after_datetime < datetime.utcnow():
            self.set_to_pending(runtime)
        return

    if '.' not in label:
        return

    # Running, assigned to a runner pod. The strict two-way unpack raises
    # ValueError if the label holds more than one '.'.
    runner_name, pod_name = label.split('.')
    assigned_runner = AnarchyRunner.get(runner_name)
    if not assigned_runner:
        operator_logger.warning(
            'Unable to find AnarchyRunner %s for AnarchyRun %s',
            runner_name, self.name)
        return

    # FIXME - Timeout? (nothing is done while the pod is still known)
    if not assigned_runner.pods.get(pod_name):
        self.handle_lost_runner(runtime.runner_label, runtime)