def exec_job(**definition) -> Tuple[int, bytes]:
    """Run a job's command inside its container.

    Expects the job definition assembled by ``add`` as keyword arguments
    (at least ``job_id``, ``job_name``, ``container_id``, ``command``,
    ``user``, ``environment``; ``workdir`` is optional).

    Returns the ``(exit_code, output)`` tuple from docker-py's ``exec_run``.
    Raises AssertionError when the target container is paused or not running.
    """
    job_id = definition['job_id']
    container_id = definition['container_id']
    log.info(
        f"{container_name(container_id)}: Executing '{definition['job_name']}'."
    )

    # some sanity checks, to be removed eventually
    assert scheduler.get_job(job_id) is not None
    if cfg.client.containers.list(filters={
        'id': container_id, 'status': 'paused'
    }):
        raise AssertionError('Container is paused.')
    if not cfg.client.containers.list(filters={
        'id': container_id, 'status': 'running'
    }):
        # BUG FIX: this branch used to `assert scheduler.get_job(job_id) is None`
        # right before raising — contradicting the assertion at the top of the
        # function, so a bare AssertionError always fired first and masked the
        # informative message below. The contradictory assert is removed.
        raise AssertionError('Container is not running.')
    # end of sanity checks

    return cfg.client.containers.get(container_id).exec_run(
        cmd=definition['command'],
        user=definition['user'],
        environment=definition['environment'],
        workdir=definition.get('workdir'),
    )
def add(container_id: str, definitions: Mapping[str, Dict], paused: bool = False) -> None:
    """Register every job defined for a container with the scheduler.

    Each definition is enriched in place with its name, generated id and the
    owning container id, then scheduled via ``exec_job``. With ``paused`` set,
    jobs are added without a next run time.
    """
    log.debug(f'Adding jobs to container {container_id}.')
    for name, config in definitions.items():
        # service-scoped jobs derive their id from the service, not the container
        id_source = config.get("service_id") or (container_id,)
        new_job_id = generate_id(*id_source, name)
        config['job_name'] = name
        config['job_id'] = new_job_id
        config['container_id'] = container_id

        trigger_cls, trigger_args = config['trigger']
        trigger = trigger_cls(
            *trigger_args,
            timezone=config['timezone'],
            jitter=config['jitter'],
        )
        scheduler.add_job(
            func=exec_job,
            trigger=trigger,
            kwargs=config,
            id=new_job_id,
            max_instances=config['max'],
            next_run_time=None if paused else undefined_runtime,
            replace_existing=True,
        )

        state = "paused " if paused else ""
        log.info(
            f"{container_name(container_id)}: Added " + state + f"'{name}' ({new_job_id})."
        )
def main() -> None:
    """Entry point: acquire the lock file, configure, inspect, then listen.

    Exit codes: 0 on clean shutdown, 1 on configuration/lock/duplicate-instance
    errors (or whatever a raised SystemExit carries), 3 on unhandled exceptions.
    """
    # single-instance guard via a lock file; bail out immediately if taken
    if not lock.acquire(blocking=False):
        print("Couldn't acquire lock file at %s, exiting." % lock.path)
        sys.exit(1)
    log.info('Deck Chores %s started.' % __version__)
    try:
        generate_config()
        log_handler.setFormatter(logging.Formatter(cfg.logformat, style='{'))
        log.debug('Config: %s' % cfg.__dict__)
        if there_is_another_deck_chores_container():
            log.error(
                "There's another container running deck-chores, maybe paused or restarting."
            )
            raise SystemExit(1)
        jobs.start_scheduler()
        # remember when inspection ran so event listening can resume from there
        inspection_time = inspect_running_containers()
        listen(since=inspection_time)
    except SystemExit as e:
        # propagate the requested exit code (also raised by the signal handlers)
        exit_code = e.code
    except ConfigurationError as e:
        log.error(str(e))
        exit_code = 1
    except Exception as e:
        log.error('Caught unhandled exception:')
        log.exception(e)  # type: ignore
        exit_code = 3
    else:
        exit_code = 0
    finally:
        # always shut down the scheduler and release the lock before exiting
        shutdown()
        lock.release()
        sys.exit(exit_code)
def handle_unpause(event: dict) -> None:
    """Resume all scheduled jobs of a container that was just unpaused."""
    log.debug('Handling unpause.')
    container_id = event['Actor']['ID']
    for job in jobs.get_jobs_for_container(container_id):
        # BUG FIX: job.kwargs carries no 'container_name' key — `add` only
        # stores 'job_name', 'job_id' and 'container_id' — so the original
        # lookup raised KeyError. Resolve the name via the helper instead,
        # consistent with the other handlers.
        log.info(
            'Resuming job %s for %s'
            % (job.kwargs['job_name'], container_name(container_id))
        )
        job.resume()
def on_max_instances(event: events.JobSubmissionEvent) -> None:
    """Log that a job run was skipped because its instance limit is reached."""
    job = scheduler.get_job(event.job_id)
    kwargs = job.kwargs
    message = (
        f"{container_name(kwargs['container_id'])}: "
        f"Not running {kwargs['job_name']}, "
        f"maximum instances of {job.max_instances} are still running."
    )
    log.info(message)
def process_running_container_labels(container_id: str) -> None:
    """Parse a container's labels and register any job definitions found.

    For service-scoped jobs, only the first container of a service acquires
    the lock and registers the jobs; later containers of the same service
    are skipped.
    """
    service_id, options, definitions = parse.labels(container_id)
    # nothing to do for containers without job labels
    if not definitions:
        return
    if service_id and 'service' in options:
        if service_id in locking_container_to_services_map.values():
            # another container already registered this service's jobs
            log.debug('Service id has a registered job: %s' % service_id)
            return
        log.info('Locking service id: %s' % service_id)
        locking_container_to_services_map[container_id] = service_id
    jobs.add(container_id, definitions)
def handle_die(event: dict) -> None:
    """Remove the jobs of a container that died; release its service lock.

    For service-scoped jobs, only the lock-holding container triggers removal;
    other containers of the same service return without touching any jobs.
    """
    log.debug('Handling die.')
    container_id = event['Actor']['ID']
    service_id, options, definitions = parse.labels(container_id)
    if not definitions:
        return
    if service_id and 'service' in options:
        if container_id in locking_container_to_services_map:
            log.info('Unlocking service id: %s' % service_id)
            del locking_container_to_services_map[container_id]
        else:
            # this container doesn't hold the service lock; its jobs were
            # never registered under its id, so there is nothing to remove
            return
    # FIX: renamed the local from `container_name` — it shadowed the
    # module-level `container_name()` helper used by the other handlers.
    name = cfg.client.containers.get(container_id).name
    for job_name in definitions:
        log.info("Removing job '%s' for %s" % (job_name, name))
        # NOTE(review): `add` derives ids from a definition's service_id when
        # present — confirm container_id-based ids match for service jobs.
        jobs.remove(generate_id(container_id, job_name))
def listen(since: 'datetime | None' = None) -> None:
    """Dispatch Docker daemon container events to their handlers.

    since: start of the event window; defaults to the current UTC time.
           (FIX: annotation was the implicit-Optional `datetime = None`,
           which PEP 484 disallows; a string annotation avoids requiring
           a typing import.)
    """
    if since is None:
        since = datetime.utcnow()
    log.info('Listening to events.')
    for event_json in cfg.client.events(since=since):
        # cheap byte-level pre-filters before paying for JSON decoding;
        # the exact checks below make the final decision
        if b'container' not in event_json:
            continue
        if not any(
            marker in event_json
            for marker in (b'start', b'die', b'pause', b'unpause')
        ):
            continue
        event = from_json(event_json)
        log.debug('Daemon event: %s' % event)
        if event['Type'] != 'container':
            continue
        action = event['Action']
        if action == 'start':
            handle_start(event)
        elif action == 'die':
            handle_die(event)
        elif action == 'pause':
            handle_pause(event)
        elif action == 'unpause':
            handle_unpause(event)
def on_executed(event: events.JobExecutionEvent) -> None:
    """Log the outcome and captured output of a finished job execution."""
    job = scheduler.get_job(event.job_id)
    # skip jobs removed meanwhile and the internal inspection job
    if job is None or job.id == 'container_inspection':
        return
    definition = job.kwargs
    exit_code, raw_output = event.retval
    output_lines = raw_output.decode().splitlines()
    level = logging.INFO if exit_code == 0 else logging.CRITICAL
    log.log(
        level,
        f'Command {definition["command"]} in container {definition["container_id"]} '
        f'finished with exit code {exit_code}.',
    )
    if not output_lines:
        return
    log.info("== BEGIN of captured stdout & stderr ==")
    for output_line in output_lines:
        log.info(output_line)
    log.info("== END of captured stdout & stderr ====")
def exec_inspection(containers: dict) -> None:
    """Process the job labels of every given running container."""
    # TODO handle paused containers
    log.info('Inspecting running containers.')
    for running_container in containers:
        process_running_container_labels(running_container.id)
    log.debug('Finished inspection of running containers.')
def sigterm_handler(signum, frame):
    """Translate SIGTERM into a clean shutdown via SystemExit."""
    log.info('Received SIGTERM.')
    raise SystemExit(0)
def sigint_handler(signum, frame):
    """Translate a keyboard interrupt (SIGINT) into a clean shutdown."""
    log.info('Keyboard interrupt.')
    raise SystemExit(0)