def restart(args, options):
    """usage: restart cluster/role/env/job
                 [--shards=SHARDS]
                 [--batch_size=INT]
                 [--updater_health_check_interval_seconds=SECONDS]
                 [--max_per_shard_failures=INT]
                 [--max_total_failures=INT]
                 [--restart_threshold=INT]
                 [--watch_secs=SECONDS]

    Performs a rolling restart of shards within a job.

    Restarts are fully controlled client-side, so aborting halts the restart.
    """
    # Turn the raw command-line arguments into a client handle, the job key
    # and (possibly) the path of a job configuration file.
    client, key, cfg_path = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())

    # A job configuration is only loaded when a config file was identified.
    job_config = None
    if cfg_path:
        job_config = get_job_config(key.to_path(), cfg_path, options)

    # Bundle the rolling-restart tunables taken from the parsed options.
    rollout = UpdaterConfig(
        options.batch_size,
        options.restart_threshold,
        options.watch_secs,
        options.max_per_shard_failures,
        options.max_total_failures)

    # NOTE(review): the usage text names --updater_health_check_interval_seconds
    # while the value is read from options.health_check_interval_seconds;
    # presumably the option's dest differs from its flag name -- confirm.
    response = client.restart(
        key,
        options.shards,
        rollout,
        options.health_check_interval_seconds,
        config=job_config)
    check_and_log_response(response)

    scheduler_url = client.scheduler.scheduler().url
    handle_open(scheduler_url, key.role, key.env, key.name)
def cancel_update(args, options):
    """usage: cancel_update cluster/role/env/job

    Unlocks a job for updates.
    A job may be locked if a client's update session terminated abnormally,
    or if another user is actively updating the job.  This command should only
    be used when the user is confident that they are not conflicting with another user.
    """
    # Resolve the arguments into a client handle, the job key and an optional
    # configuration file path.
    client, key, cfg_path = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())

    # Only load a job configuration when a config file was identified.
    job_config = None
    if cfg_path:
        job_config = get_job_config(key.to_path(), cfg_path, options)

    response = client.cancel_update(key, config=job_config)
    check_and_log_response(response)
def start_cron(args, options):
    """usage: start_cron cluster/role/env/job

    Invokes a cron job immediately, out of its normal cron cycle.
    This does not affect the cron cycle in any way.
    """
    # Resolve the arguments into a client handle, the job key and an optional
    # configuration file path.
    client, key, cfg_path = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())

    # Only load a job configuration when a config file was identified.
    job_config = None
    if cfg_path:
        job_config = get_job_config(key.to_path(), cfg_path, options)

    response = client.start_cronjob(key, config=job_config)
    check_and_log_response(response)

    scheduler_url = client.scheduler.scheduler().url
    handle_open(scheduler_url, key.role, key.env, key.name)
def kill(args, options):
    """usage: kill cluster/role/env/job

    Kills a running job, blocking until all tasks have terminated.

    Default behaviour is to kill all shards in the job, but the kill
    can be limited to specific shards with the --shards option
    """
    # Resolve the arguments (using the options passed in by the caller) into a
    # client handle, the job key and an optional configuration file path.
    client, key, cfg_path = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())

    # NOTE(review): from here on the `options` parameter is rebound to
    # app.get_options(); the sibling commands keep using the passed-in options.
    # Confirm whether this rebinding is intentional.
    options = app.get_options()

    # Only load a job configuration when a config file was identified.
    job_config = None
    if cfg_path:
        job_config = get_job_config(key.to_path(), cfg_path, options)

    response = client.kill_job(key, options.shards, config=job_config)
    check_and_log_response(response)

    scheduler_url = client.scheduler.scheduler().url
    handle_open(scheduler_url, key.role, key.env, key.name)
def status(args, options):
    """usage: status cluster/role/env/job

    Fetches and prints information about the active tasks in a job.
    """
    # Although the usage text mentions only active tasks, both the active and
    # the inactive tasks returned by the status query are logged below.

    def is_active(task):
        """Return True when the task's status is one of ACTIVE_STATES."""
        return task.status in ACTIVE_STATES

    def print_task(scheduled_task):
        """Build and return the multi-line detail text for one scheduled task.

        Despite the name, nothing is printed here; the caller logs the result.
        """
        assigned_task = scheduled_task.assignedTask
        task_info = assigned_task.task
        # Collect fragments and join once instead of repeated string `+=`.
        parts = []
        if task_info:
            parts.append('cpus: %s, ram: %s MB, disk: %s MB' % (
                task_info.numCpus, task_info.ramMb, task_info.diskMb))
        if assigned_task.assignedPorts:
            parts.append('\n\tports: %s' % assigned_task.assignedPorts)
        # NOTE(review): task_info is dereferenced here (and for packages below)
        # without the truthiness guard used above -- confirm it can never be
        # None at this point.
        parts.append('\n\tfailure count: %s (max %s)' % (
            scheduled_task.failureCount, task_info.maxTaskFailures))
        parts.append('\n\tevents:')
        for event in scheduled_task.taskEvents:
            # The timestamp is divided by 1000 before conversion, i.e. it is
            # treated as milliseconds.
            parts.append('\n\t\t %s %s: %s' % (
                datetime.fromtimestamp(event.timestamp / 1000),
                ScheduleStatus._VALUES_TO_NAMES[event.status],
                event.message))
        parts.append('\n\tpackages:')
        for pkg in assigned_task.task.packages:
            parts.append('\n\t\trole: %s, package: %s, version: %s' % (
                pkg.role, pkg.name, pkg.version))
        return ''.join(parts)

    def print_tasks(tasks):
        """Log a summary line plus details for every task in `tasks`."""
        for task in tasks:
            details = print_task(task)
            log.info('role: %s, env: %s, name: %s, shard: %s, status: %s on %s\n%s' % (
                task.assignedTask.task.owner.role,
                task.assignedTask.task.environment,
                task.assignedTask.task.jobName,
                task.assignedTask.instanceId,
                ScheduleStatus._VALUES_TO_NAMES[task.status],
                task.assignedTask.slaveHost,
                details))
            # Packages are also logged one per line, in addition to the
            # "packages:" section already contained in the details text.
            for pkg in task.assignedTask.task.packages:
                log.info('\tpackage %s/%s/%s' % (pkg.role, pkg.name, pkg.version))

    # The third element of the disambiguation result is not needed by this command.
    api, job_key, _ = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    resp = api.check_status(job_key)
    check_and_log_response(resp)

    tasks = resp.result.scheduleStatusResult.tasks
    if tasks:
        # Materialize real lists: on Python 3 `filter()` returns a lazy
        # iterator, so calling len() on its result raises TypeError.
        active_tasks = [task for task in tasks if is_active(task)]
        log.info('Active Tasks (%s)' % len(active_tasks))
        print_tasks(active_tasks)
        inactive_tasks = [task for task in tasks if not is_active(task)]
        log.info('Inactive Tasks (%s)' % len(inactive_tasks))
        print_tasks(inactive_tasks)
    else:
        log.info('No tasks found.')