Example #1
def run(args, options):
    """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
    # TODO(William Farner): Add support for invoking on individual shards.
    # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
    if not args:
        die("job path is required")
    job_path = args.pop(0)
    new_cmd = ["task", "run"]
    instances_spec = job_path
    if options.num_threads != 1:
        new_cmd.append("--threads=%s" % options.num_threads)
    if options.ssh_user is not None:
        new_cmd.append("--ssh-user=%s" % options.ssh_user)
    if options.executor_sandbox:
        new_cmd.append("--executor-sandbox")
    new_cmd.append('"%s"' % " ".join(args))
    v1_deprecation_warning("ssh", new_cmd)

    try:
        cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
    except AuroraJobKey.Error as e:
        die('Invalid job path "%s": %s' % (job_path, e))

    command = " ".join(args)
    cluster = CLUSTERS[cluster_name]
    dcr = DistributedCommandRunner(cluster, role, env, [name], options.ssh_user)
    dcr.run(command, parallelism=options.num_threads, executor_sandbox=options.executor_sandbox)
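All of the examples on this page call the same die() helper. Its observable contract is pinned down by the unit test in Example #53 below: it logs the fatal message and exits the process with status 1. A minimal sketch consistent with that test (the real Aurora helper may differ in detail):

import logging
import sys

def die(msg):
    # Log the fatal message, then terminate with a non-zero exit status.
    logging.error(msg)
    sys.exit(1)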
Example #2
def parse_and_validate_sla_overrides(options, hostnames):
    """Parses and validates host SLA override 3-tuple (percentage, duration, reason).

  In addition, logs an admin message about overriding default SLA values.

  :param options: command line options
  :type options: list of app.option
  :param hostnames: host names override is issued to
  :type hostnames: list of string
  :rtype: a tuple of: override percentage (float) and override duration (Amount)
  """
    has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
    all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
    if has_override != all_overrides:
        die('All --override_* options are required when attempting to override default SLA values.')

    percentage = parse_sla_percentage(options.percentage) if options.percentage else None
    duration = parse_time(options.duration) if options.duration else None
    if options.reason:
        log_admin_message(
            logging.WARNING,
            'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
            'hosts: %s. New percentage: %s, duration: %s, override reason: %s'
            % (SLA_UPTIME_PERCENTAGE_LIMIT, SLA_UPTIME_DURATION_LIMIT,
               hostnames, percentage, duration, options.reason))

    return percentage or SLA_UPTIME_PERCENTAGE_LIMIT, duration or SLA_UPTIME_DURATION_LIMIT
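The has_override != all_overrides comparison above is an all-or-nothing check: it is true exactly when some, but not all, of the three override flags are set. A standalone sketch of the same idiom (names are illustrative):

def all_or_none(*values):
    # True when every value is set or none are; False for a partial set.
    provided = [bool(v) for v in values]
    return any(provided) == all(provided)

assert all_or_none(None, None, None)       # nothing overridden: OK
assert all_or_none('95', '1h', 'reason')   # fully overridden: OK
assert not all_or_none('95', None, None)   # partial override: rejected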
Example #3
def parse_and_validate_sla_overrides(options, hostnames):
  """Parses and validates host SLA override 3-tuple (percentage, duration, reason).

  In addition, logs an admin message about overriding default SLA values.

  :param options: command line options
  :type options: list of app.option
  :param hostnames: host names override is issued to
  :type hostnames: list of string
  :rtype: a tuple of: override percentage (float) and override duration (Amount)
  """
  has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
  all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
  if has_override != all_overrides:
    die('All --override_* options are required when attempting to override default SLA values.')

  percentage = parse_sla_percentage(options.percentage) if options.percentage else None
  duration = parse_time(options.duration) if options.duration else None
  if options.reason:
    log_admin_message(
      logging.WARNING,
      'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
      'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
        SLA_UPTIME_PERCENTAGE_LIMIT,
        SLA_UPTIME_DURATION_LIMIT,
        hostnames,
        percentage,
        duration,
        options.reason))

  return percentage or SLA_UPTIME_PERCENTAGE_LIMIT, duration or SLA_UPTIME_DURATION_LIMIT
Example #4
def perform_maintenance_hosts(cluster):
  """usage: perform_maintenance_hosts {--filename=filename | --hosts=hosts}
                                      [--batch_size=num]
                                      [--post_drain_script=path]
                                      [--grouping=function]
                                      cluster

  Asks the scheduler to remove any running tasks from the machines and take them
  out of service temporarily, perform some action on them, then return the
  machines to service.
  """
  options = app.get_options()
  drainable_hosts = parse_hosts(options)

  if options.post_drain_script:
    if not os.path.exists(options.post_drain_script):
      die("No such file: %s" % options.post_drain_script)
    cmd = os.path.abspath(options.post_drain_script)
    drained_callback = lambda host: subprocess.Popen([cmd, host])
  else:
    drained_callback = None

  MesosMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
      drainable_hosts,
      batch_size=int(options.batch_size),
      callback=drained_callback,
      grouping_function=options.grouping)
Example #5
 def warn_if_dangerous_change(api, job_spec, config):
     # Get the current job status, so that we can check if there's anything
     # dangerous about this update.
     resp = api.query_no_configs(
         api.build_query(config.role(),
                         config.name(),
                         statuses=ACTIVE_STATES,
                         env=config.environment()))
     if resp.responseCode != ResponseCode.OK:
         die('Could not get job status from server for comparison: %s' %
             resp.messageDEPRECATED)
     remote_tasks = [
         t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks
     ]
     resp = api.populate_job_config(config)
     if resp.responseCode != ResponseCode.OK:
         die('Server could not populate job config for comparison: %s' %
             resp.messageDEPRECATED)
     local_task_count = len(
         resp.result.populateJobResult.populatedDEPRECATED)
     remote_task_count = len(remote_tasks)
     # Warn when the task count changes by a factor of four or more in
     # either direction.
     if (local_task_count >= 4 * remote_task_count
             or local_task_count <= remote_task_count / 4
             or local_task_count == 0):
         print(
             'Warning: this update is a large change. Press ^c within 5 seconds to abort'
         )
         time.sleep(5)
Example #6
def do_open(args, _):
    """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
    cluster_name = role = env = job = None
    if len(args) == 0:
        print('Open command requires a jobkey parameter.')
        exit(1)
    v1_deprecation_warning("open", ["job", "open"])
    args = args[0].split("/")
    if len(args) > 0:
        cluster_name = args[0]
        if len(args) > 1:
            role = args[1]
            if len(args) > 2:
                env = args[2]
                if len(args) > 3:
                    job = args[3]
                else:
                    # TODO(ksweeney): Remove this after MESOS-2945 is completed.
                    die('env scheduler pages are not yet implemented, please specify job')

    if not cluster_name:
        die('cluster is required')

    api = make_client(cluster_name)

    import webbrowser
    webbrowser.open_new_tab(
        synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env,
                       job))
Example #7
def do_open(args, _):
  """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
  cluster_name = role = env = job = None
  args = args[0].split("/")
  if len(args) > 0:
    cluster_name = args[0]
    if len(args) > 1:
      role = args[1]
      if len(args) > 2:
        env = args[2]
        if len(args) > 3:
          job = args[3]
        else:
          # TODO(ksweeney): Remove this after MESOS-2945 is completed.
          die('env scheduler pages are not yet implemented, please specify job')

  if not cluster_name:
    die('cluster is required')

  api = make_client(cluster_name)

  import webbrowser
  webbrowser.open_new_tab(
      synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env, job))
Example #8
def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
          '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
        (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy:   %s' % job.cronCollisionPolicy)

  options = app.get_options()
  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple
  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  cluster, role = cluster_and_role.split('/')
  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)
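The simple formatter leans on str.format attribute access: {1.key.role} drills into the second positional argument. A self-contained illustration with stand-in objects (all values made up):

from types import SimpleNamespace

job = SimpleNamespace(
    key=SimpleNamespace(role='www-data', environment='prod', name='hello'),
    cronSchedule='*/5 * * * *',
    cronCollisionPolicy='KILL_EXISTING')
print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format('devcluster', job))
# -> devcluster/www-data/prod/hello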
Example #9
  def disambiguate_args_or_die(cls, args, options, client_factory=AuroraClientAPI):
    """
    Returns a (AuroraClientAPI, AuroraJobKey, AuroraConfigFile:str) tuple
    if one can be found given the args, potentially querying the scheduler with the returned client.
    Calls die() with an appropriate error message otherwise.

    Arguments:
      args: args from app command invocation.
      options: options from app command invocation. must have env and cluster attributes.
      client_factory: a callable (cluster) -> AuroraClientAPI.
    """
    if not args:
      die('job path is required')
    try:
      job_key = AuroraJobKey.from_path(args[0])
      client = client_factory(job_key.cluster)
      config_file = args[1] if len(args) > 1 else None  # the config for hooks
      return client, job_key, config_file
    except AuroraJobKey.Error:
      log.warning("Failed to parse job path, falling back to compatibility mode")
      role = args[0] if len(args) > 0 else None
      name = args[1] if len(args) > 1 else None
      env = None
      config_file = None  # deprecated form does not support hooks functionality
      cluster = options.cluster
      if not cluster:
        die('cluster is required')
      client = client_factory(cluster)
      return client, cls._disambiguate_or_die(client, role, env, name), config_file
Example #10
def make_admin_client(cluster):
    if cluster not in CLUSTERS:
        die('Unknown cluster: %s. Known clusters: %s' %
            (cluster, ", ".join(CLUSTERS.keys())))

    verbose = getattr(app.get_options(), 'verbosity', 'normal') == 'verbose'
    return AuroraClientAPI(CLUSTERS[cluster],
                           AURORA_ADMIN_USER_AGENT_NAME,
                           verbose=verbose)
Example #11
def diff(job_spec, config_file):
  """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an alternate
  diff program by specifying the DIFF_VIEWER environment variable."""
  options = app.get_options()

  config = get_job_config(job_spec, config_file, options)
  if options.rename_from:
    cluster, role, env, name = options.rename_from
  else:
    cluster = config.cluster()
    role = config.role()
    env = config.environment()
    name = config.name()
  api = make_client(cluster)
  resp = api.query(api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
  resp = api.populate_job_config(config)
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  local_tasks = resp.result.populateJobResult.populated

  pp = pprint.PrettyPrinter(indent=2)
  def pretty_print_task(task):
    # The raw configuration is not interesting - we only care about what gets parsed.
    task.configuration = None
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=json.loads(task.executorConfig.data))
    return pp.pformat(vars(task))

  def pretty_print_tasks(tasks):
    return ',\n'.join([pretty_print_task(t) for t in tasks])

  def dump_tasks(tasks, out_file):
    out_file.write(pretty_print_tasks(tasks))
    out_file.write('\n')
    out_file.flush()

  diff_program = os.environ.get('DIFF_VIEWER', 'diff')
  with NamedTemporaryFile() as local:
    dump_tasks(local_tasks, local)
    with NamedTemporaryFile() as remote:
      dump_tasks(remote_tasks, remote)
      result = subprocess.call([diff_program, remote.name, local.name])
      # Unlike most commands, diff doesn't return zero on success; it returns
      # 1 when a successful diff is non-empty.
      if result != 0 and result != 1:
        return result
      else:
        return 0
Example #12
def parse_hosts(options):
  if bool(options.filename) == bool(options.hosts):
    die('Please specify either --filename or --hosts')
  if options.filename:
    # Read one host per line, skipping blanks; use a distinct name for the
    # file handle so the result list does not shadow it.
    with open(options.filename, 'r') as hosts_file:
      hosts = [line.strip() for line in hosts_file if line.strip()]
  elif options.hosts:
    hosts = [hostname.strip() for hostname in options.hosts.split(",") if hostname.strip()]
  if not hosts:
    die('No valid hosts found.')
  return hosts
Example #13
def _validate_update_config(config):
    job_size = config.instances()
    max_failures = config.update_config().max_total_failures().get()

    if max_failures >= job_size:
        die(UPDATE_CONFIG_MAX_FAILURES_ERROR % (job_size, job_size - 1))

    if config.is_dedicated():
        min_failure_threshold = int(math.floor(job_size * 0.02))
        if max_failures < min_failure_threshold:
            die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold))
Example #14
def parse_sla_percentage(percentage):
  """Parses percentage value for an SLA check.

  :param percentage: string percentage to parse
  :type percentage: string
  :rtype: float
  """
  val = float(percentage)
  if val <= 0 or val > 100:
    die('Invalid percentage %s. Must be within (0, 100].' % percentage)
  return val
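Boundary behavior, for reference: the accepted range is the half-open interval (0, 100].

assert parse_sla_percentage('95.5') == 95.5
assert parse_sla_percentage('100') == 100.0
# parse_sla_percentage('0') and parse_sla_percentage('101') both call die();
# a non-numeric string raises ValueError from float() before the range check.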
Example #15
def parse_sla_percentage(percentage):
    """Parses percentage value for an SLA check.

  :param percentage: string percentage to parse
  :type percentage: string
  :rtype: float
  """
    val = float(percentage)
    if val <= 0 or val > 100:
        die('Invalid percentage %s. Must be within (0, 100].' % percentage)
    return val
Example #16
def make_admin_client(cluster):
  if cluster not in CLUSTERS:
    die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))

  options = app.get_options()
  verbose = getattr(options, 'verbosity', 'normal') == 'verbose'

  return AuroraClientAPI(
      CLUSTERS[cluster],
      AURORA_ADMIN_USER_AGENT_NAME,
      verbose=verbose,
      bypass_leader_redirect=options.bypass_leader_redirect)
Example #17
def parse_script(filename):
  """Parses shell script from the provided file and wraps it up into a subprocess callback.

  :param filename: name of the script file
  :type filename: string
  :rtype: function
  """
  if filename:
    if not os.path.exists(filename):
      die("No such file: %s" % filename)
    cmd = os.path.abspath(filename)
    return lambda host: subprocess.Popen([cmd, host])
  else:
    return None
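Illustrative use with a hypothetical script path; the returned callback launches the script with the host name as its single argument and, unlike the variant in Example #22, does not wait for it to finish:

callback = parse_script('/usr/local/bin/post_drain.sh')  # hypothetical path
if callback is not None:
    callback('host1.example.com')  # runs: post_drain.sh host1.example.com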
Example #18
def perform_maintenance_hosts(cluster):
    """usage: perform_maintenance_hosts {--filename=filename | --hosts=hosts}
                                      [--post_drain_script=path]
                                      [--grouping=function]
                                      [--override_percentage=percentage]
                                      [--override_duration=duration]
                                      [--override_reason=reason]
                                      [--unsafe_hosts_file=unsafe_hosts_filename]
                                      cluster

  Asks the scheduler to remove any running tasks from the machines and take them
  out of service temporarily, perform some action on them, then return the
  machines to service.
  """
    options = app.get_options()
    drainable_hosts = parse_hostnames(options.filename, options.hosts)
    get_grouping_or_die(options.grouping)

    has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
    all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
    if has_override != all_overrides:
        die("All --override_* options are required when attempting to override default SLA values.")

    percentage = parse_sla_percentage(options.percentage) if options.percentage else None
    duration = parse_time(options.duration) if options.duration else None
    if options.reason:
        log_admin_message(
            logging.WARNING,
            "Default SLA values (percentage: %s, duration: %s) are overridden for the following "
            "hosts: %s. New percentage: %s, duration: %s, override reason: %s"
            % (
                HostMaintenance.SLA_UPTIME_PERCENTAGE_LIMIT,
                HostMaintenance.SLA_UPTIME_DURATION_LIMIT,
                drainable_hosts,
                percentage,
                duration,
                options.reason,
            ),
        )

    drained_callback = parse_script(options.post_drain_script)

    HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
        drainable_hosts,
        grouping_function=options.grouping,
        callback=drained_callback,
        percentage=percentage,
        duration=duration,
        output_file=options.unsafe_hosts_filename,
    )
Example #19
    def parse_jobs_file(filename):
        result = {}
        with open(filename, 'r') as overrides:
            for line in overrides:
                if not line.strip():
                    continue

                tokens = line.split()
                if len(tokens) != 3:
                    die('Invalid line in %s:%s' % (filename, line))
                job_key = AuroraJobKey.from_path(tokens[0])
                result[job_key] = JobUpTimeLimit(
                    job=job_key,
                    percentage=parse_sla_percentage(tokens[1]),
                    duration_secs=parse_time(tokens[2]).as_(Time.SECONDS))
        return result
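Each non-blank line of the input file carries three whitespace-separated tokens: a job path, an SLA percentage, and a duration. A hypothetical file would look like:

devcluster/www-data/prod/hello 95 1h
devcluster/www-data/prod/world 80 30m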
Example #20
  def parse_jobs_file(filename):
    result = {}
    with open(filename, 'r') as overrides:
      for line in overrides:
        if not line.strip():
          continue

        tokens = line.split()
        if len(tokens) != 3:
          die('Invalid line in %s:%s' % (filename, line))
        job_key = AuroraJobKey.from_path(tokens[0])
        result[job_key] = JobUpTimeLimit(
            job=job_key,
            percentage=parse_sla_percentage(tokens[1]),
            duration_secs=parse_time(tokens[2]).as_(Time.SECONDS)
        )
    return result
Example #21
 def warn_if_dangerous_change(api, job_spec, config):
   # Get the current job status, so that we can check if there's anything
   # dangerous about this update.
   resp = api.query_no_configs(api.build_query(config.role(), config.name(),
       statuses=ACTIVE_STATES, env=config.environment()))
   if resp.responseCode != ResponseCode.OK:
     die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
   remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
   resp = api.populate_job_config(config)
   if resp.responseCode != ResponseCode.OK:
     die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
   local_task_count = len(resp.result.populateJobResult.populated)
   remote_task_count = len(remote_tasks)
   # Warn when the task count changes by a factor of four or more in either direction.
   if (local_task_count >= 4 * remote_task_count or local_task_count <= remote_task_count / 4
       or local_task_count == 0):
     print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
     time.sleep(5)
Example #22
def parse_script(filename):
    """Parses shell script from the provided file and wraps it up into a subprocess callback.

  :param filename: name of the script file
  :type filename: string
  :rtype: function
  """
    def callback(host):
        # cmd is bound below, before this callback can be returned or invoked.
        subprocess.Popen([cmd, host]).wait()

    if filename:
        if not os.path.exists(filename):
            die("No such file: %s" % filename)
        cmd = os.path.abspath(filename)
        return callback
    else:
        return None
Example #23
def _validate_health_check_config(config):
  health_check_config = config.health_check_config().get()
  health_checker = health_check_config.get('health_checker', {})
  # If we have old-style of configuring.
  # TODO (AURORA-1563): Remove this code after we drop support for defining these directly in
  # HealthCheckConfig.
  for deprecated in {'endpoint', 'expected_response', 'expected_response_code'}:
    if deprecated in health_check_config:
      log.warn(HTTP_DEPRECATION_WARNING)
      break
  if SHELL_HEALTH_CHECK in health_checker:
    # Make sure we specified a shell_command if we chose a shell config.
    shell_health_checker = health_checker.get(SHELL_HEALTH_CHECK, {})
    shell_command = shell_health_checker.get('shell_command')
    if not shell_command:
      # Must define a command.
      die(MUST_PROVIDE_SHELL_COMMAND_ERROR)
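For context, the shell branch expects a health_check_config of roughly this shape. The exact key for SHELL_HEALTH_CHECK is an assumption here ('shell' in recent Aurora versions), and the command is illustrative:

health_check_config = {
    'health_checker': {
        'shell': {  # assumed value of SHELL_HEALTH_CHECK
            'shell_command': 'curl -f http://localhost:8080/health',  # illustrative
        },
    },
}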
Example #24
def help_command(args):
    """usage: help [subcommand]

  Prints help for using the aurora client, or one of its specific subcommands.
  """
    if not args:
        print(generate_full_usage())
        sys.exit(0)

    if len(args) > 1:
        die('Please specify at most one subcommand.')

    subcmd = args[0]
    if subcmd in app.get_commands():
        app.command_parser(subcmd).print_help()
    else:
        print('Subcommand %s not found.' % subcmd)
        sys.exit(1)
Example #25
def help(args):
  """usage: help [subcommand]

  Prints help for using the aurora client, or one of its specific subcommands.
  """
  if not args:
    print(generate_full_usage())
    sys.exit(0)

  if len(args) > 1:
    die('Please specify at most one subcommand.')

  subcmd = args[0]
  if subcmd in app.get_commands():
    app.command_parser(subcmd).print_help()
  else:
    print('Subcommand %s not found.' % subcmd)
    sys.exit(1)
Example #26
def parse_hostnames_optional(list_option, file_option):
  """Parses host names from a comma-separated list or a filename.

  Does not require either of the arguments (returns None list if no option provided).

  :param list_option: command option with comma-separated list of host names
  :type list_option: app.option
  :param file_option: command option with filename (one host per line)
  :type file_option: app.option
  :rtype: list of host names or None.
  """
  if bool(list_option) and bool(file_option):
    die('Cannot specify both filename and list for the same option.')
  hostnames = None
  if file_option:
    hostnames = _parse_hostname_file(file_option)
  elif list_option:
    hostnames = _parse_hostname_list(list_option)
  return hostnames
Example #27
def reconcile_tasks(cluster):
  """usage: reconcile_tasks
            [--type=RECONCILIATION_TYPE]
            [--batch_size=BATCHSIZE]
            cluster

  Reconcile the Mesos master and the scheduler. Default runs explicit
  reconciliation with a batch size set in reconciliation_explicit_batch_size
  scheduler configuration option.
  """
  options = app.get_options()
  client = make_admin_client_with_options(cluster)
  if options.type == 'implicit':
    resp = client.reconcile_implicit()
  elif options.type == 'explicit':
    resp = client.reconcile_explicit(options.batch_size)
  else:
    die('Unexpected value for --type: %s' % options.type)
  check_and_log_response(resp)
Example #28
def reconcile_tasks(cluster):
    """usage: reconcile_tasks
            [--type=RECONCILIATION_TYPE]
            [--batch_size=BATCHSIZE]
            cluster

  Reconcile the Mesos master and the scheduler. Default runs explicit
  reconciliation with a batch size set in reconciliation_explicit_batch_size
  scheduler configuration option.
  """
    options = app.get_options()
    client = make_admin_client_with_options(cluster)
    if options.type == 'implicit':
        resp = client.reconcile_implicit()
    elif options.type == 'explicit':
        resp = client.reconcile_explicit(options.batch_size)
    else:
        die('Unexpected value for --type: %s' % options.type)
    check_and_log_response(resp)
Example #29
def parse_hostnames_optional(list_option, file_option):
    """Parses host names from a comma-separated list or a filename.

  Does not require either of the arguments (returns None list if no option provided).

  :param list_option: command option with comma-separated list of host names
  :type list_option: app.option
  :param file_option: command option with filename (one host per line)
  :type file_option: app.option
  :rtype: list of host names or None.
  """
    if bool(list_option) and bool(file_option):
        die('Cannot specify both filename and list for the same option.')
    hostnames = None
    if file_option:
        hostnames = _parse_hostname_file(file_option)
    elif list_option:
        hostnames = _parse_hostname_list(list_option)
    return hostnames
Example #30
def parse_hostnames(filename, hostnames):
    """Parses host names from a comma-separated list or a filename.

  Fails if neither filename nor hostnames provided.

  :param filename: filename with host names (one per line)
  :type filename: string
  :param hostnames: comma-separated list of host names
  :type hostnames: string
  :rtype: list of host names
  """
    if bool(filename) == bool(hostnames):
        die('Please specify either --filename or --hosts')
    if filename:
        hostnames = _parse_hostname_file(filename)
    elif hostnames:
        hostnames = _parse_hostname_list(hostnames)
    if not hostnames:
        die('No valid hosts found.')
    return hostnames
Example #31
def parse_hostnames(filename, hostnames):
  """Parses host names from a comma-separated list or a filename.

  Fails if neither filename nor hostnames provided.

  :param filename: filename with host names (one per line)
  :type filename: string
  :param hostnames: comma-separated list of host names
  :type hostnames: string
  :rtype: list of host names
  """
  if bool(filename) == bool(hostnames):
    die('Please specify either --filename or --hosts')
  if filename:
    hostnames = _parse_hostname_file(filename)
  elif hostnames:
    hostnames = _parse_hostname_list(hostnames)
  if not hostnames:
    die('No valid hosts found.')
  return hostnames
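The difference from parse_hostnames_optional (Examples #26 and #29) is the failure mode: here exactly one source is required, while the optional variant returns None when neither is given. Illustrative calls (the file name is hypothetical):

parse_hostnames(None, 'a.example.com,b.example.com')
# -> ['a.example.com', 'b.example.com']
parse_hostnames('hosts.txt', None)             # one host per line from the file
parse_hostnames(None, None)                    # dies: specify either --filename or --hosts
parse_hostnames('hosts.txt', 'a.example.com')  # dies for the same reason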
Example #32
def set_quota(cluster, role, cpu_str, ram, disk):
  """usage: set_quota cluster role cpu ram[MGT] disk[MGT]

  Alters the amount of production quota allocated to a user.
  """
  try:
    ram_size = parse_data(ram).as_(Data.MB)
    disk_size = parse_data(disk).as_(Data.MB)
  except ValueError as e:
    die(str(e))

  try:
    cpu = float(cpu_str)
    ram_mb = int(ram_size)
    disk_mb = int(disk_size)
  except ValueError as e:
    die(str(e))

  resp = make_admin_client(cluster).set_quota(role, cpu, ram_mb, disk_mb)
  check_and_log_response(resp)
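A worked example of the unit handling, assuming parse_data follows binary (1024-based) units:

# Hypothetical invocation:
#   set_quota('devcluster', 'www-data', '10', '4G', '100G')
# parse_data('4G').as_(Data.MB)   -> 4096.0,   so ram_mb  = 4096
# parse_data('100G').as_(Data.MB) -> 102400.0, so disk_mb = 102400
# cpu = float('10') = 10.0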
Example #33
def prune_tasks(args, options):
  if len(args) == 0:
    die('Must specify at least cluster.')
  cluster = args[0]

  t = TaskQuery()
  if options.states:
    t.statuses = set(map(ScheduleStatus._NAMES_TO_VALUES.get, options.states.split(',')))
  if options.role:
    t.role = options.role
  if options.environment:
    t.environment = options.environment
  if options.limit:
    t.limit = options.limit

  api = make_admin_client_with_options(cluster)
  rsp = api.prune_tasks(t)
  if rsp.responseCode != ResponseCode.OK:
    die('Failed to prune tasks: %s' % combine_messages(rsp))
  else:
    print("Tasks pruned.")
Example #34
def set_quota(cluster, role, cpu_str, ram, disk):
  """usage: set_quota cluster role cpu ram[MGT] disk[MGT]

  Alters the amount of production quota allocated to a user.
  """
  try:
    ram_size = parse_data(ram).as_(Data.MB)
    disk_size = parse_data(disk).as_(Data.MB)
  except ValueError as e:
    die(str(e))

  try:
    cpu = float(cpu_str)
    ram_mb = int(ram_size)
    disk_mb = int(disk_size)
  except ValueError as e:
    die(str(e))

  options = app.get_options()
  resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).set_quota(role, cpu, ram_mb, disk_mb)
  check_and_log_response(resp)
Example #35
def set_quota(cluster, role, cpu_str, ram, disk):
    """usage: set_quota cluster role cpu ram[MGT] disk[MGT]

  Alters the amount of production quota allocated to a user.
  """
    try:
        ram_size = parse_data(ram).as_(Data.MB)
        disk_size = parse_data(disk).as_(Data.MB)
    except ValueError as e:
        die(str(e))

    try:
        cpu = float(cpu_str)
        ram_mb = int(ram_size)
        disk_mb = int(disk_size)
    except ValueError as e:
        die(str(e))

    options = app.get_options()
    resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).set_quota(role, cpu, ram_mb, disk_mb)
    check_and_log_response(resp)
Example #36
def _validate_update_config(config):
    job_size = config.instances()
    update_config = config.update_config()
    health_check_config = config.health_check_config()

    max_failures = update_config.max_total_failures().get()
    watch_secs = update_config.watch_secs().get()
    initial_interval_secs = health_check_config.initial_interval_secs().get()
    max_consecutive_failures = health_check_config.max_consecutive_failures().get()
    interval_secs = health_check_config.interval_secs().get()

    if max_failures >= job_size:
        die(UPDATE_CONFIG_MAX_FAILURES_ERROR % (job_size, job_size - 1))

    if config.is_dedicated():
        min_failure_threshold = int(math.floor(job_size * 0.02))
        if max_failures < min_failure_threshold:
            die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR %
                (job_size, min_failure_threshold))

    target_watch = initial_interval_secs + (max_consecutive_failures *
                                            interval_secs)
    if watch_secs <= target_watch:
        die(WATCH_SECS_INSUFFICIENT_ERROR_FORMAT %
            (watch_secs, target_watch, initial_interval_secs,
             max_consecutive_failures, interval_secs))
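The target_watch check encodes how long the health checker needs before it can possibly declare an instance unhealthy: the initial grace period plus one probe interval per allowed consecutive failure. A worked example with illustrative values:

initial_interval_secs = 15
max_consecutive_failures = 3
interval_secs = 10
target_watch = initial_interval_secs + max_consecutive_failures * interval_secs  # 45
# watch_secs must be strictly greater than 45, or the update watcher could
# declare an instance healthy before the health checker has had a chance to fail it.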
Example #37
def _validate_update_config(config):
  job_size = config.instances()
  update_config = config.update_config()
  health_check_config = config.health_check_config()

  max_failures = update_config.max_total_failures().get()
  watch_secs = update_config.watch_secs().get()
  initial_interval_secs = health_check_config.initial_interval_secs().get()
  max_consecutive_failures = health_check_config.max_consecutive_failures().get()
  min_consecutive_successes = health_check_config.min_consecutive_successes().get()
  interval_secs = health_check_config.interval_secs().get()

  if max_failures >= job_size:
    die(UPDATE_CONFIG_MAX_FAILURES_ERROR % (job_size, job_size - 1))

  if config.is_dedicated():
    min_failure_threshold = int(math.floor(job_size * 0.02))
    if max_failures < min_failure_threshold:
      die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold))

  params = [
      (watch_secs, 'watch_secs'),
      (max_consecutive_failures, 'max_consecutive_failures'),
      (min_consecutive_successes, 'min_consecutive_successes'),
      (initial_interval_secs, 'initial_interval_secs'),
      (interval_secs, 'interval_secs')
  ]

  for value, name in params:
    if value < 0:
      die(INVALID_VALUE_ERROR_FORMAT % (value, name))
Example #38
def _validate_update_config(config):
    job_size = config.instances()
    update_config = config.update_config()
    health_check_config = config.health_check_config()

    max_failures = update_config.max_total_failures().get()
    watch_secs = update_config.watch_secs().get()
    initial_interval_secs = health_check_config.initial_interval_secs().get()
    max_consecutive_failures = health_check_config.max_consecutive_failures().get()
    interval_secs = health_check_config.interval_secs().get()

    if max_failures >= job_size:
        die(UPDATE_CONFIG_MAX_FAILURES_ERROR % (job_size, job_size - 1))

    if config.is_dedicated():
        min_failure_threshold = int(math.floor(job_size * 0.02))
        if max_failures < min_failure_threshold:
            die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold))

    target_watch = initial_interval_secs + (max_consecutive_failures * interval_secs)
    if watch_secs <= target_watch:
        die(
            WATCH_SECS_INSUFFICIENT_ERROR_FORMAT
            % (watch_secs, target_watch, initial_interval_secs, max_consecutive_failures, interval_secs)
        )
Example #39
def _validate_update_config(config):
    job_size = config.instances()
    update_config = config.update_config()
    health_check_config = config.health_check_config()

    max_failures = update_config.max_total_failures().get()
    watch_secs = update_config.watch_secs().get()
    initial_interval_secs = health_check_config.initial_interval_secs().get()
    max_consecutive_failures = health_check_config.max_consecutive_failures().get()
    min_consecutive_successes = health_check_config.min_consecutive_successes().get()
    interval_secs = health_check_config.interval_secs().get()

    if max_failures >= job_size:
        die(UPDATE_CONFIG_MAX_FAILURES_ERROR % (job_size, job_size - 1))

    if config.is_dedicated():
        min_failure_threshold = int(math.floor(job_size * 0.02))
        if max_failures < min_failure_threshold:
            die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR %
                (job_size, min_failure_threshold))

    params = [(watch_secs, 'watch_secs'),
              (max_consecutive_failures, 'max_consecutive_failures'),
              (min_consecutive_successes, 'min_consecutive_successes'),
              (initial_interval_secs, 'initial_interval_secs'),
              (interval_secs, 'interval_secs')]

    for value, name in params:
        if value < 0:
            die(INVALID_VALUE_ERROR_FORMAT % (value, name))
Example #40
def make_admin_client(cluster, verbose=False, bypass_leader_redirect=False):
  """Creates an API client with the specified options for use in admin commands.

  :param cluster: The cluster to connect with.
  :type cluster: Either a string cluster name or a Cluster object.
  :param verbose: Should the client emit verbose output.
  :type verbose: bool
  :param bypass_leader_redirect: Should the client bypass the scheduler's leader redirect filter.
  :type bypass_leader_redirect: bool
  :rtype: an AuroraClientAPI instance.
  """

  is_cluster_object = isinstance(cluster, Cluster)

  if not is_cluster_object and cluster not in CLUSTERS:
    die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys())))

  return AuroraClientAPI(
      cluster if is_cluster_object else CLUSTERS[cluster],
      AURORA_ADMIN_USER_AGENT_NAME,
      verbose=verbose,
      bypass_leader_redirect=bypass_leader_redirect)
Example #41
def make_admin_client(cluster, verbose=False, bypass_leader_redirect=False):
    """Creates an API client with the specified options for use in admin commands.

  :param cluster: The cluster to connect with.
  :type cluster: Either a string cluster name or a Cluster object.
  :param verbose: Should the client emit verbose output.
  :type verbose: bool
  :param bypass_leader_redirect: Should the client bypass the scheduler's leader redirect filter.
  :type bypass_leader_redirect: bool
  :rtype: an AuroraClientAPI instance.
  """

    is_cluster_object = isinstance(cluster, Cluster)

    if not is_cluster_object and cluster not in CLUSTERS:
        die('Unknown cluster: %s. Known clusters: %s' %
            (cluster, ", ".join(CLUSTERS.keys())))

    return AuroraClientAPI(cluster if is_cluster_object else CLUSTERS[cluster],
                           AURORA_ADMIN_USER_AGENT_NAME,
                           verbose=verbose,
                           bypass_leader_redirect=bypass_leader_redirect)
Example #42
def run(args, options):
    """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
    # TODO(William Farner): Add support for invoking on individual shards.
    # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
    if not args:
        die('job path is required')
    job_path = args.pop(0)
    new_cmd = ["task", "run"]
    instances_spec = job_path
    if options.num_threads != 1:
        new_cmd.append("--threads=%s" % options.num_threads)
    if options.ssh_user is not None:
        new_cmd.append("--ssh-user=%s" % options.ssh_user)
    if options.executor_sandbox:
        new_cmd.append("--executor-sandbox")
    new_cmd.append("\"%s\"" % " ".join(args))
    v1_deprecation_warning("ssh", new_cmd)

    try:
        cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
    except AuroraJobKey.Error as e:
        die('Invalid job path "%s": %s' % (job_path, e))

    command = ' '.join(args)
    cluster = CLUSTERS[cluster_name]
    dcr = DistributedCommandRunner(cluster, role, env, [name],
                                   options.ssh_user)
    dcr.run(command,
            parallelism=options.num_threads,
            executor_sandbox=options.executor_sandbox)
Example #43
def prune_tasks(args, options):
    if len(args) == 0:
        die('Must specify at least cluster.')
    cluster = args[0]

    t = TaskQuery()
    if options.states:
        t.statuses = set(
            map(ScheduleStatus._NAMES_TO_VALUES.get,
                options.states.split(',')))
    if options.role:
        t.role = options.role
    if options.environment:
        t.environment = options.environment
    if options.limit:
        t.limit = options.limit

    api = make_admin_client_with_options(cluster)
    rsp = api.prune_tasks(t)
    if rsp.responseCode != ResponseCode.OK:
        die('Failed to prune tasks: %s' % combine_messages(rsp))
    else:
        print("Tasks pruned.")
Example #44
def list_jobs(cluster_and_role):
    """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
    def show_job_simple(job):
        if options.show_cron_schedule:
            print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
                   '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(
                       cluster, job))
        else:
            print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(
                cluster, job))

    def show_job_pretty(job):
        print("Job %s/%s/%s/%s:" %
              (cluster, job.key.role, job.key.environment, job.key.name))
        print('\tcron schedule: %s' % job.cronSchedule)
        print('\tcron policy:   %s' % job.cronCollisionPolicy)

    options = app.get_options()
    v1_deprecation_warning("list_jobs", ["job", "list", cluster_and_role])

    if options.show_cron_schedule and options.pretty:
        print_fn = show_job_pretty
    else:
        print_fn = show_job_simple
    # Take the cluster_and_role parameter, and split it into its two components.
    if cluster_and_role.count('/') != 1:
        die('list_jobs parameter must be in cluster/role format')
    cluster, role = cluster_and_role.split('/')
    api = make_client(cluster)
    resp = api.get_jobs(role)
    check_and_log_response(resp)
    for job in resp.result.getJobsResult.configs:
        print_fn(job)
Example #45
def run(args, options):
  """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
  # TODO(William Farner): Add support for invoking on individual shards.
  # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
  if not args:
    die('job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))

  command = ' '.join(args)
  cluster = CLUSTERS[cluster_name]
  dcr = DistributedCommandRunner(cluster, role, env, [name], options.ssh_user)
  dcr.run(command, parallelism=options.num_threads, executor_sandbox=options.executor_sandbox)
Example #46
  def _disambiguate_or_die(cls, client, role, env, name):
    # Returns a single AuroraJobKey if one can be found given the args, potentially
    # querying the scheduler. Calls die() with an appropriate error message otherwise.
    try:
      disambiguator = cls(client, role, env, name)
    except ValueError as e:
      die(e)

    if not disambiguator.ambiguous:
      return AuroraJobKey(client.cluster.name, role, env, name)

    deprecation_warning("Job ambiguously specified - querying the scheduler to disambiguate")
    matches = disambiguator.query_matches()
    if len(matches) == 1:
      (match,) = matches
      log.info("Found job %s" % match)
      return match
    elif len(matches) == 0:
      die("No jobs found")
    else:
      die("Multiple jobs match (%s) - disambiguate by using the CLUSTER/ROLE/ENV/NAME form"
          % ",".join(str(m) for m in matches))
Example #47
def _parse_hostname_list(hostname_list):
    hostnames = [hostname.strip() for hostname in hostname_list.split(",")]
    if not hostnames:
        die('No valid hosts found.')
    return hostnames
Example #48
def query(args, options):
  """usage: query [--force]
                  [--listformat=FORMAT]
                  [--shards=N[,N,...]]
                  [--states=State[,State,...]]
                  cluster [role [job]]

  Query Mesos about jobs and tasks.
  """
  def _convert_fmt_string(fmtstr):
    import re
    def convert(match):
      return "%%(%s)s" % match.group(1)
    return re.sub(r'%(\w+)%', convert, fmtstr)

  def flatten_task(t, d=None):
    # Use a fresh dict per top-level call: a shared mutable default would
    # leak keys between tasks.
    if d is None:
      d = {}
    for key in t.__dict__.keys():
      val = getattr(t, key)
      try:
        val.__dict__.keys()
      except AttributeError:
        d[key] = val
      else:
        flatten_task(val, d)

    return d

  def map_values(d):
    default_value = lambda v: v
    mapping = {
      'status': lambda v: ScheduleStatus._VALUES_TO_NAMES[v],
    }
    return dict(
      (k, mapping.get(k, default_value)(v)) for (k, v) in d.items()
    )

  for state in options.states.split(','):
    if state not in ScheduleStatus._NAMES_TO_VALUES:
      msg = "Unknown state '%s' specified.  Valid states are:\n" % state
      msg += ','.join(ScheduleStatus._NAMES_TO_VALUES.keys())
      die(msg)

  # Role, Job, Instances, States, and the listformat
  if len(args) == 0:
    die('Must specify at least cluster.')

  cluster = args[0]
  role = args[1] if len(args) > 1 else None
  job = args[2] if len(args) > 2 else None
  instances = set(map(int, options.shards.split(','))) if options.shards else set()

  if options.states:
    states = set(map(ScheduleStatus._NAMES_TO_VALUES.get, options.states.split(',')))
  else:
    states = ACTIVE_STATES | TERMINAL_STATES
  listformat = _convert_fmt_string(options.listformat)

  #  Figure out "expensive" queries here and bone if they do not have --force
  #  - Does not specify role
  if not role and not options.force:
    die('--force is required for expensive queries (no role specified)')

  #  - Does not specify job
  if not job and not options.force:
    die('--force is required for expensive queries (no job specified)')

  #  - Specifies status outside of ACTIVE_STATES
  if not (states <= ACTIVE_STATES) and not options.force:
    die('--force is required for expensive queries (states outside ACTIVE states)')

  api = make_admin_client(cluster)

  query_info = api.query(TaskQuery(role=role, jobName=job, instanceIds=instances, statuses=states))
  if query_info.responseCode != ResponseCode.OK:
    die('Failed to query scheduler: %s' % combine_messages(query_info))

  tasks = query_info.result.scheduleStatusResult.tasks
  if tasks is None:
    return

  try:
    for task in tasks:
      d = flatten_task(task)
      print(listformat % map_values(d))
  except KeyError:
    msg = "Unknown key in format string.  Valid keys are:\n"
    msg += ','.join(d.keys())
    die(msg)
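The _convert_fmt_string helper rewrites the user-facing %key% placeholders from --listformat into Python %(key)s mapping keys, which are then applied to the flattened task dict:

import re

def convert_fmt_string(fmtstr):
    # Same transformation as _convert_fmt_string above.
    return re.sub(r'%(\w+)%', lambda m: "%%(%s)s" % m.group(1), fmtstr)

assert convert_fmt_string('%taskId% %status%') == '%(taskId)s %(status)s'
print('%(taskId)s is %(status)s' % {'taskId': 't-123', 'status': 'RUNNING'})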
Example #49
def _parse_hostname_file(filename):
    with open(filename, 'r') as hosts:
        hostnames = [line.strip() for line in hosts if line.strip()]
    if not hostnames:
        die('No valid hosts found in %s.' % filename)
    return hostnames
Example #50
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')
  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
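Each --tunnels entry must have the form PORT:NAME, where NAME is one of the task's assigned ports. A worked example with hypothetical values:

# tunnel = '8081:http', slave_host = 'host1', assignedPorts = {'http': 31337}
# produce the ssh flag:
#   -L 8081:host1:31337
# i.e. local port 8081 forwards to the task's http port on the slave host.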
Example #51
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  if (resp.result.scheduleStatusResult.tasks is None or
      len(resp.result.scheduleStatusResult.tasks) == 0):
    die("Job %s not found" % job_path)
  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
Example #52
def wait_kill_tasks(scheduler, job_key, instances=None):
    monitor = JobMonitor(scheduler, job_key)
    if not monitor.wait_until(monitor.terminal, instances=instances, with_timeout=True):
        die('Tasks were not killed in time.')
        die('Tasks were not killed in time.')
Example #53
  def test_die(self, mock_log, mock_sys_exit):
    msg = 'fatal message'
    out = base.die(msg)
    self.assertIsNone(out)
    mock_sys_exit.assert_called_once_with(1)
    mock_log.assert_called_once_with(msg)
Example #54
def _validate_health_check_config(config):
    # TODO(Sathya): Remove this check after health_check_interval_secs deprecation cycle is complete.
    if (config.raw().has_health_check_interval_secs()
            and config.raw().has_health_check_config()):
        die(HEALTH_CHECK_INTERVAL_SECS_ERROR)
Example #55
def diff(job_spec, config_file):
    """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an alternate
  diff program by specifying the DIFF_VIEWER environment variable."""
    options = app.get_options()

    newcmd = ["job", "diff", job_spec, config_file]
    if options.json:
        newcmd.append("--read-json")

    v1_deprecation_warning("diff", newcmd)

    config = get_job_config(job_spec, config_file, options)
    if options.rename_from:
        cluster, role, env, name = options.rename_from
    else:
        cluster = config.cluster()
        role = config.role()
        env = config.environment()
        name = config.name()
    api = make_client(cluster)
    resp = api.query(
        api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
    if resp.responseCode != ResponseCode.OK:
        die('Request failed, server responded with "%s"' %
            resp.messageDEPRECATED)
    remote_tasks = [
        t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks
    ]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
        die('Request failed, server responded with "%s"' %
            resp.messageDEPRECATED)
    local_tasks = resp.result.populateJobResult.populatedDEPRECATED

    pp = pprint.PrettyPrinter(indent=2)

    def pretty_print_task(task):
        # The raw configuration is not interesting - we only care about what gets parsed.
        task.configuration = None
        task.executorConfig = ExecutorConfig(name=AURORA_EXECUTOR_NAME,
                                             data=json.loads(
                                                 task.executorConfig.data))
        return pp.pformat(vars(task))

    def pretty_print_tasks(tasks):
        return ',\n'.join([pretty_print_task(t) for t in tasks])

    def dump_tasks(tasks, out_file):
        out_file.write(pretty_print_tasks(tasks))
        out_file.write('\n')
        out_file.flush()

    diff_program = os.environ.get('DIFF_VIEWER', 'diff')
    with NamedTemporaryFile() as local:
        dump_tasks(local_tasks, local)
        with NamedTemporaryFile() as remote:
            dump_tasks(remote_tasks, remote)
            result = subprocess.call([diff_program, remote.name, local.name])
            # Unlike most commands, diff doesn't return zero on success; it returns
            # 1 when a successful diff is non-empty.
            if result != 0 and result != 1:
                return result
            else:
                return 0