Example #1
def really_killall(args, options):
  """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  new_cmd = ["job", "killall", args[0]]
  if config_file is not None:
    new_cmd.append("--config=%s" % config_file)
  if options.open_browser:
    new_cmd.append("--open-browser")
  if options.batch_size is not None:
    new_cmd.append("--batch-size=%s" % options.batch_size)
  if options.max_total_failures is not None:
    new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
  v1_deprecation_warning("killall", new_cmd)

  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)
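As a side note, the v1-to-v2 command translation above is easy to check in isolation. A minimal, self-contained sketch (the job path and config name below are made up for illustration):

def build_killall_v2_cmd(job_path, config_file=None, batch_size=None):
  # Mirrors the new_cmd construction in the example above.
  cmd = ["job", "killall", job_path]
  if config_file is not None:
    cmd.append("--config=%s" % config_file)
  if batch_size is not None:
    cmd.append("--batch-size=%s" % batch_size)
  return cmd

assert build_killall_v2_cmd("devcluster/www-data/prod/hello", "hello.aurora", 5) == [
    "job", "killall", "devcluster/www-data/prod/hello",
    "--config=hello.aurora", "--batch-size=5"]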
Example #2
def test_make_client_hooks_disabled():
    with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
        client = make_client(TEST_CLUSTER,
                             'some-user-agent',
                             enable_hooks=False)
        assert not isinstance(client, HookedAuroraClientAPI)
        assert isinstance(client, AuroraClientAPI)
Example #3
def do_open(args, _):
  """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
  cluster_name = role = env = job = None
  args = args[0].split("/")
  if len(args) > 0:
    cluster_name = args[0]
    if len(args) > 1:
      role = args[1]
      if len(args) > 2:
        env = args[2]
        if len(args) > 3:
          job = args[3]
        else:
          # TODO(ksweeney): Remove this after MESOS-2945 is completed.
          die('env scheduler pages are not yet implemented, please specify job')

  if not cluster_name:
    die('cluster is required')

  api = make_client(cluster_name)

  import webbrowser
  webbrowser.open_new_tab(
      synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env, job))
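For clarity, the cluster[/role[/env/job]] splitting above can be exercised on its own. A minimal sketch with a hypothetical job path:

parts = "devcluster/www-data/prod/hello".split("/")
cluster_name, role, env, job = (parts + [None] * 4)[:4]
assert (cluster_name, role, env, job) == ("devcluster", "www-data", "prod", "hello")
# Shorter paths leave the trailing fields as None, e.g. "devcluster" yields only the cluster.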
Example #4
def really_update(job_spec, config_file, options):
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(), config.name(),
        statuses=ACTIVE_STATES, env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populated)
    remote_task_count = len(remote_tasks)
    if (local_task_count >= 4 * remote_task_count or local_task_count <= remote_task_count / 4
        or local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)
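The "large change" test above (as corrected) warns when the task count grows or shrinks by 4x, or when the new count is zero. A standalone sketch of the same condition, with hypothetical counts:

def is_large_change(local_task_count, remote_task_count):
  return (local_task_count >= 4 * remote_task_count
      or local_task_count <= remote_task_count / 4
      or local_task_count == 0)

assert is_large_change(40, 10)      # 4x growth triggers the warning
assert is_large_change(2, 10)       # 4x shrinkage triggers it too
assert not is_large_change(12, 10)  # a modest change does not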
Example #5
def really_killall(args, options):
    """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
    maybe_disable_hooks(options)
    job_key = AuroraJobKey.from_path(args[0])
    config_file = args[1] if len(args) > 1 else None  # the config for hooks
    new_cmd = ["job", "killall", args[0]]
    if config_file is not None:
        new_cmd.append("--config=%s" % config_file)
    if options.open_browser:
        new_cmd.append("--open-browser")
    if options.batch_size is not None:
        new_cmd.append("--batch-size=%s" % options.batch_size)
    if options.max_total_failures is not None:
        new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
    v1_deprecation_warning("killall", new_cmd)

    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    api = make_client(job_key.cluster)
    if options.batch_size is not None:
        kill_in_batches(api, job_key, None, options.batch_size,
                        options.max_failures_option)
    else:
        resp = api.kill_job(job_key, None, config=config)
        check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
    wait_kill_tasks(api.scheduler_proxy, job_key)
Example #6
def do_open(args, _):
    """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
    cluster_name = role = env = job = None
    if len(args) == 0:
        print('Open command requires a jobkey parameter.')
        exit(1)
    v1_deprecation_warning("open", ["job", "open"])
    args = args[0].split("/")
    if len(args) > 0:
        cluster_name = args[0]
        if len(args) > 1:
            role = args[1]
            if len(args) > 2:
                env = args[2]
                if len(args) > 3:
                    job = args[3]
                else:
                    # TODO(ksweeney): Remove this after MESOS-2945 is completed.
                    die('env scheduler pages are not yet implemented, please specify job')

    if not cluster_name:
        die('cluster is required')

    api = make_client(cluster_name)

    import webbrowser
    webbrowser.open_new_tab(
        synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env,
                       job))
Example #7
def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
          '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
        (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy:   %s' % job.cronCollisionPolicy)

  options = app.get_options()
  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple
  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  (cluster, role) = cluster_and_role.split('/')
  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)
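Incidentally, the '{1.key.role}' placeholders above rely on str.format's attribute access. A tiny self-contained illustration with stand-in objects (not Aurora types):

from collections import namedtuple

Key = namedtuple("Key", "role environment name")
Job = namedtuple("Job", "key")
job = Job(Key("www-data", "prod", "hello"))
assert "{0}/{1.key.role}/{1.key.environment}/{1.key.name}".format(
    "devcluster", job) == "devcluster/www-data/prod/hello"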
Example #8
    def restart_job(self, cluster, role, environment, jobname, jobspec=None, instances=[]):
        """Method to restart aurora job"""

        job_key = AuroraJobKey(cluster, role, environment, jobname)
        logger.info("request to restart => %s", job_key.to_path())

        instances = self.pack_instance_list(instances)
        try:
            config = self.make_job_config(job_key, jobspec)
        except Exception as e:
            return(job_key.to_path(), ["Failed to restart Aurora job",
                                       "Can not create job configuration object because", str(e)])

        # these are the default values from apache.aurora.client.commands.core.restart()
        updater_config = UpdaterConfig(
            1,          # options.batch_size
            60,         # options.restart_threshold
            30,         # options.watch_secs
            0,          # options.max_per_shard_failures
            0           # options.max_total_failures
        )

        api = make_client(job_key.cluster)
        # instances = all shards, health check = 3 sec
        resp = api.restart(job_key, instances, updater_config, 3, config=config)
        if resp.responseCode != ResponseCode.OK:
            logger.warning("aurora -- restart job failed")
            responseStr = self.response_string(resp)
            logger.warning(responseStr)
            return(job_key.to_path(), ["Error reported by aurora client:", responseStr])

        logger.info("aurora -- restart job successful")
        return(job_key.to_path(), None)
Example #9
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
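The PORT:NAME tunnel parsing above is simple enough to verify in isolation; a minimal sketch with a hypothetical tunnel spec:

tunnel = "8080:http"
port, name = tunnel.split(":")
port = int(port)
assert (port, name) == (8080, "http")
# A malformed spec such as "http" raises ValueError, which the example turns into die().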
Example #10
    def get_api(self, cluster):
        """Gets an API object for a specified cluster

        Keeps the API handle cached, so that only one handle for each cluster will be created in a
        session.
        """
        if cluster not in self.apis:
            api = make_client(cluster)
            self.apis[cluster] = api
        return self.apis[cluster]
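The per-cluster caching idiom above generalizes; here is a minimal, self-contained sketch using a stand-in factory instead of make_client:

class ApiCache(object):
  def __init__(self, factory):
    self.apis = {}
    self.factory = factory

  def get_api(self, cluster):
    # Create the handle once per cluster, then reuse it for the session.
    if cluster not in self.apis:
      self.apis[cluster] = self.factory(cluster)
    return self.apis[cluster]

cache = ApiCache(factory=lambda cluster: object())
assert cache.get_api("devcluster") is cache.get_api("devcluster")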
Example #11
    def get_api(self, cluster):
        """Gets an API object for a specified cluster

        Keeps the API handle cached, so that only one handle for each cluster will be created in a
        session.
        """
        if cluster not in self.apis:
            api = make_client(cluster)
            self.apis[cluster] = api
        return self.apis[cluster]
Example #12
def diff(job_spec, config_file):
  """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an alternate
  diff program by specifying the DIFF_VIEWER environment variable."""
  options = app.get_options()

  config = get_job_config(job_spec, config_file, options)
  if options.rename_from:
    cluster, role, env, name = options.rename_from
  else:
    cluster = config.cluster()
    role = config.role()
    env = config.environment()
    name = config.name()
  api = make_client(cluster)
  resp = api.query(api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
  resp = api.populate_job_config(config)
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  local_tasks = resp.result.populateJobResult.populated

  pp = pprint.PrettyPrinter(indent=2)
  def pretty_print_task(task):
    # The raw configuration is not interesting - we only care about what gets parsed.
    task.configuration = None
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=json.loads(task.executorConfig.data))
    return pp.pformat(vars(task))

  def pretty_print_tasks(tasks):
    return ',\n'.join([pretty_print_task(t) for t in tasks])

  def dump_tasks(tasks, out_file):
    out_file.write(pretty_print_tasks(tasks))
    out_file.write('\n')
    out_file.flush()

  diff_program = os.environ.get('DIFF_VIEWER', 'diff')
  with NamedTemporaryFile() as local:
    dump_tasks(local_tasks, local)
    with NamedTemporaryFile() as remote:
      dump_tasks(remote_tasks, remote)
      result = subprocess.call([diff_program, remote.name, local.name])
      # Unlike most commands, diff doesn't return zero on success; it returns
      # 1 when a successful diff is non-empty.
      if result != 0 and result != 1:
        return result
      else:
        return 0
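Note the exit-code handling at the end: diff(1) returns 0 for no differences, 1 for differences found, and greater than 1 for errors, so both 0 and 1 count as success. A standalone sketch of that normalization:

def normalize_diff_exit(result):
  # 0 (identical) and 1 (differences found) both mean the diff itself succeeded.
  return 0 if result in (0, 1) else result

assert normalize_diff_exit(0) == 0
assert normalize_diff_exit(1) == 0
assert normalize_diff_exit(2) == 2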
Example #13
  def get_api(self, cluster, enable_hooks=True):
    """Gets an API object for a specified cluster
    Keeps the API handle cached, so that only one handle for each cluster will be created in a
    session.
    """
    apis = self.apis if enable_hooks else self.unhooked_apis

    if cluster not in apis:
      api = make_client(cluster, AURORA_V2_USER_AGENT_NAME, enable_hooks)
      apis[cluster] = api
    return apis[cluster]
Example #14
    def get_api(self, cluster, enable_hooks=True):
        """Gets an API object for a specified cluster
    Keeps the API handle cached, so that only one handle for each cluster will be created in a
    session.
    """
        apis = self.apis if enable_hooks else self.unhooked_apis

        if cluster not in apis:
            api = make_client(cluster, AURORA_V2_USER_AGENT_NAME, enable_hooks)
            apis[cluster] = api
        return apis[cluster]
Example #15
def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)

  print_quota(resp.result.getQuotaResult.quota, 'Total allocated quota', role)

  if resp.result.getQuotaResult.consumed:
    print_quota(resp.result.getQuotaResult.consumed, 'Consumed quota', role)
Example #16
def killall(args, options):
  """usage: killall cluster/role/env/job
  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """

  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  resp = api.kill_job(job_key, None, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
Example #17
def really_create(job_spec, config_file, options):
  try:
    config = get_job_config(job_spec, config_file, options)
  except ValueError as v:
    print("Error: %s" % v)
    sys.exit(1)
  api = make_client(config.cluster())
  resp = api.create_job(config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, config.role(), config.environment(),
      config.name())
  if options.wait_until == 'RUNNING':
    JobMonitor(api.scheduler_proxy, config.job_key()).wait_until(JobMonitor.running_or_finished)
  elif options.wait_until == 'FINISHED':
    JobMonitor(api.scheduler_proxy, config.job_key()).wait_until(JobMonitor.terminal)
Example #18
def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)
  quota = resp.result.getQuotaResult.quota

  quota_fields = [
    ('CPU', quota.numCpus),
    ('RAM', '%f GB' % (float(quota.ramMb) / 1024)),
    ('Disk', '%f GB' % (float(quota.diskMb) / 1024))
  ]
  log.info('Quota for %s:\n\t%s' %
           (role, '\n\t'.join(['%s\t%s' % (k, v) for (k, v) in quota_fields])))
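The quota printout above converts MB to GB with plain float division; a quick standalone check with a hypothetical value:

ram_mb = 2048
assert '%f GB' % (float(ram_mb) / 1024) == '2.000000 GB'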
Example #19
def killall(args, options):
  """usage: killall cluster/role/env/job
  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)
Example #20
def really_create(job_spec, config_file, options):
    try:
        config = get_job_config(job_spec, config_file, options)
    except ValueError as v:
        print("Error: %s" % v)
        sys.exit(1)
    api = make_client(config.cluster())
    resp = api.create_job(config)
    check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, config.role(),
                config.environment(), config.name())
    if options.wait_until == 'RUNNING':
        JobMonitor(api.scheduler_proxy,
                   config.job_key()).wait_until(JobMonitor.running_or_finished)
    elif options.wait_until == 'FINISHED':
        JobMonitor(api.scheduler_proxy,
                   config.job_key()).wait_until(JobMonitor.terminal)
Example #21
def update(job_spec, config_file):
  """usage: update cluster/role/env/job config

  Performs a rolling upgrade on a running job, using the update configuration
  within the config file as a control for update velocity and failure tolerance.

  Updates are fully controlled client-side, so aborting an update halts the
  update and leaves the job in a 'locked' state on the scheduler.
  Subsequent update attempts will fail until the update is 'unlocked' using the
  'cancel_update' command.

  The updater only takes action on shards in a job that have changed, meaning
  that changing a single shard will only induce a restart on the changed shard.

  You may want to consider using the 'diff' subcommand before updating,
  to preview what changes will take effect.
  """
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(), config.name(),
        statuses=ACTIVE_STATES, env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populated)
    remote_task_count = len(remote_tasks)
    if (local_task_count >= 4 * remote_task_count or local_task_count <= remote_task_count / 4
        or local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  options = app.get_options()
  CoreCommandHook.run_hooks("update", options, job_spec, config_file)
  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)
Example #22
def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)
  quota_result = resp.result.getQuotaResult
  print_quota(quota_result.quota, 'Total allocated quota', role)

  if resp.result.getQuotaResult.prodConsumption:
    print_quota(quota_result.prodConsumption,
                'Resources consumed by production jobs',
                role)

  if resp.result.getQuotaResult.nonProdConsumption:
    print_quota(quota_result.nonProdConsumption,
                'Resources consumed by non-production jobs',
                role)
Example #23
def get_quota(role):
    """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
    options = app.get_options()
    v1_deprecation_warning(
        "get_quota",
        ["quota", "get", "%s/%s" % (options.cluster, role)])
    resp = make_client(options.cluster).get_quota(role)
    quota_result = resp.result.getQuotaResult
    print_quota(quota_result.quota, 'Total allocated quota', role)

    if resp.result.getQuotaResult.prodConsumption:
        print_quota(quota_result.prodConsumption,
                    'Resources consumed by production jobs', role)

    if resp.result.getQuotaResult.nonProdConsumption:
        print_quota(quota_result.nonProdConsumption,
                    'Resources consumed by non-production jobs', role)
Example #24
def create(job_spec, config_file):
  """usage: create cluster/role/env/job config

  Creates a job based on a configuration file.
  """
  options = app.get_options()
  try:
    config = get_job_config(job_spec, config_file, options)
  except ValueError as v:
    print("Error: %s" % v)
    sys.exit(1)
  api = make_client(config.cluster())
  monitor = JobMonitor(api, config.role(), config.environment(), config.name())
  resp = api.create_job(config)
  check_and_log_response(resp)
  handle_open(api.scheduler.scheduler().url, config.role(), config.environment(), config.name())
  if options.wait_until == 'RUNNING':
    monitor.wait_until(monitor.running_or_finished)
  elif options.wait_until == 'FINISHED':
    monitor.wait_until(monitor.terminal)
Example #25
    def cancel_update_job(self, cluster, role, environment, jobname, jobspec=None):
        """Method to cancel an update of aurora job"""

        job_key = AuroraJobKey(cluster, role, environment, jobname)
        logger.info("request to cancel update of => %s", job_key.to_path())

        try:
            config = self.make_job_config(job_key, jobspec)
        except Exception as e:
            return(job_key.to_path(), ["Failed to cancel update of Aurora job",
                                       "Can not create job configuration object because", str(e)])

        api = make_client(cluster)
        resp = api.cancel_update(job_key, config=config)
        if resp.responseCode != ResponseCode.OK:
            logger.warning("aurora -- cancel the update of job failed")
            responseStr = self.response_string(resp)
            logger.warning(responseStr)
            return(job_key.to_path(), ["Error reported by aurora client:", responseStr])

        logger.info("aurora -- cancel of update job successful")
        return(job_key.to_path(), None)
Example #26
def really_update(job_spec, config_file, options):
    def warn_if_dangerous_change(api, job_spec, config):
        # Get the current job status, so that we can check if there's anything
        # dangerous about this update.
        resp = api.query_no_configs(
            api.build_query(config.role(),
                            config.name(),
                            statuses=ACTIVE_STATES,
                            env=config.environment()))
        if resp.responseCode != ResponseCode.OK:
            die('Could not get job status from server for comparison: %s' %
                resp.messageDEPRECATED)
        remote_tasks = [
            t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks
        ]
        resp = api.populate_job_config(config)
        if resp.responseCode != ResponseCode.OK:
            die('Server could not populate job config for comparison: %s' %
                resp.messageDEPRECATED)
        local_task_count = len(
            resp.result.populateJobResult.populatedDEPRECATED)
        remote_task_count = len(remote_tasks)
        if (local_task_count >= 4 * remote_task_count
                or local_task_count <= remote_task_count / 4
                or local_task_count == 0):
            print(
                'Warning: this update is a large change. Press ^c within 5 seconds to abort'
            )
            time.sleep(5)

    maybe_disable_hooks(options)
    config = get_job_config(job_spec, config_file, options)
    api = make_client(config.cluster())
    if not options.force:
        warn_if_dangerous_change(api, job_spec, config)
    resp = api.update_job(config, options.health_check_interval_seconds,
                          options.shards)
    check_and_log_response(resp)
Example #27
    def delete_job(self, cluster, role, environment, jobname, jobspec=None, instances=[]):
        """Method to delete aurora job"""

        job_key = AuroraJobKey(cluster, role, environment, jobname)
        logger.info("request to delete => %s", job_key.to_path())

        instances = self.pack_instance_list(instances)
        try:
            config = self.make_job_config(job_key, jobspec)
        except Exception as e:
            return(job_key.to_path(), ["Failed to delete Aurora job",
                                       "Can not create job configuration object because", str(e)])

        api = make_client(job_key.cluster)
        resp = api.kill_job(job_key, config=config, instances=instances)
        if resp.responseCode != ResponseCode.OK:
            logger.warning("aurora -- kill job failed")
            responseStr = self.response_string(resp)
            logger.warning(responseStr)
            return(job_key.to_path(), [], ["Error reported by aurora client:", responseStr])

        logger.info("aurora -- kill job successful")
        return(job_key.to_path(), [job_key.to_path()], None)
Example #28
def list_jobs(cluster_and_role):
    """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
    def show_job_simple(job):
        if options.show_cron_schedule:
            print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
                   '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(
                       cluster, job))
        else:
            print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(
                cluster, job))

    def show_job_pretty(job):
        print("Job %s/%s/%s/%s:" %
              (cluster, job.key.role, job.key.environment, job.key.name))
        print('\tcron schedule: %s' % job.cronSchedule)
        print('\tcron policy:   %s' % job.cronCollisionPolicy)

    options = app.get_options()
    v1_deprecation_warning("list_jobs", ["job", "list", cluster_and_role])

    if options.show_cron_schedule and options.pretty:
        print_fn = show_job_pretty
    else:
        print_fn = show_job_simple
    # Take the cluster_and_role parameter, and split it into its two components.
    if cluster_and_role.count('/') != 1:
        die('list_jobs parameter must be in cluster/role format')
    cluster, role = cluster_and_role.split('/')
    api = make_client(cluster)
    resp = api.get_jobs(role)
    check_and_log_response(resp)
    for job in resp.result.getJobsResult.configs:
        print_fn(job)
Example #29
    def list_jobs(self, cluster, role):
        """Method to execute [ aurora list_jobs cluster/role command ]"""

        def job_string(cluster, job):
            return '{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job)

        jobkey = self.make_job_key(cluster, role)
        logger.info("request to list jobs = %s" % jobkey)

        api = make_client(cluster)
        resp = api.get_jobs(role)
        if resp.responseCode != ResponseCode.OK:
            logger.warning("Failed to list Aurora jobs")
            responseStr = self.response_string(resp)
            logger.warning(responseStr)
            return(jobkey, [], ["Failed to list Aurora jobs", responseStr])

        jobs = [ job_string(cluster, job) for job in resp.result.getJobsResult.configs ]
        if len(jobs) == 0:
            logger.info("no jobs found for key = %s" % jobkey)
        for s in jobs:
            logger.info("> %s" % s )

        return(jobkey, jobs, None)
Example #30
def diff(job_spec, config_file):
    """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an alternate
  diff program by specifying the DIFF_VIEWER environment variable."""
    options = app.get_options()

    newcmd = ["job", "diff", job_spec, config_file]
    if options.json:
        newcmd.append("--read-json")

    v1_deprecation_warning("diff", newcmd)

    config = get_job_config(job_spec, config_file, options)
    if options.rename_from:
        cluster, role, env, name = options.rename_from
    else:
        cluster = config.cluster()
        role = config.role()
        env = config.environment()
        name = config.name()
    api = make_client(cluster)
    resp = api.query(
        api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
    if resp.responseCode != ResponseCode.OK:
        die('Request failed, server responded with "%s"' %
            resp.messageDEPRECATED)
    remote_tasks = [
        t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks
    ]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
        die('Request failed, server responded with "%s"' %
            resp.messageDEPRECATED)
    local_tasks = resp.result.populateJobResult.populatedDEPRECATED

    pp = pprint.PrettyPrinter(indent=2)

    def pretty_print_task(task):
        # The raw configuration is not interesting - we only care about what gets parsed.
        task.configuration = None
        task.executorConfig = ExecutorConfig(name=AURORA_EXECUTOR_NAME,
                                             data=json.loads(
                                                 task.executorConfig.data))
        return pp.pformat(vars(task))

    def pretty_print_tasks(tasks):
        return ',\n'.join([pretty_print_task(t) for t in tasks])

    def dump_tasks(tasks, out_file):
        out_file.write(pretty_print_tasks(tasks))
        out_file.write('\n')
        out_file.flush()

    diff_program = os.environ.get('DIFF_VIEWER', 'diff')
    with NamedTemporaryFile() as local:
        dump_tasks(local_tasks, local)
        with NamedTemporaryFile() as remote:
            dump_tasks(remote_tasks, remote)
            result = subprocess.call([diff_program, remote.name, local.name])
            # Unlike most commands, diff doesn't return zero on success; it returns
            # 1 when a successful diff is non-empty.
            if result != 0 and result != 1:
                return result
            else:
                return 0
Example #31
def test_make_client_defaults_to_hooks_enabled():
    with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
        assert isinstance(make_client(TEST_CLUSTER, 'some-user-agent'),
                          HookedAuroraClientAPI)
Example #32
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  if (resp.result.scheduleStatusResult.tasks is None or
      len(resp.result.scheduleStatusResult.tasks) == 0):
    die("Job %s not found" % job_path)
  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
Example #33
def test_make_client_hooks_disabled():
  with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
    client = make_client(TEST_CLUSTER, 'some-user-agent', enable_hooks=False)
    assert not isinstance(client, HookedAuroraClientAPI)
    assert isinstance(client, AuroraClientAPI)
Example #34
def test_make_client_defaults_to_hooks_enabled():
  with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
    assert isinstance(make_client(TEST_CLUSTER, 'some-user-agent'), HookedAuroraClientAPI)
Example #35
    def get_api(self, cluster):
        """Creates an API object for a specified cluster"""
        return make_client(cluster)
Example #36
def make_admin_client(cluster):
  return make_client(cluster, AURORA_ADMIN_USER_AGENT_NAME)
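This thin wrapper only pins the user agent. The same partial-application idea in a generic, self-contained sketch (stub names, not Aurora APIs):

import functools

def make_stub_client(cluster, user_agent):
  return (cluster, user_agent)

make_stub_admin_client = functools.partial(make_stub_client, user_agent="aurora-admin")
assert make_stub_admin_client("devcluster") == ("devcluster", "aurora-admin")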