async def format_stdstreams_tail_for_task(task, get_short_task_id, nlines=10):
    """Returns the formatted "tail" of stdout/stderr, for a given task.

    :param task: a mesos task dict; only ``task['id']`` is read here.
    :param get_short_task_id: A function which given a task_id returns a short
        task_id suitable for printing.
    :param nlines: number of lines to read from the end of each file.
    :returns: a list of (possibly colorized) output lines; errors are reported
        as lines in the result rather than raised.
    """
    error_message = PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    output = []
    mesos_cli_config = get_mesos_config()
    try:
        fobjs = await aiter_to_list(
            cluster.get_files_for_tasks(
                task_list=[task],
                file_list=['stdout', 'stderr'],
                max_workers=mesos_cli_config["max_workers"],
            ))
        # reverse sort so stdout always comes before stderr in the output
        fobjs.sort(key=lambda fobj: fobj.path, reverse=True)
        if not fobjs:
            # BUGFIX: message used to read "stderrr" (triple r)
            output.append(
                PaastaColors.blue(" no stdout/stderr for %s" % get_short_task_id(task['id'])))
            return output
        for fobj in fobjs:
            output.append(
                PaastaColors.blue(" {} tail for {}".format(
                    fobj.path, get_short_task_id(task['id']))))
            # read nlines, starting from EOF
            tail = []
            lines_seen = 0
            if nlines > 0:
                async for line in fobj._readlines_reverse():
                    tail.append(line)
                    lines_seen += 1
                    if lines_seen >= nlines:
                        break
            # reverse the tail, so that EOF is at the bottom again
            if tail:
                output.extend(tail[::-1])
            output.append(PaastaColors.blue(" %s EOF" % fobj.path))
    except (
        mesos_exceptions.MasterNotAvailableException,
        mesos_exceptions.SlaveDoesNotExist,
        mesos_exceptions.TaskNotFoundException,
        mesos_exceptions.FileNotFoundForTaskException,
    ) as e:
        # expected mesos failures become a red one-line message
        output.append(error_message % (get_short_task_id(task['id']), str(e)))
    except TimeoutError:
        output.append(error_message % (get_short_task_id(task['id']), 'timeout'))
    return output
def format_stdstreams_tail_for_task(task, get_short_task_id, nlines=10):
    """Returns the formatted "tail" of stdout/stderr, for a given task.

    :param task: a mesos task dict; only ``task['id']`` is read here.
    :param get_short_task_id: A function which given a task_id returns a short
        task_id suitable for printing.
    :param nlines: number of lines to read from the end of each file.
    :returns: a list of (possibly colorized) output lines; errors are reported
        as lines in the result rather than raised.
    """
    error_message = PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    output = []
    mesos_cli_config = get_mesos_config()
    try:
        fobjs = list(
            cluster.get_files_for_tasks(
                task_list=[task],
                file_list=['stdout', 'stderr'],
                max_workers=mesos_cli_config["max_workers"],
            ))
        # reverse sort so stdout always comes before stderr in the output
        fobjs.sort(key=lambda fobj: fobj.path, reverse=True)
        if not fobjs:
            # BUGFIX: message used to read "stderrr" (triple r)
            output.append(
                PaastaColors.blue(" no stdout/stderr for %s" % get_short_task_id(task['id'])))
            return output
        for fobj in fobjs:
            output.append(
                PaastaColors.blue(" %s tail for %s" % (fobj.path, get_short_task_id(task['id']))))
            # read nlines, starting from EOF
            # mesos.cli is smart and can efficiently read a file backwards
            reversed_file = reversed(fobj)
            tail = []
            for _ in range(nlines):
                line = next(reversed_file, None)
                if line is None:
                    break
                tail.append(line)
            # reverse the tail, so that EOF is at the bottom again
            if tail:
                output.extend(tail[::-1])
            output.append(PaastaColors.blue(" %s EOF" % fobj.path))
    except (
        mesos_exceptions.MasterNotAvailableException,
        mesos_exceptions.SlaveDoesNotExist,
        mesos_exceptions.TaskNotFoundException,
        mesos_exceptions.FileNotFoundForTaskException,
    ) as e:
        # expected mesos failures become a red one-line message
        output.append(error_message % (get_short_task_id(task['id']), str(e)))
    except TimeoutError:
        output.append(error_message % (get_short_task_id(task['id']), 'timeout'))
    return output
def report_status_for_cluster(service, cluster, deploy_pipeline, actual_deployments, verbose=False): """With a given service and cluster, prints the status of the instances in that cluster""" # Get cluster.instance in the order in which they appear in deploy.yaml print print "cluster: %s" % cluster for namespace in deploy_pipeline: cluster_in_pipeline, instance = namespace.split('.') if cluster_in_pipeline != cluster: # This function only prints things that are relevant to cluster # We skip anything not in this cluster continue # Case: service deployed to cluster.instance if namespace in actual_deployments: unformatted_instance = instance instance = PaastaColors.blue(instance) version = actual_deployments[namespace][:8] # TODO: Perform sanity checks once per cluster instead of for each namespace status = execute_paasta_serviceinit_on_remote_master('status', cluster, service, unformatted_instance, verbose=verbose) # Case: service NOT deployed to cluster.instance else: instance = PaastaColors.red(instance) version = 'None' status = None print ' instance: %s' % instance print ' Git sha: %s' % version if status is not None: for line in status.rstrip().split('\n'): print ' %s' % line
def paasta_status_on_api_endpoint(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    system_paasta_config: SystemPaastaConfig,
    verbose: int,
) -> int:
    """Fetch one instance's status from the paasta API and render it into ``output``.

    Appends the instance header and desired git sha, then delegates to the
    backend-specific printer (Marathon or Kubernetes).

    :returns: 0 on success, 1 from a printer failure, or the HTTP status code
        when the API call itself fails.
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print('Cannot get a paasta-api client')
        exit(1)

    try:
        request = client.service.status_instance(service=service, instance=instance)
        status = request.result()
    except HTTPError as exc:
        paasta_print(exc.response.text)
        return exc.status_code

    output.append(' instance: %s' % PaastaColors.blue(instance))
    output.append(' Git sha: %s (desired)' % status.git_sha)

    # Dispatch to whichever backend reported a status; guard-style returns.
    if status.marathon is not None:
        return print_marathon_status(service, instance, output, status.marathon)
    if status.kubernetes is not None:
        return print_kubernetes_status(service, instance, output, status.kubernetes)
    paasta_print(
        "Not implemented: Looks like %s is not a Marathon or Kubernetes instance" % instance)
    return 0
def failure(msg, link):
    """Format a paasta check failure message.

    :param msg: a string describing the failure
    :param link: a URL with more information, rendered in blue
    :return: a beautiful string
    """
    colored_link = PaastaColors.blue(link)
    return "%s %s %s" % (x_mark(), msg, colored_link)
def report_status_for_cluster(service, cluster, deploy_pipeline, actual_deployments, instance_whitelist, verbose=0):
    """With a given service and cluster, prints the status of the instances in that cluster

    NOTE: Python 2 code (statement-form ``print``).

    :param service: name of the service being inspected
    :param cluster: only namespaces belonging to this cluster are printed
    :param deploy_pipeline: iterable of ``cluster.instance`` namespace strings
    :param actual_deployments: mapping of namespace -> deployed git sha
    :param instance_whitelist: if non-empty, only these instances are shown
    :param verbose: passed through to the remote serviceinit call
    """
    print
    print "cluster: %s" % cluster
    # collect every instance seen so bad whitelist entries can be reported at the end
    seen_instances = []
    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split('.')
        seen_instances.append(instance)
        if cluster_in_pipeline != cluster:
            # skip namespaces that belong to other clusters
            continue
        if instance_whitelist and instance not in instance_whitelist:
            continue
        # Case: service deployed to cluster.instance
        if namespace in actual_deployments:
            formatted_instance = PaastaColors.blue(instance)
            # first 8 chars of the git sha are enough to identify the deploy
            version = actual_deployments[namespace][:8]
            # TODO: Perform sanity checks once per cluster instead of for each namespace
            status = execute_paasta_serviceinit_on_remote_master('status', cluster, service, instance, verbose=verbose)
        # Case: service NOT deployed to cluster.instance
        else:
            formatted_instance = PaastaColors.red(instance)
            version = 'None'
            status = None
        print ' instance: %s' % formatted_instance
        print ' Git sha: %s' % version
        if status is not None:
            # indent each remote status line under the instance header
            for line in status.rstrip().split('\n'):
                print ' %s' % line
    print report_invalid_whitelist_values(instance_whitelist, seen_instances, 'instance')
def format_tail_lines_for_mesos_task(tail_lines, task_id):
    """Render captured stdout/stderr tail lines for a mesos task as output rows.

    :param tail_lines: object with ``stdout``/``stderr`` (lists of lines or
        None) and ``error_message`` attributes
    :param task_id: printable task identifier used in the headers
    :returns: list of (possibly colorized) row strings; empty when nothing to show
    """
    rows = []
    # BUGFIX: the original condition `(stderr or stdout) is not None` let a
    # present-but-None stream through to len(None); test each explicitly and
    # guard the len()/iteration with `or []`.
    if tail_lines.stderr is not None or tail_lines.stdout is not None:
        if len(tail_lines.stderr or []) + len(tail_lines.stdout or []) == 0:
            # BUGFIX: message used to read "stderrr" (triple r)
            rows.append(
                PaastaColors.blue(f" no stdout/stderr for {task_id}"))
        else:
            for stdstream in ("stdout", "stderr"):
                rows.append(
                    PaastaColors.blue(f"{stdstream} tail for {task_id}"))
                rows.extend(f" {line}" for line in getattr(tail_lines, stdstream, None) or [])
    elif tail_lines.error_message is not None:
        rows.append(PaastaColors.red(f" {tail_lines.error_message}"))
    return rows
def failure(msg, link):
    """Format a paasta check failure message.

    :param msg: a string describing the failure
    :param link: a URL with more information, rendered in blue
    :return: a beautiful string
    """
    mark = x_mark()
    blue_link = PaastaColors.blue(link)
    return "{} {} {}".format(mark, msg, blue_link)
def print_tron_status(
    service: str,
    instance: str,
    output: List[str],
    tron_status,
    verbose: int = 0,
) -> int:
    """Render a Tron job/action status report into ``output``.

    Higher ``verbose`` levels add schedule/dashboard info (>=1) and raw
    command plus captured stdout/stderr (>1).

    :returns: always 0.
    """
    lines = []
    lines.append(f" Tron job: {tron_status.job_name}")
    if verbose:
        lines.append(f" Status: {tron_status.job_status}")
        lines.append(f" Schedule: {tron_status.job_schedule}")
        lines.append(" Dashboard: {}".format(PaastaColors.blue(tron_status.job_url)))

    lines.append(f" Action: {tron_status.action_name}")
    lines.append(f" Status: {tron_status.action_state}")
    if verbose:
        lines.append(f" Start time: {tron_status.action_start_time}")
        lines.append(f" Command: {tron_status.action_command}")
    if verbose > 1:
        lines.append(f" Raw Command: {tron_status.action_raw_command}")
        lines.append(f" Stdout: \n{tron_status.action_stdout}")
        lines.append(f" Stderr: \n{tron_status.action_stderr}")

    output.extend(lines)
    return 0
def _auto_add_timeout_for_job(cmd, timeout_job_runtime): # Timeout only to be added for spark-submit commands # TODO: Add timeout for jobs using mrjob with spark-runner if "spark-submit" not in cmd: return cmd try: timeout_present = re.match( r"^.*timeout[\s]+[\d]*[m|h][\s]+spark-submit .*$", cmd ) if not timeout_present: split_cmd = cmd.split("spark-submit") cmd = f"{split_cmd[0]}timeout {timeout_job_runtime} spark-submit{split_cmd[1]}" print( PaastaColors.blue( f"NOTE: Job will exit in given time {timeout_job_runtime}. " f"Adjust timeout value using --timeout-job-timeout. " f"New Updated Command with timeout: {cmd}" ), ) except Exception as e: err_msg = ( f"'timeout' could not be added to command: '{cmd}' due to error '{e}'. " "Please report to #spark." ) log.warn(err_msg) print(PaastaColors.red(err_msg)) return cmd
def gen_output(task_id, file1, file2, nlines, raise_what):
    """Build the expected tail output for a task's stdout/stderr files.

    :param task_id: printable task identifier
    :param file1: (filename, list-of-lines) pair
    :param file2: (filename, list-of-lines) pair
    :param nlines: number of trailing lines to include per file
    :param raise_what: falsy for the happy path; otherwise the name of the
        exception that was raised (``'TimeoutError'`` is reported as ``'timeout'``)
    :returns: list of expected output lines
    """
    error_message = PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    if raise_what:
        label = 'timeout' if raise_what == 'TimeoutError' else raise_what
        return [error_message % (task_id, label)]

    output = []
    # reverse sort because stdout is supposed to always come before stderr in the output
    for entry in sorted([file1, file2], key=lambda pair: pair[0], reverse=True):
        output.append(PaastaColors.blue(" %s tail for %s" % (entry[0], task_id)))
        output.extend(entry[1][-nlines:])
        output.append(PaastaColors.blue(" %s EOF" % entry[0]))
    return output
def paasta_status_on_api_endpoint(cluster, service, instance, system_paasta_config, verbose):
    """Fetch an instance's status from the paasta API and print a Marathon report.

    :returns: 0 on success or non-Marathon instance, 1 when the Marathon
        status carries an error message, or the HTTP status code when the API
        call fails.
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print('Cannot get a paasta-api client')
        exit(1)
    try:
        status = client.service.status_instance(service=service, instance=instance).result()
    except HTTPError as exc:
        # surface the API's own error text, propagate its status code
        paasta_print(exc.response.text)
        return exc.status_code
    paasta_print('instance: %s' % PaastaColors.blue(instance))
    paasta_print('Git sha: %s (desired)' % status.git_sha)
    marathon_status = status.marathon
    if marathon_status is None:
        paasta_print(
            "Not implemented: Looks like %s is not a Marathon instance" % instance)
        return 0
    elif marathon_status.error_message:
        paasta_print(marathon_status.error_message)
        return 1
    # human-readable bounce and desired-state summaries
    bouncing_status = bouncing_status_human(
        marathon_status.app_count,
        marathon_status.bounce_method,
    )
    desired_state = desired_state_human(
        marathon_status.desired_state,
        marathon_status.expected_instance_count,
    )
    paasta_print("State: %s - Desired state: %s" % (bouncing_status, desired_state))
    # NOTE: `status` is rebound here from the API result to the deploy-status enum
    status = MarathonDeployStatus.fromstring(marathon_status.deploy_status)
    if status != MarathonDeployStatus.NotRunning:
        if status == MarathonDeployStatus.Delayed:
            # Delayed deploys include the backoff so the human string can show it
            deploy_status = marathon_app_deploy_status_human(
                status, marathon_status.backoff_seconds)
        else:
            deploy_status = marathon_app_deploy_status_human(status)
    else:
        deploy_status = 'NotRunning'
    paasta_print(
        status_marathon_job_human(
            service=service,
            instance=instance,
            deploy_status=deploy_status,
            desired_app_id=marathon_status.app_id,
            app_count=marathon_status.app_count,
            running_instances=marathon_status.running_instance_count,
            normal_instance_count=marathon_status.expected_instance_count,
        ),
    )
    return 0
def get_marathon_dashboard(client, dashboards, app_id):
    """Return a one-line pointer to the Marathon dashboard for an app.

    Falls back to a bolded app ID when no dashboard URL is configured for
    the given client.
    """
    base_url = dashboards.get(client) if dashboards is not None else None
    if not base_url:
        return " Marathon app ID: %s" % PaastaColors.bold(app_id)
    dashboard_url = "{}/ui/#/apps/%2F{}".format(base_url.rstrip('/'), app_id.lstrip('/'))
    return " Marathon dashboard: %s" % PaastaColors.blue(dashboard_url)
def format_stdstreams_tail_for_task(task, get_short_task_id, nlines=10):
    """Returns the formatted "tail" of stdout/stderr, for a given task.

    NOTE: Python 2 code (``xrange``, ``e.message``).

    :param task: a mesos task dict; only ``task['id']`` is read here.
    :param get_short_task_id: A function which given a task_id returns a short task_id suitable for printing.
    :param nlines: number of lines to read from the end of each file.
    :returns: a list of output lines; errors become lines rather than raising.
    """
    error_message = PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    output = []
    mesos_cli_config = get_mesos_config()
    try:
        fobjs = list(cluster.get_files_for_tasks(
            task_list=[task],
            file_list=['stdout', 'stderr'],
            max_workers=mesos_cli_config["max_workers"]
        ))
        # reverse sort so stdout always comes before stderr in the output
        fobjs.sort(key=lambda fobj: fobj.path, reverse=True)
        if not fobjs:
            output.append(PaastaColors.blue(" no stdout/stderrr for %s" % get_short_task_id(task['id'])))
            return output
        for fobj in fobjs:
            output.append(PaastaColors.blue(" %s tail for %s" % (fobj.path, get_short_task_id(task['id']))))
            # read nlines, starting from EOF
            # mesos.cli is smart and can efficiently read a file backwards
            reversed_file = reversed(fobj)
            tail = []
            for _ in xrange(nlines):
                line = next(reversed_file, None)
                if line is None:
                    break
                tail.append(line)
            # reverse the tail, so that EOF is at the bottom again
            if tail:
                output.extend(tail[::-1])
            output.append(PaastaColors.blue(" %s EOF" % fobj.path))
    except (mesos_exceptions.MasterNotAvailableException,
            mesos_exceptions.SlaveDoesNotExist,
            mesos_exceptions.TaskNotFoundException,
            mesos_exceptions.FileNotFoundForTaskException) as e:
        # expected mesos failures become a red one-line message
        output.append(error_message % (get_short_task_id(task['id']), e.message))
    except TimeoutError:
        output.append(error_message % (get_short_task_id(task['id']), 'timeout'))
    return output
def get_marathon_dashboard(
    client: marathon_tools.MarathonClient,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    app_id: str,
) -> str:
    """Return a one-line pointer to the Marathon dashboard for an app.

    Falls back to a bolded app ID when no dashboard URL is configured for
    the given client.
    """
    base_url = dashboards.get(client) if dashboards is not None else None
    if base_url:
        dashboard_url = "{}/ui/#/apps/%2F{}".format(base_url.rstrip("/"), app_id.lstrip("/"))
        return " Marathon dashboard: %s" % PaastaColors.blue(dashboard_url)
    return " Marathon app ID: %s" % PaastaColors.bold(app_id)
def paasta_status_on_api_endpoint(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    system_paasta_config: SystemPaastaConfig,
    verbose: int,
) -> int:
    """Fetch one instance's status from the paasta API and render it into ``output``.

    Appends the instance header, then dispatches to the printer matching the
    backend that reported a status (marathon/kubernetes/tron/adhoc/flink).

    :returns: 0 on success, 1 on connection/unknown errors or printer failure,
        or the HTTP status code when the API call fails.
    """
    output.append(" instance: %s" % PaastaColors.blue(instance))
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print("Cannot get a paasta-api client")
        exit(1)

    try:
        status = client.service.status_instance(
            service=service, instance=instance, verbose=verbose
        ).result()
    except HTTPError as exc:
        output.append(PaastaColors.red(exc.response.text))
        return exc.status_code
    except (BravadoConnectionError, BravadoTimeoutError) as exc:
        output.append(
            PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}")
        )
        return 1
    except Exception:
        # unexpected failure: dump the traceback into the report instead of crashing
        tb = sys.exc_info()[2]
        # FIX: dropped a pointless f-prefix on a placeholder-free string (F541)
        output.append(PaastaColors.red("Exception when talking to the API:"))
        output.extend(line.strip() for line in traceback.format_tb(tb))
        return 1

    if status.git_sha != "":
        output.append(" Git sha: %s (desired)" % status.git_sha)
    if status.marathon is not None:
        return print_marathon_status(service, instance, output, status.marathon)
    elif status.kubernetes is not None:
        return print_kubernetes_status(service, instance, output, status.kubernetes)
    elif status.tron is not None:
        return print_tron_status(service, instance, output, status.tron, verbose)
    elif status.adhoc is not None:
        return print_adhoc_status(
            cluster, service, instance, output, status.adhoc, verbose
        )
    elif status.flink is not None:
        return print_flink_status(
            cluster, service, instance, output, status.flink, verbose
        )
    else:
        paasta_print(
            "Not implemented: Looks like %s is not a Marathon or Kubernetes instance"
            % instance
        )
        return 0
def format_stdstreams_tail_for_task(task, get_short_task_id, nlines=10):
    """Returns the formatted "tail" of stdout/stderr, for a given task.

    NOTE: Python 2 code (``xrange``, ``e.message``); fetches files via mesos.cli.

    :param task: a mesos task dict; only ``task["id"]`` is read here.
    :param get_short_task_id: A function which given a task_id returns a short task_id suitable for printing.
    :param nlines: number of lines to read from the end of each file.
    :returns: a list of output lines; errors become lines rather than raising.
    """
    error_message = PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    output = []
    try:
        fobjs = list(mesos.cli.cluster.files(lambda x: x, flist=["stdout", "stderr"], fltr=task["id"]))
        # reverse sort so stdout always comes before stderr in the output
        fobjs.sort(key=lambda fobj: fobj.path, reverse=True)
        if not fobjs:
            output.append(PaastaColors.blue(" no stdout/stderrr for %s" % get_short_task_id(task["id"])))
            return output
        for fobj in fobjs:
            output.append(PaastaColors.blue(" %s tail for %s" % (fobj.path, get_short_task_id(task["id"]))))
            # read nlines, starting from EOF
            # mesos.cli is smart and can efficiently read a file backwards
            reversed_file = reversed(fobj)
            tail = []
            for _ in xrange(nlines):
                line = next(reversed_file, None)
                if line is None:
                    break
                tail.append(line)
            # reverse the tail, so that EOF is at the bottom again
            if tail:
                output.extend(tail[::-1])
            output.append(PaastaColors.blue(" %s EOF" % fobj.path))
    except (
        MasterNotAvailableException,
        SlaveNotAvailableException,
        TaskNotFoundException,
        FileNotFoundForTaskException,
    ) as e:
        # expected mesos failures become a red one-line message
        output.append(error_message % (get_short_task_id(task["id"]), e.message))
    except TimeoutError:
        output.append(error_message % (get_short_task_id(task["id"]), "timeout"))
    return output
def paasta_status_on_api_endpoint(cluster, service, instance, system_paasta_config, verbose):
    """Fetch an instance's status from the paasta API and print a Marathon report.

    :returns: 0 on success or non-Marathon instance, 1 when the Marathon
        status carries an error message, or the HTTP status code when the API
        call fails.
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print('Cannot get a paasta-api client')
        exit(1)
    try:
        status = client.service.status_instance(service=service, instance=instance).result()
    except HTTPError as exc:
        # surface the API's own error text, propagate its status code
        paasta_print(exc.response.text)
        return exc.status_code
    paasta_print('instance: %s' % PaastaColors.blue(instance))
    paasta_print('Git sha: %s (desired)' % status.git_sha)
    marathon_status = status.marathon
    if marathon_status is None:
        paasta_print("Not implemented: Looks like %s is not a Marathon instance" % instance)
        return 0
    elif marathon_status.error_message:
        paasta_print(marathon_status.error_message)
        return 1
    # human-readable bounce and desired-state summaries
    bouncing_status = bouncing_status_human(marathon_status.app_count, marathon_status.bounce_method)
    desired_state = desired_state_human(marathon_status.desired_state, marathon_status.expected_instance_count)
    paasta_print("State: %s - Desired state: %s" % (bouncing_status, desired_state))
    # NOTE: `status` is rebound here from the API result to the deploy-status enum
    status = MarathonDeployStatus.fromstring(marathon_status.deploy_status)
    if status != MarathonDeployStatus.NotRunning:
        if status == MarathonDeployStatus.Delayed:
            # Delayed deploys include the backoff so the human string can show it
            deploy_status = marathon_app_deploy_status_human(status, marathon_status.backoff_seconds)
        else:
            deploy_status = marathon_app_deploy_status_human(status)
    else:
        deploy_status = 'NotRunning'
    paasta_print(
        status_marathon_job_human(
            service,
            instance,
            deploy_status,
            marathon_status.app_id,
            marathon_status.running_instance_count,
            marathon_status.expected_instance_count,
        )
    )
    return 0
def gen_output(task_id, file1, file2, nlines, raise_what):
    """Build the expected tail output for a task's stdout/stderr files.

    :param task_id: printable task identifier
    :param file1: (filename, list-of-lines) pair
    :param file2: (filename, list-of-lines) pair
    :param nlines: number of trailing lines to include per file
    :param raise_what: falsy for the happy path; otherwise the exception class
        whose ``__name__`` is reported
    :returns: list of expected output lines
    """
    error_message = " " + PaastaColors.red(" couldn't read stdout/stderr for %s (%s)")
    if raise_what:
        return [error_message % (task_id, raise_what.__name__)]

    output = []
    # reverse sort because stdout is supposed to always come before stderr in the output
    for entry in sorted([file1, file2], key=lambda pair: pair[0], reverse=True):
        output.append(" " + PaastaColors.blue("{} tail for {}".format(entry[0], task_id)))
        output.extend(f" {line}" for line in entry[1][-nlines:])
    return output
def paasta_status_on_api_endpoint(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    system_paasta_config: SystemPaastaConfig,
    verbose: int,
) -> int:
    """Fetch one instance's status from the paasta API and render it into ``output``.

    Dispatches to the printer matching the backend that reported a status
    (marathon/kubernetes/tron/adhoc/flink/chronos).

    :returns: 0 on success, the printer's return code, or the HTTP status
        code when the API call fails.
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print("Cannot get a paasta-api client")
        exit(1)
    try:
        status = client.service.status_instance(
            service=service, instance=instance, verbose=verbose
        ).result()
    except HTTPError as exc:
        # surface the API's own error text, propagate its status code
        paasta_print(exc.response.text)
        return exc.status_code

    output.append(" instance: %s" % PaastaColors.blue(instance))
    if status.git_sha != "":
        output.append(" Git sha: %s (desired)" % status.git_sha)
    # exactly one backend field is expected to be non-None; first match wins
    if status.marathon is not None:
        return print_marathon_status(service, instance, output, status.marathon)
    elif status.kubernetes is not None:
        return print_kubernetes_status(service, instance, output, status.kubernetes)
    elif status.tron is not None:
        return print_tron_status(service, instance, output, status.tron, verbose)
    elif status.adhoc is not None:
        return print_adhoc_status(
            cluster, service, instance, output, status.adhoc, verbose
        )
    elif status.flink is not None:
        # flink reports a dict; only its "status" sub-document is printed
        return print_flink_status(
            cluster, service, instance, output, status.flink.get("status"), verbose
        )
    elif status.chronos is not None:
        return print_chronos_status(output, status.chronos.output)
    else:
        paasta_print(
            "Not implemented: Looks like %s is not a Marathon or Kubernetes instance"
            % instance
        )
        return 0
def paasta_status_on_api_endpoint(cluster, service, instance, system_paasta_config, verbose):
    """Fetch an instance's status from the paasta API and print a Marathon report.

    NOTE: Python 2 code (statement-form ``print``); returns None in all paths.
    NOTE(review): unlike later variants, ``status.marathon`` is not checked for
    None before use — presumably callers only pass Marathon instances; verify.
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        print 'Cannot get a paasta-api client'
        exit(1)
    try:
        status = client.service.status_instance(service=service, instance=instance).result()
    except HTTPError as exc:
        # surface the API's own error text and bail out
        print exc.response.text
        return
    print 'instance: %s' % PaastaColors.blue(instance)
    print 'Git sha: %s (desired)' % status.git_sha
    marathon_status = status.marathon
    if marathon_status.error_message:
        print marathon_status.error_message
        return
    # human-readable bounce and desired-state summaries
    bouncing_status = bouncing_status_human(marathon_status.app_count, marathon_status.bounce_method)
    desired_state = desired_state_human(marathon_status.desired_state, marathon_status.expected_instance_count)
    print "State: %s - Desired state: %s" % (bouncing_status, desired_state)
    # NOTE: `status` is rebound here from the API result to the deploy-status enum
    status = MarathonDeployStatus.fromstring(marathon_status.deploy_status)
    if status != MarathonDeployStatus.NotRunning:
        if status == MarathonDeployStatus.Delayed:
            # Delayed deploys include the backoff so the human string can show it
            deploy_status = marathon_app_deploy_status_human(status, marathon_status.backoff_seconds)
        else:
            deploy_status = marathon_app_deploy_status_human(status)
    else:
        deploy_status = 'NotRunning'
    print status_marathon_job_human(service, instance, deploy_status, marathon_status.app_id, marathon_status.running_instance_count, marathon_status.expected_instance_count)
def main():
    """CLI entry point: run a serviceinit command against selected instances.

    NOTE: Python 2 code (statement-form ``print``). Exits with the maximum
    return code observed across all instances.
    """
    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    instances = []
    return_codes = []
    command = args.command
    # instances come either from one composed job id or an explicit list
    if (args.service_instance):
        service_instance = args.service_instance
        service, instance, _, __ = decompose_job_id(service_instance)
        instances.append(instance)
    elif (args.service and args.instances):
        service = args.service
        instances = args.instances.split(',')
    else:
        log.error(
            "The name of service or the name of instance to inspect is missing. Exiting."
        )
        sys.exit(1)

    # Setting up transparent cache for http API calls
    requests_cache.install_cache("paasta_serviceinit", backend="memory")

    cluster = load_system_paasta_config().get_cluster()
    actual_deployments = get_actual_deployments(service, args.soa_dir)
    for instance in instances:
        # For an instance, there might be multiple versions running, e.g. in crossover bouncing.
        # In addition, mesos master does not have information of a chronos service's git hash.
        # The git sha in deployment.json is simply used here.
        version = actual_deployments['.'.join((cluster, instance))][:8]
        print 'instance: %s' % PaastaColors.blue(instance)
        print 'Git sha: %s (desired)' % version
        try:
            instance_type = validate_service_instance(service, instance, cluster, args.soa_dir)
            if instance_type == 'marathon':
                return_code = marathon_serviceinit.perform_command(
                    command=command,
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    verbose=args.verbose,
                    soa_dir=args.soa_dir,
                    app_id=args.app_id,
                    delta=args.delta,
                )
            elif instance_type == 'chronos':
                return_code = chronos_serviceinit.perform_command(
                    command=command,
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    verbose=args.verbose,
                    soa_dir=args.soa_dir,
                )
            else:
                log.error(
                    "I calculated an instance_type of %s for %s which I don't know how to handle."
                    % (instance_type, compose_job_id(service, instance)))
                return_code = 1
        except Exception:
            # one bad instance must not stop the others; record failure and move on
            log.error(
                'Exception raised while looking at service %s instance %s:' %
                (service, instance))
            log.error(traceback.format_exc())
            return_code = 1
        return_codes.append(return_code)

    sys.exit(max(return_codes))
def info_mark() -> str:
    """Return the Unicode information symbol, colored blue."""
    info_symbol = "\u2139"
    return PaastaColors.blue(info_symbol)
def main(): args = parse_args() if args.debug: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) instances = [] return_codes = [] command = args.command if (args.service_instance): service_instance = args.service_instance service, instance, _, __ = decompose_job_id(service_instance) instances.append(instance) elif (args.service and args.instances): service = args.service instances = args.instances.split(',') else: log.error("The name of service or the name of instance to inspect is missing. Exiting.") sys.exit(1) # Setting up transparent cache for http API calls requests_cache.install_cache("paasta_serviceinit", backend="memory") cluster = load_system_paasta_config().get_cluster() actual_deployments = get_actual_deployments(service, args.soa_dir) for instance in instances: # For an instance, there might be multiple versions running, e.g. in crossover bouncing. # In addition, mesos master does not have information of a chronos service's git hash. # The git sha in deployment.json is simply used here. version = actual_deployments['.'.join((cluster, instance))][:8] print 'instance: %s' % PaastaColors.blue(instance) print 'Git sha: %s (desired)' % version try: instance_type = validate_service_instance(service, instance, cluster, args.soa_dir) if instance_type == 'marathon': return_code = marathon_serviceinit.perform_command( command=command, service=service, instance=instance, cluster=cluster, verbose=args.verbose, soa_dir=args.soa_dir, app_id=args.app_id, delta=args.delta, ) elif instance_type == 'chronos': return_code = chronos_serviceinit.perform_command( command=command, service=service, instance=instance, cluster=cluster, verbose=args.verbose, soa_dir=args.soa_dir, ) else: log.error("I calculated an instance_type of %s for %s which I don't know how to handle." 
% (instance_type, compose_job_id(service, instance))) return_code = 1 except: log.error('Exception raised while looking at service %s instance %s:' % (service, instance)) log.error(traceback.format_exc()) return_code = 1 return_codes.append(return_code) sys.exit(max(return_codes))
def main():
    """CLI entry point: run a serviceinit command against selected instances.

    Exits with the maximum return code observed across all instances.
    NOTE(review): adhoc instances are skipped without appending a return code;
    if every instance is adhoc, ``max(return_codes)`` would raise on an empty
    list — confirm callers never hit this.
    """
    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    instances = []
    return_codes = []
    command = args.command
    # instances come either from one composed job id or an explicit list
    if (args.service_instance):
        service_instance = args.service_instance
        service, instance, _, __ = decompose_job_id(service_instance)
        instances.append(instance)
    elif (args.service and args.instances):
        service = args.service
        instances = args.instances.split(',')
    else:
        log.error(
            "The name of service or the name of instance to inspect is missing. Exiting."
        )
        sys.exit(1)

    # Setting up transparent cache for http API calls
    requests_cache.install_cache("paasta_serviceinit", backend="memory")

    cluster = load_system_paasta_config().get_cluster()
    actual_deployments = get_actual_deployments(service, args.soa_dir)
    # status commands reuse cached clients
    clients = PaastaClients(cached=(command == 'status'))
    for instance in instances:
        try:
            instance_type = validate_service_instance(service, instance, cluster, args.soa_dir)
            if instance_type == 'adhoc':
                continue
            version = get_deployment_version(actual_deployments, cluster, instance)
            paasta_print('instance: %s' % PaastaColors.blue(instance))
            paasta_print('Git sha: %s (desired)' % version)
            if instance_type == 'marathon':
                return_code = marathon_serviceinit.perform_command(
                    command=command,
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    verbose=args.verbose,
                    soa_dir=args.soa_dir,
                    app_id=args.app_id,
                    delta=args.delta,
                    client=clients.marathon(),
                )
            elif instance_type == 'chronos':
                return_code = chronos_serviceinit.perform_command(
                    command=command,
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    verbose=args.verbose,
                    soa_dir=args.soa_dir,
                    client=clients.chronos(),
                )
            elif instance_type == 'paasta_native':
                return_code = paasta_native_serviceinit.perform_command(
                    command=command,
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    verbose=args.verbose,
                    soa_dir=args.soa_dir,
                )
            else:
                log.error(
                    "I calculated an instance_type of %s for %s which I don't know how to handle."
                    % (instance_type, compose_job_id(service, instance)))
                return_code = 1
        except Exception:
            # one bad instance must not stop the others; record failure and move on
            log.error(
                'Exception raised while looking at service %s instance %s:' %
                (service, instance))
            log.error(traceback.format_exc())
            return_code = 1
        return_codes.append(return_code)

    sys.exit(max(return_codes))
def main() -> None:
    """CLI entry point: run a serviceinit command against selected instances.

    Instances are first validated and bucketed by type (marathon, chronos,
    paasta_native, adhoc), then the command is dispatched per bucket. Exits
    with the maximum return code observed.
    """
    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    instances = []
    return_codes = []
    command = args.command
    # instances come either from one composed job id or an explicit list
    if (args.service_instance):
        service_instance = args.service_instance
        service, instance, _, __ = decompose_job_id(service_instance)
        instances.append(instance)
    elif (args.service and args.instances):
        service = args.service
        instances = args.instances.split(',')
    else:
        log.error(
            "The name of service or the name of instance to inspect is missing. Exiting."
        )
        sys.exit(1)

    # Setting up transparent cache for http API calls
    requests_cache.install_cache("paasta_serviceinit", backend="memory")

    cluster = load_system_paasta_config().get_cluster()
    actual_deployments = get_actual_deployments(service, args.soa_dir)
    # status commands reuse cached clients
    clients = PaastaClients(cached=(command == 'status'))

    instance_types = ['marathon', 'chronos', 'paasta_native', 'adhoc']
    instance_types_map: Dict[str, List[str]] = {it: [] for it in instance_types}
    # bucket each requested instance by its validated type
    for instance in instances:
        try:
            instance_type = validate_service_instance(
                service, instance, cluster, args.soa_dir,
            )
        except Exception:
            # BUGFIX: this message used %s placeholders with .format(), which
            # logged a literal "%s instance %s"; use {} placeholders instead.
            log.error(
                ('Exception raised while looking at service {} instance {}:'
                 ).format(service, instance),
            )
            log.error(traceback.format_exc())
            return_codes.append(1)
            continue

        if instance_type not in instance_types:
            log.error(
                ("I calculated an instance_type of {} for {} which I don't "
                 "know how to handle.").format(
                    instance_type, compose_job_id(service, instance),
                ),
            )
            return_codes.append(1)
        else:
            instance_types_map[instance_type].append(instance)

    remote_run_frameworks = None
    # adhoc reporting needs the remote-run frameworks; fetch them once
    if len(instance_types_map['adhoc']) > 0:
        remote_run_frameworks = paasta_remote_run.remote_run_frameworks()

    service_config_loader = PaastaServiceConfigLoader(service)

    for instance_type in instance_types:
        if instance_type == 'marathon':
            # marathon needs per-instance job configs, loaded once per bucket
            job_configs = {
                jc.instance: jc
                for jc in service_config_loader.instance_configs(
                    cluster=cluster,
                    instance_type_class=marathon_tools.MarathonServiceConfig,
                )
            }

        for instance in instance_types_map[instance_type]:
            try:
                version = get_deployment_version(
                    actual_deployments, cluster, instance,
                )
                paasta_print('instance: %s' % PaastaColors.blue(instance))
                paasta_print('Git sha: %s (desired)' % version)
                if instance_type == 'marathon':
                    return_code = marathon_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                        app_id=args.app_id,
                        clients=clients.marathon(),
                        job_config=job_configs[instance],
                    )
                elif instance_type == 'chronos':
                    return_code = chronos_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                        client=clients.chronos(),
                    )
                elif instance_type == 'paasta_native':
                    return_code = paasta_native_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                    )
                elif instance_type == 'adhoc':
                    # only `status` is supported for adhoc instances
                    if command != 'status':
                        raise NotImplementedError
                    paasta_remote_run.remote_run_list_report(
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        frameworks=remote_run_frameworks,
                    )
                    return_code = 0
            except Exception:
                # one bad instance must not stop the others; record and move on
                log.error(('Exception raised while looking at service {} '
                           'instance {}:').format(service, instance),
                          )
                log.error(traceback.format_exc())
                return_code = 1

            return_codes.append(return_code)

    sys.exit(max(return_codes))
def paasta_spark_run(args):
    """Build the Spark configuration for a paasta service instance and run it
    in a docker container.

    :param args: parsed CLI arguments (service/instance/cluster selection,
        docker image options, spark args, etc.).
    :returns: exit code — 1 on configuration errors, otherwise the return
        value of ``configure_and_run_docker_container``.
    """
    # argparse does not work as expected with both default and
    # type=validate_work_dir.
    validate_work_dir(args.work_dir)

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        # BUGFIX: the adjacent string literals were missing separating
        # spaces, printing "indicatesPaaSTA" and "behavethe".
        print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates "
                "PaaSTA is not configured locally on this host, and local-run may not behave "
                "the same way it would behave on a server configured for PaaSTA."
            ),
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, "/etc/paasta")

    # jupyter-lab cannot be run against a locally built image.
    if args.cmd == "jupyter-lab" and not args.build and not args.image:
        print(
            PaastaColors.red(
                "The jupyter-lab command requires a prebuilt image with -I or --image."
            ),
            file=sys.stderr,
        )
        return 1

    # Use the default spark:client instance configs if not provided
    try:
        instance_config = get_instance_config(
            service=args.service,
            instance=args.instance,
            cluster=system_paasta_config.get_cluster_aliases().get(
                args.cluster, args.cluster
            ),
            # Deployments are only needed when running a released image.
            load_deployments=args.build is False and args.image is None,
            soa_dir=args.yelpsoa_config_root,
        )
    except NoConfigurationForServiceError as e:
        print(str(e), file=sys.stderr)
        return 1
    except NoDeploymentsAvailable:
        # BUGFIX: the concatenated literals ran the sentences together with no
        # separators; join them with newlines for a readable error.
        print(
            PaastaColors.red(
                "Error: No deployments.json found in %(soa_dir)s/%(service)s.\n"
                "You can generate this by running:\n"
                "generate_deployments_for_service -d %(soa_dir)s -s %(service)s"
                % {"soa_dir": args.yelpsoa_config_root, "service": args.service}
            ),
            file=sys.stderr,
        )
        return 1

    if not args.cmd and not instance_config.get_cmd():
        print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr,
        )
        return 1

    aws_creds = get_aws_credentials(
        service=args.service,
        no_aws_credentials=args.no_aws_credentials,
        aws_credentials_yaml=args.aws_credentials_yaml,
        profile_name=args.aws_profile,
    )
    docker_image = get_docker_image(args, instance_config)
    if docker_image is None:
        return 1

    pod_template_path = generate_pod_template_path()
    args.enable_compact_bin_packing = should_enable_compact_bin_packing(
        args.disable_compact_bin_packing, args.cluster_manager
    )

    volumes = instance_config.get_volumes(system_paasta_config.get_volumes())
    app_base_name = get_spark_app_name(args.cmd or instance_config.get_cmd())

    if args.enable_compact_bin_packing:
        document = POD_TEMPLATE.format(
            spark_pod_label=limit_size_with_hash(f"exec-{app_base_name}"),
        )
        # safe_load instead of bare yaml.load: the latter is deprecated
        # (PyYAML >= 5.1) and unsafe without an explicit Loader; the pod
        # template is plain YAML generated locally, so parsing is identical.
        parsed_pod_template = yaml.safe_load(document)
        with open(pod_template_path, "w") as f:
            yaml.dump(parsed_pod_template, f)

    # A locally built image has no registry credentials to configure.
    needs_docker_cfg = not args.build
    user_spark_opts = _parse_user_spark_args(
        args.spark_args, pod_template_path, args.enable_compact_bin_packing
    )

    args.cmd = _auto_add_timeout_for_job(args.cmd, args.timeout_job_runtime)

    # This is required if configs are provided as part of `spark-submit`
    # Other way to provide is with --spark-args
    sub_cmds = args.cmd.split(" ")  # spark.driver.memory=10g
    for cmd in sub_cmds:
        if cmd.startswith("spark.driver.memory") or cmd.startswith(
            "spark.driver.cores"
        ):
            key, value = cmd.split("=")
            user_spark_opts[key] = value

    paasta_instance = get_smart_paasta_instance_name(args)
    auto_set_temporary_credentials_provider = (
        args.disable_temporary_credentials_provider is False
    )
    spark_conf = get_spark_conf(
        cluster_manager=args.cluster_manager,
        spark_app_base_name=app_base_name,
        docker_img=docker_image,
        user_spark_opts=user_spark_opts,
        paasta_cluster=args.cluster,
        paasta_pool=args.pool,
        paasta_service=args.service,
        paasta_instance=paasta_instance,
        extra_volumes=volumes,
        aws_creds=aws_creds,
        needs_docker_cfg=needs_docker_cfg,
        auto_set_temporary_credentials_provider=auto_set_temporary_credentials_provider,
    )
    # Experimental: TODO: Move to service_configuration_lib once confirmed that there are no issues
    # Enable AQE: Adaptive Query Execution
    if "spark.sql.adaptive.enabled" not in spark_conf:
        spark_conf["spark.sql.adaptive.enabled"] = "true"
        aqe_msg = "Spark performance improving feature Adaptive Query Execution (AQE) is enabled. Set spark.sql.adaptive.enabled as false to disable."
        log.info(aqe_msg)
        print(PaastaColors.blue(aqe_msg))
    return configure_and_run_docker_container(
        args,
        docker_img=docker_image,
        instance_config=instance_config,
        system_paasta_config=system_paasta_config,
        spark_conf=spark_conf,
        aws_creds=aws_creds,
        cluster_manager=args.cluster_manager,
        pod_template_path=pod_template_path,
    )