def paasta_rollback(args):
    """Call mark_for_deployment with rollback parameters.

    :param args: contains all the arguments passed onto the script: service,
        deploy groups and sha. These arguments will be verified and passed
        onto mark_for_deployment.

    Exits the process via sys.exit with 1 when no valid deploy group was
    given, otherwise with the highest return code reported by
    mark_for_deployment across all deploy groups.
    """
    service = figure_out_service_name(args)
    git_url = get_git_url(service)
    commit = args.commit
    # Drop empty entries produced by stray commas ("a,,b" or a trailing ",").
    given_deploy_groups = [deploy_group for deploy_group in args.deploy_groups.split(",") if deploy_group]

    service_deploy_groups = set(
        config.get_deploy_group()
        for config in get_instance_config_for_service(
            soa_dir=DEFAULT_SOA_DIR,
            service=service,
        )
    )
    deploy_groups, invalid = validate_given_deploy_groups(service_deploy_groups, given_deploy_groups)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(invalid) > 0:
        print(PaastaColors.yellow("These deploy groups are not valid and will be skipped: %s.\n" %
                                  (",").join(invalid)))

    if len(deploy_groups) == 0:
        print(PaastaColors.red("ERROR: No valid deploy groups specified for %s.\n" % (service)))
        returncode = 1
    else:
        returncode = 0

    for deploy_group in deploy_groups:
        # Keep the worst return code so a failure in an earlier deploy group
        # is not hidden by a later success.
        returncode = max(
            mark_for_deployment(
                git_url=git_url,
                service=service,
                deploy_group=deploy_group,
                commit=commit,
            ),
            returncode,
        )

    sys.exit(returncode)
def status_mesos_tasks(service, instance, normal_instance_count):
    """Summarise how many Mesos tasks are running for a service instance.

    :param service: service name, used to build the job id
    :param instance: instance name, used to build the job id
    :param normal_instance_count: number of tasks expected to be running
    :returns: a one-line string; green "Healthy" when the running count is
        at least the expected count, red "Critical" when it is zero and
        yellow "Warning" otherwise
    """
    job_id = marathon_tools.format_job_id(service, instance)
    task_total = len(get_running_tasks_from_active_frameworks(job_id))

    # Pick the colour and the label once; both apply to status and ratio.
    if task_total >= normal_instance_count:
        paint, label = PaastaColors.green, "Healthy"
    elif task_total == 0:
        paint, label = PaastaColors.red, "Critical"
    else:
        paint, label = PaastaColors.yellow, "Warning"

    status = paint(label)
    ratio = paint("(%d/%d)" % (task_total, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos: %s - %s tasks in the %s state." % (status, ratio, running_string)
def test_format_chronos_job_one_mesos_task():
    """One running Mesos task makes the job status contain a yellow 'Running'."""
    actual = chronos_serviceinit.format_chronos_job_status(
        {},  # example_job
        '',  # desired_state
        ['slay the nemean lion'],  # running_tasks
        False,  # verbose
    )
    expected_fragment = PaastaColors.yellow('Running')
    assert expected_fragment in actual
def marathon_mesos_status_summary(mesos_task_count, expected_instance_count) -> str:
    """Build the one-line Mesos health summary from two task counts.

    :param mesos_task_count: number of Mesos tasks observed
    :param expected_instance_count: number of tasks expected
    :returns: "Mesos: <status> - (<observed>/<expected>) tasks in the
        TASK_RUNNING state.", coloured green/"Healthy" when observed >=
        expected, red/"Critical" when observed is 0, yellow/"Warning"
        otherwise
    """
    if mesos_task_count >= expected_instance_count:
        paint, label = PaastaColors.green, "Healthy"
    elif mesos_task_count == 0:
        paint, label = PaastaColors.red, "Critical"
    else:
        paint, label = PaastaColors.yellow, "Warning"

    status = paint(label)
    count_str = paint("(%d/%d)" % (mesos_task_count, expected_instance_count))
    running_string = PaastaColors.bold("TASK_RUNNING")
    return f"Mesos: {status} - {count_str} tasks in the {running_string} state."
def perform_http_healthcheck(url, timeout):
    """Send a HEAD request to ``url`` and report whether it counts as healthy.

    :param url: the healthcheck url
    :param timeout: timeout in seconds
    :returns: a tuple of (bool, output string). The bool is True only when
        the request completes within ``timeout`` seconds with a status code
        in [200, 400) and without a comma in the content-type header.
    """
    try:
        with Timeout(seconds=timeout):
            try:
                res = requests.head(url)
            except requests.ConnectionError:
                return (False, "http request failed: connection failed")
    except TimeoutError:
        return (False, "http request timed out after %d seconds" % timeout)

    if 'content-type' in res.headers and ',' in res.headers['content-type']:
        sys.stdout.write(PaastaColors.yellow(
            "Multiple content-type headers detected in response."
            " The Mesos healthcheck system will treat this as a failure!"))
        return (False, "http request succeeded, code %d" % res.status_code)
    # check if response code is valid per https://mesosphere.github.io/marathon/docs/health-checks.html
    elif res.status_code >= 200 and res.status_code < 400:
        return (True, "http request succeeded, code %d" % res.status_code)
    else:
        # Everything outside [200, 400) is a failure. This includes codes
        # below 200, which previously fell through and returned None even
        # though callers expect a (bool, str) tuple.
        return (False, "http request failed, code %d" % res.status_code)
def get_cmd_string():
    """Return the Dockerfile command, highlighted, with an explanatory preamble.

    The command comes from get_dockerfile_cmd() and is wrapped in
    PaastaColors.yellow before being embedded in the message.
    """
    highlighted_cmd = PaastaColors.yellow(get_dockerfile_cmd())
    template = (
        'You are in interactive mode, which may not run the exact command\n'
        'that PaaSTA would have run. Here is the command from the Dockerfile:\n'
        '%s\n'
    )
    return template % highlighted_cmd
def run_healthcheck_on_container(docker_client, container_id, healthcheck_mode, healthcheck_data, timeout):
    """Dispatch a single healthcheck to the handler matching ``healthcheck_mode``.

    :param docker_client: passed through to the 'cmd' healthcheck
    :param container_id: Docker container id
    :param healthcheck_mode: one of 'http', 'tcp', or 'cmd'
    :param healthcheck_data: a URL when healthcheck_mode is 'http' or 'tcp',
        a command if healthcheck_mode is 'cmd'
    :param timeout: timeout in seconds for individual check
    :returns: a tuple of (bool, output string); (False, "unknown") after a
        warning is written to stdout when the mode is not supported
    """
    if healthcheck_mode == 'cmd':
        return perform_cmd_healthcheck(docker_client, container_id, healthcheck_data, timeout)
    if healthcheck_mode == 'http':
        return perform_http_healthcheck(healthcheck_data, timeout)
    if healthcheck_mode == 'tcp':
        return perform_tcp_healthcheck(healthcheck_data, timeout)

    warning = "Healthcheck mode '%s' is not currently supported!\n" % healthcheck_mode
    sys.stdout.write(PaastaColors.yellow(warning))
    return (False, "unknown")
def guess_instance(service, cluster, args):
    """Pick the instance to use.

    Returns args.instance when it is set. Otherwise lists the instances of
    ``service`` in ``cluster`` and prefers 'main', falling back to the first
    instance listed; a notice about the guess is written to stdout. When the
    service has no configuration, an error is written to stdout and the
    process exits with status 2.
    """
    if args.instance:
        return args.instance

    try:
        candidates = list_all_instances_for_service(
            service=service,
            clusters=[cluster],
            instance_type=None,
            soa_dir=args.yelpsoa_config_root,
        )
        instance = 'main' if 'main' in candidates else list(candidates)[0]
    except NoConfigurationForServiceError:
        error_text = (
            'Could not automatically detect instance to emulate. Please specify one with the --instance option.\n'
        )
        sys.stdout.write(PaastaColors.red(error_text))
        sys.exit(2)

    notice = 'Guessing instance configuration for %s. To override, use the --instance option.\n' % instance
    sys.stdout.write(PaastaColors.yellow(notice))
    return instance
def test_format_chronos_job_status_no_last_run():
    """A job with empty lastError/lastSuccess is reported as 'New' and '(never)'."""
    never_run_job = {
        "lastError": "",
        "lastSuccess": "",
        "schedule": "foo",
    }
    no_running_tasks = []
    actual = chronos_serviceinit.format_chronos_job_status(never_run_job, no_running_tasks, False)
    new_marker = PaastaColors.yellow("New")
    assert new_marker in actual
    assert "(never)" in actual
def test_format_chronos_job_one_mesos_task():
    """One running Mesos task makes the job status contain a yellow 'Running'."""
    single_task = ['slay the nemean lion']
    actual = chronos_serviceinit.format_chronos_job_status({}, single_task, False)
    expected_fragment = PaastaColors.yellow('Running')
    assert expected_fragment in actual
def format_haproxy_backend_row(backend, is_correct_instance):
    """Turn one haproxy backend record into a row of display strings.

    The fields used are the ones described in the CSV format of haproxy:
    http://www.haproxy.org/download/1.5/doc/configuration.txt

    :param backend: mapping with the keys 'svname', 'status', 'check_status',
        'check_code', 'check_duration' and 'lastchg'
    :param is_correct_instance: when false, every column has its ANSI escape
        sequences removed and is rendered grey instead
    :returns: a tuple (name, last check, last change, status)
    """
    svname = backend['svname']
    name_parts = svname.split("_")
    # Host is the last "_" chunk; port is whatever follows the last ":" in the first chunk.
    hostname = name_parts[-1]
    port = name_parts[0].split(":")[-1]
    pretty_backend_name = "%s:%s" % (hostname, port)

    state = backend['status']
    if state == "UP":
        status = PaastaColors.default(state)
    elif state in ('DOWN', 'MAINT'):
        status = PaastaColors.red(state)
    else:
        status = PaastaColors.yellow(state)

    lastcheck = "%s/%s in %sms" % (
        backend['check_status'],
        backend['check_code'],
        backend['check_duration'],
    )
    seconds_since_change = int(backend['lastchg'])
    lastchange = humanize.naturaltime(datetime.timedelta(seconds=seconds_since_change))

    row = (' %s' % pretty_backend_name, lastcheck, lastchange, status)
    if not is_correct_instance:
        return tuple(PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row)
    return row
def run_healthcheck_on_container(
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    timeout,
):
    """Dispatch a single healthcheck to the handler matching ``healthcheck_mode``.

    :param docker_client: passed through to the 'cmd' healthcheck
    :param container_id: Docker container id
    :param healthcheck_mode: one of 'http', 'https', 'tcp', or 'cmd'
    :param healthcheck_data: a URL when healthcheck_mode is 'http[s]' or 'tcp',
        a command if healthcheck_mode is 'cmd'
    :param timeout: timeout in seconds for individual check
    :returns: a tuple of (bool, output string); for an unsupported mode a
        warning is printed and sys.exit(1) is called
    """
    if healthcheck_mode == 'cmd':
        return perform_cmd_healthcheck(docker_client, container_id, healthcheck_data, timeout)
    if healthcheck_mode in ('http', 'https'):
        return perform_http_healthcheck(healthcheck_data, timeout)
    if healthcheck_mode == 'tcp':
        return perform_tcp_healthcheck(healthcheck_data, timeout)

    paasta_print(
        PaastaColors.yellow(
            "Healthcheck mode '%s' is not currently supported!" % healthcheck_mode,
        ),
    )
    sys.exit(1)
    # Only reached if sys.exit does not raise (e.g. when it is patched out).
    return (False, "unknown")
def perform_http_healthcheck(url, timeout):
    """Send a GET request (certificate verification off) and judge the response.

    :param url: the healthcheck url
    :param timeout: timeout in seconds
    :returns: a tuple of (bool, output string); the bool is True only when
        the request finishes within ``timeout`` seconds with a status code
        in [200, 400) and no comma in the content-type header
    """
    try:
        with Timeout(seconds=timeout):
            try:
                res = requests.get(url, verify=False)
            except requests.ConnectionError:
                return (False, "http request failed: connection failed")
    except TimeoutError:
        return (False, "http request timed out after %d seconds" % timeout)

    headers = res.headers
    code = res.status_code
    if 'content-type' in headers and ',' in headers['content-type']:
        paasta_print(
            PaastaColors.yellow(
                "Multiple content-type headers detected in response."
                " The Mesos healthcheck system will treat this as a failure!",
            ),
        )
        return (False, "http request succeeded, code %d" % code)

    # check if response code is valid per https://mesosphere.github.io/marathon/docs/health-checks.html
    if 200 <= code < 400:
        return (True, "http request succeeded, code %d" % code)
    return (False, "http request failed, code %s" % str(code))
def run_healthcheck_on_container(docker_client, container_id, healthcheck_mode, healthcheck_data, timeout):
    """Dispatch a single healthcheck to the handler matching ``healthcheck_mode``.

    :param docker_client: passed through to the 'cmd' healthcheck
    :param container_id: Docker container id
    :param healthcheck_mode: one of 'http', 'tcp', or 'cmd'
    :param healthcheck_data: a URL when healthcheck_mode is 'http' or 'tcp',
        a command if healthcheck_mode is 'cmd'
    :param timeout: timeout in seconds for individual check
    :returns: whatever the selected healthcheck returns; False after a
        warning is written to stdout when the mode is not supported
    """
    if healthcheck_mode == 'cmd':
        return perform_cmd_healthcheck(docker_client, container_id, healthcheck_data, timeout)
    if healthcheck_mode == 'http':
        return perform_http_healthcheck(healthcheck_data, timeout)
    if healthcheck_mode == 'tcp':
        return perform_tcp_healthcheck(healthcheck_data, timeout)

    warning = "Healthcheck mode '%s' is not currently supported!\n" % healthcheck_mode
    sys.stdout.write(PaastaColors.yellow(warning))
    return False
def perform_http_healthcheck(url, timeout):
    """Returns true if healthcheck on url succeeds, false otherwise

    :param url: the healthcheck url
    :param timeout: timeout in seconds
    :returns: True if a HEAD request to ``url`` completes within ``timeout``
        seconds with a status code in [200, 400) and without a comma in the
        content-type header; False in every other case
    """
    try:
        with Timeout(seconds=timeout):
            try:
                res = requests.head(url)
            except requests.ConnectionError:
                return False
    except TimeoutError:
        return False

    if 'content-type' in res.headers and ',' in res.headers['content-type']:
        sys.stdout.write(
            PaastaColors.yellow(
                "Multiple content-type headers detected in response."
                " The Mesos healthcheck system will treat this as a failure!"))
        return False
    # check if response code is valid per https://mesosphere.github.io/marathon/docs/health-checks.html
    elif res.status_code >= 200 and res.status_code < 400:
        return True
    # Any other status code is a failure. Return an explicit False, as
    # documented, rather than falling off the end and returning None.
    return False
def format_haproxy_backend_row(backend, is_correct_instance):
    """Build the display row (name, last check, last change, status) for a backend.

    Field names follow the CSV format of haproxy:
    http://www.haproxy.org/download/1.5/doc/configuration.txt

    :param backend: mapping with the keys 'svname', 'status', 'check_status',
        'check_code', 'check_duration' and 'lastchg'
    :param is_correct_instance: when false, the row is returned with ANSI
        escape sequences removed and every column rendered grey
    :returns: a 4-tuple of strings
    """
    first_chunk, last_chunk = backend['svname'].split("_")[0], backend['svname'].split("_")[-1]
    # "<host>:<port>" - host is the last "_" chunk, port follows the last ":" of the first chunk.
    pretty_backend_name = "%s:%s" % (last_chunk, first_chunk.split(":")[-1])

    raw_status = backend['status']
    if raw_status == "UP":
        colorize = PaastaColors.default
    elif raw_status == 'DOWN' or raw_status == 'MAINT':
        colorize = PaastaColors.red
    else:
        colorize = PaastaColors.yellow
    status = colorize(raw_status)

    lastcheck = "%s/%s in %sms" % (backend['check_status'], backend['check_code'], backend['check_duration'])
    elapsed = datetime.timedelta(seconds=int(backend['lastchg']))
    lastchange = humanize.naturaltime(elapsed)

    row = (
        ' %s' % pretty_backend_name,
        lastcheck,
        lastchange,
        status,
    )
    if is_correct_instance:
        return row
    return tuple(PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row)
def status_chronos_jobs(client, jobs, job_config, verbose):
    """Return a formatted, newline-joined status report for a list of chronos jobs.

    :param client: passed through to format_chronos_job_status
    :param jobs: list of dicts of chronos job info as returned by the chronos client
    :param job_config: dict containing configuration about these jobs as
        provided by chronos_tools.load_chronos_job_config().
    :param verbose: int verbosity level
    :returns: a warning line when ``jobs`` is an empty list; otherwise a
        "Desired: ..." line followed by one formatted status per job
    """
    if jobs == []:
        return "%s: chronos job is not set up yet" % PaastaColors.yellow("Warning")

    lines = ["Desired: %s" % job_config.get_desired_state_human()]
    for job in jobs:
        all_running = a_sync.block(get_cached_list_of_running_tasks_from_frameworks)
        tasks_for_job = select_tasks_by_id(all_running, job["name"])
        lines.append(format_chronos_job_status(client, job, len(tasks_for_job), verbose))
    return "\n".join(lines)
def run_tasks_with_retries(executor_factory, task_config_factory, retries=0):
    """Run a task, retrying with a fresh executor and config while it fails.

    :param executor_factory: zero-argument callable returning an executor
    :param task_config_factory: zero-argument callable returning a task config
    :param retries: number of extra attempts after the first; negative
        values are treated as 0
    :returns: a list of (terminal_event, task_config) tuples, one per
        attempt. If an attempt raises, (None, task_config) is appended and
        the list is returned immediately; task_config is None there when the
        exception happened before the config for that attempt was built.
    """
    # use max in case retries is negative, +1 for initial try
    tries_left = max(retries, 0) + 1
    terminals = []

    while tries_left > 0:
        print(
            PaastaColors.yellow(f"Scheduling task on Mesos (tries left: {tries_left})")
        )
        # Reset per attempt: the except branch below reads task_config, which
        # would otherwise be unbound (first attempt) or left over from the
        # previous attempt (retry) if one of the factories raises.
        task_config = None
        try:
            executor = executor_factory()
            task_config = task_config_factory()
            terminal_event = run_task(executor, task_config)
        except Exception as e:
            # implies an error with our code, and not with mesos, so just return
            # immediately
            print(f"Except while running executor stack: {e}")
            traceback.print_exc()
            terminals.append((None, task_config))
            return terminals

        terminals.append((terminal_event, task_config))
        if terminal_event.success:
            print(PaastaColors.green("Task finished successfully"))
            break
        else:
            # TODO: add reconciliation and other more specific behavior
            error_msg = get_terminal_event_error_message(terminal_event)
            print(PaastaColors.red(f"Task failed:\n{error_msg}"))

        tries_left -= 1

    return terminals
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    """Assemble volumes, command and environment, then call run_docker_container.

    :param args: parsed arguments; ``cmd``, ``service`` and ``dry_run`` are read here
    :param docker_img: image handed to run_docker_container
    :param instance_config: supplies volumes, the default command and the
        environment dictionary
    :param system_paasta_config: supplies the system volumes
    :returns: 1 when no command is available, otherwise the result of
        run_docker_container
    """
    volumes = []
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        host_path = volume['hostPath']
        if not os.path.exists(host_path):
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding." % host_path,
                ),
            )
            continue
        volumes.append('%s:%s:%s' % (host_path, volume['containerPath'], volume['mode'].lower()))
    # The current working directory is always bound read-write as the work dir.
    volumes.append('%s:%s:rw' % (os.getcwd(), DEFAULT_SPARK_WORK_DIR))

    docker_cmd = instance_config.get_cmd() if args.cmd is None else args.cmd
    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr,
        )
        return 1
    # Changes at docker ENTRYPOINT or CMD does not work.
    if docker_cmd == 'jupyter':
        docker_cmd = 'jupyter notebook -y --ip=%s --notebook-dir=%s --allow-root' % (
            socket.getfqdn(),
            DEFAULT_SPARK_WORK_DIR,
        )

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_%s_%s' % (get_username(), spark_ui_port)

    # Do not put memory and CPU limits on Spark driver for now.
    # Toree won't work with the default memory-swap setting.
    environment = instance_config.get_env_dictionary()
    spark_settings = get_spark_configuration(
        args,
        container_name,
        spark_ui_port,
        docker_img,
        system_paasta_config,
    )
    environment.update(spark_settings)

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
def guess_instance(service, cluster, args):
    """Pick the instance to use.

    Returns args.instance when it is set. Otherwise lists the instances of
    ``service`` in ``cluster`` and prefers "main", falling back to the first
    instance listed; a notice about the guess is written to stderr. When the
    service has no configuration, an error is written to stderr and the
    process exits with status 2.
    """
    if args.instance:
        return args.instance

    try:
        candidates = list_all_instances_for_service(
            service=service,
            clusters=[cluster],
            instance_type=None,
            soa_dir=args.yelpsoa_config_root,
        )
        instance = "main" if "main" in candidates else list(candidates)[0]
    except NoConfigurationForServiceError:
        error_text = (
            "Could not automatically detect instance to emulate. Please specify one with the --instance option.\n"
        )
        sys.stderr.write(PaastaColors.red(error_text))
        sys.exit(2)

    notice = "Guessing instance configuration for %s. To override, use the --instance option.\n" % instance
    sys.stderr.write(PaastaColors.yellow(notice))
    return instance
def test_format_chronos_job_one_mesos_task(mock_status):
    """One running Mesos task makes the job status contain a yellow 'Running'."""
    job = {'name': 'my_service my_instance', 'schedule': 'foo'}
    single_task = ['slay the nemean lion']
    actual = chronos_serviceinit.format_chronos_job_status(mock.Mock(), job, single_task, False)
    expected_fragment = PaastaColors.yellow('Running')
    assert expected_fragment in actual
def build_smartstack_backends_table(backends):
    """Render smartstack backends as a table via format_table.

    :param backends: iterable of objects exposing ``status``, ``hostname``,
        ``port``, ``check_status``, ``check_code``, ``check_duration``,
        ``last_change`` and ``has_associated_task``
    :returns: the result of format_table on a header row plus one row per
        backend; rows of backends without an associated task are stripped of
        ANSI escape sequences and rendered grey
    """
    table = [("Name", "LastCheck", "LastChange", "Status")]

    for backend in backends:
        state = backend.status
        if state == "UP":
            colorize = PaastaColors.default
        elif state == "DOWN":
            colorize = PaastaColors.red
        elif state == "MAINT":
            colorize = PaastaColors.grey
        else:
            colorize = PaastaColors.yellow
        status = colorize(state)

        # A missing duration is shown as an empty string rather than "None".
        check_duration = "" if backend.check_duration is None else str(backend.check_duration)

        name_col = f"{backend.hostname}:{backend.port}"
        check_col = f"{backend.check_status}/{backend.check_code} in {check_duration}ms"
        change_col = humanize.naturaltime(timedelta(seconds=backend.last_change))
        row = (name_col, check_col, change_col, status)

        if not backend.has_associated_task:
            row = tuple(PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row)
        table.append(row)

    return format_table(table)
def _format_mesos_status(running_tasks):
    """Return a coloured Mesos status label based on the running tasks.

    :param running_tasks: sized collection of running tasks
    :returns: grey "Not running" when the collection is empty, yellow
        "Running" otherwise
    """
    # The former red "UNKNOWN" initial value was always overwritten by one
    # of the two branches below, so it has been dropped.
    if len(running_tasks) == 0:
        return PaastaColors.grey("Not running")
    return PaastaColors.yellow("Running")
def _format_schedule(job):
    """Format a job's schedule and epsilon as "<schedule> Epsilon: <epsilon>".

    :param job: mapping describing the job; a non-None 'parents' entry marks
        a dependent job, which has no schedule of its own
    :returns: the formatted string; missing 'schedule' or 'epsilon' entries
        are shown as a red "UNKNOWN"
    """
    is_dependent = job.get('parents') is not None
    schedule = (
        PaastaColors.yellow("None (Dependent Job).")
        if is_dependent
        else job.get("schedule", PaastaColors.red("UNKNOWN"))
    )
    epsilon = job.get("epsilon", PaastaColors.red("UNKNOWN"))
    return "%s Epsilon: %s" % (schedule, epsilon)
def status_mesos_tasks(service, instance, normal_instance_count):
    """Summarise how many Mesos tasks are running for a service instance.

    :param service: service name, used to build the job id
    :param instance: instance name, used to build the job id
    :param normal_instance_count: number of tasks expected to be running
    :returns: a one-line string; green "Healthy" when the running count is
        at least the expected count, red "Critical" when it is zero and
        yellow "Warning" otherwise
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    running_tasks = get_cached_list_of_running_tasks_from_frameworks()
    task_total = len(select_tasks_by_id(running_tasks, filter_string))

    if task_total >= normal_instance_count:
        paint, label = PaastaColors.green, "Healthy"
    elif task_total == 0:
        paint, label = PaastaColors.red, "Critical"
    else:
        paint, label = PaastaColors.yellow, "Warning"

    status = paint(label)
    ratio = paint("(%d/%d)" % (task_total, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos: %s - %s tasks in the %s state." % (status, ratio, running_string)
def test_format_chronos_job_one_mesos_task(mock_status):
    """One running Mesos task makes the job status contain a yellow "Running"."""
    job = {"name": "my_service my_instance", "schedule": "foo"}
    single_task = ["slay the nemean lion"]
    client = mock.Mock()
    actual = chronos_serviceinit.format_chronos_job_status(client, job, single_task, False)
    expected_fragment = PaastaColors.yellow("Running")
    assert expected_fragment in actual
def test_format_chronos_job_status_no_last_run():
    """A job with empty lastError/lastSuccess is reported as 'New' and '(never)'."""
    never_run_job = {
        'lastError': '',
        'lastSuccess': '',
        'schedule': 'foo',
    }
    actual = chronos_serviceinit.format_chronos_job_status(never_run_job, [], False)
    new_marker = PaastaColors.yellow('New')
    assert new_marker in actual
    assert '(never)' in actual
def bouncing_status_human(app_count, bounce_method):
    """Describe the bounce state implied by the number of apps.

    :param app_count: number of apps found
    :param bounce_method: name of the bounce method, shown while bouncing
    :returns: red "Disabled" for 0, green "Configured" for 1, yellow
        "Bouncing (<method>)" for more than 1, and red
        "Unknown (count: <n>)" for anything else
    """
    if app_count == 0:
        return PaastaColors.red("Disabled")
    if app_count == 1:
        return PaastaColors.green("Configured")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    # Anything that is not 0, 1 or greater than 1 ends up here.
    return PaastaColors.red("Unknown (count: %s)" % app_count)
def status_marathon_job_human(service, instance, deploy_status, app_id, running_instances, normal_instance_count):
    """Build the one-line Marathon status for a service instance.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param deploy_status: deploy status text; the literal 'NotRunning' selects
        the "NOT running" message
    :param app_id: Marathon app id, shown in the "NOT running" message
    :param running_instances: number of instances currently running
    :param normal_instance_count: number of instances expected
    :returns: the formatted status line
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if deploy_status != 'NotRunning':
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green("(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            # "Critical" is red, matching its own instance count, the
            # "NOT running" branch below and the Mesos status line. It used
            # to be rendered in the yellow reserved for "Warning".
            status = PaastaColors.red("Critical")
            instance_count = PaastaColors.red("(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow("(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
    else:
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)
def status_marathon_job_human(service, instance, deploy_status, app_id, running_instances, normal_instance_count):
    """Build the one-line Marathon status for a service instance.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param deploy_status: deploy status text; the literal 'NotRunning' selects
        the "not configured in Marathon yet" message
    :param app_id: Marathon app id, shown in the "not configured" message
    :param running_instances: number of instances currently running
    :param normal_instance_count: number of instances expected
    :returns: the formatted status line
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if deploy_status != 'NotRunning':
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green("(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            # "Critical" is red, matching its own instance count and the
            # Mesos status line. It used to be rendered in the yellow
            # reserved for "Warning".
            status = PaastaColors.red("Critical")
            instance_count = PaastaColors.red("(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow("(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
    else:
        status = PaastaColors.yellow("Warning")
        return "Marathon: %s - %s (app %s) is not configured in Marathon yet (waiting for bounce)" % (
            status, name, app_id)
def status_mesos_tasks(service, instance, normal_instance_count):
    """Summarise how many Mesos tasks are running for a service instance.

    :param service: service name, used to build the job id
    :param instance: instance name, used to build the job id
    :param normal_instance_count: number of tasks expected to be running
    :returns: a one-line string; green "Healthy" when the running count is
        at least the expected count, red "Critical" when it is zero and
        yellow "Warning" otherwise
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    task_total = len(get_running_tasks_from_active_frameworks(filter_string))

    if task_total >= normal_instance_count:
        paint, label = PaastaColors.green, "Healthy"
    elif task_total == 0:
        paint, label = PaastaColors.red, "Critical"
    else:
        paint, label = PaastaColors.yellow, "Warning"

    status = paint(label)
    ratio = paint("(%d/%d)" % (task_total, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos: %s - %s tasks in the %s state." % (status, ratio, running_string)
def status_marathon_job_human(
    service,
    instance,
    deploy_status,
    app_id,
    running_instances,
    normal_instance_count,
    unused_offers_summary=None,
):
    """Build the Marathon status text for a service instance.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param deploy_status: deploy status text; the literal 'NotRunning' selects
        the "not configured in Marathon yet" message
    :param app_id: Marathon app id, shown in the "not configured" message
    :param running_instances: number of instances currently running
    :param normal_instance_count: number of instances expected
    :param unused_offers_summary: optional mapping of reason -> count; when
        non-empty, a "Possibly stalled for" section is appended
    :returns: the formatted status text
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if unused_offers_summary is not None and len(unused_offers_summary) > 0:
        stalled_str = "\n ".join([
            "%s: %s times" % (k, n) for k, n in unused_offers_summary.items()
        ])
        stall_reason = "\n Possibly stalled for:\n %s" % stalled_str
    else:
        stall_reason = ""

    if deploy_status != 'NotRunning':
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green(
                "(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            # "Critical" is red, matching its own instance count and the
            # Mesos status line. It used to be rendered in the yellow
            # reserved for "Warning".
            status = PaastaColors.red("Critical")
            instance_count = PaastaColors.red(
                "(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow(
                "(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s%s" % (
            status,
            instance_count,
            deploy_status,
            stall_reason,
        )
    else:
        status = PaastaColors.yellow("Warning")
        return "Marathon: %s - %s (app %s) is not configured in Marathon yet (waiting for bounce)%s" % (
            status,
            name,
            app_id,
            stall_reason,
        )
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Query Marathon for an app and build its one-line status.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: number of instances expected
    :param client: Marathon client used for the lookups
    :returns: the formatted status line; a red "Critical ... NOT running"
        message when marathon_tools.is_app_id_running is false
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if marathon_tools.is_app_id_running(app_id, client):
        app = client.get_app(app_id)
        running_instances = app.tasks_running

        if len(app.deployments) == 0:
            deploy_status = PaastaColors.bold("Running")
        elif app.instances == 0 and app.tasks_running == 0:
            deploy_status = PaastaColors.grey("Stopped")
        else:
            # App is currently deploying so we should check the launch queue for more info
            is_overdue, backoff_seconds = marathon_tools.get_app_queue_status(
                client, app_id)
            if is_overdue:
                deploy_status = "%s (new tasks are not launching due to lack of capacity)" % PaastaColors.red(
                    "Waiting")
            elif backoff_seconds:
                deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                    PaastaColors.red("Delayed"), backoff_seconds)
            else:
                deploy_status = PaastaColors.yellow("Deploying")

        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green(
                "(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            # "Critical" is red, matching its own instance count and the
            # "NOT running" branch below. It used to be rendered in the
            # yellow reserved for "Warning".
            status = PaastaColors.red("Critical")
            instance_count = PaastaColors.red(
                "(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow(
                "(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s" % (
            status, instance_count, deploy_status)
    else:
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (
            status, name, app_id, red_not)
def paasta_rollback(args):
    """Call mark_for_deployment with rollback parameters.

    :param args: contains all the arguments passed onto the script: service,
        deploy groups and sha. These arguments will be verified and passed
        onto mark_for_deployment.
    :returns: 1 when no valid deploy group was given or when no commit was
        given (previous commits are listed instead); otherwise the highest
        return code reported by mark_for_deployment
    """
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)
    git_url = get_git_url(service, soa_dir)
    # Drop empty entries produced by stray commas ("a,,b" or a trailing ",").
    given_deploy_groups = {deploy_group for deploy_group in args.deploy_groups.split(",") if deploy_group}

    service_deploy_groups = {
        config.get_deploy_group()
        for config in get_instance_config_for_service(
            service=service,
            soa_dir=soa_dir,
        )
    }
    deploy_groups, invalid = validate_given_deploy_groups(service_deploy_groups, given_deploy_groups)

    # print(...) with a single argument is valid on Python 2 and 3 alike,
    # unlike the former print statements.
    if len(invalid) > 0:
        print(PaastaColors.yellow("These deploy groups are not valid and will be skipped: %s.\n" %
                                  (",").join(invalid)))

    if len(deploy_groups) == 0:
        print(PaastaColors.red("ERROR: No valid deploy groups specified for %s.\n" % (service)))
        return 1

    commit = args.commit
    if not commit:
        list_previous_commits(service, deploy_groups, bool(given_deploy_groups), soa_dir)
        return 1

    returncode = 0
    for deploy_group in deploy_groups:
        # Keep the worst return code seen across all deploy groups.
        returncode = max(
            mark_for_deployment(
                git_url=git_url,
                service=service,
                deploy_group=deploy_group,
                commit=commit,
            ),
            returncode,
        )

    return returncode
def extract_args(args):
    """Resolve the settings remote-run needs from the parsed arguments.

    Falls back to a SystemPaastaConfig with no volumes (after printing a
    warning) when the system config cannot be loaded. Exits with status 1
    when no cluster can be determined, or when the requested instance is not
    of type 'adhoc'.

    :param args: parsed arguments; ``yelpsoa_config_root``, ``cluster`` and
        ``instance`` are read here
    :returns: a tuple (system_paasta_config, service, cluster, soa_dir,
        instance, instance_type)
    """
    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        # Adjacent string literals are joined with no separator, so each
        # fragment carries its own trailing space.
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates "
                "PaaSTA is not configured locally on this host, and remote-run may not behave "
                "the same way it would behave on a server configured for PaaSTA.",
            ),
            sep='\n',
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, '/etc/paasta')

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    cluster = args.cluster or system_paasta_config.get_local_run_config().get('default_cluster', None)

    if not cluster:
        paasta_print(
            PaastaColors.red(
                "PaaSTA on this machine has not been configured with a default cluster. "
                "Please pass one using '-c'.",
            ),
            sep='\n',
            file=sys.stderr,
        )
        sys.exit(1)

    soa_dir = args.yelpsoa_config_root
    instance = args.instance
    if instance is None:
        # No instance requested: use the built-in adhoc "remote" instance.
        instance_type = 'adhoc'
        instance = 'remote'
    else:
        instance_type = validate_service_instance(
            service, instance, cluster, soa_dir,
        )
        if instance_type != 'adhoc':
            paasta_print(
                PaastaColors.red(
                    (
                        "Please use instance declared in adhoc.yaml for use "
                        "with remote-run, {} is declared as {}"
                    ).format(instance, instance_type),
                ),
            )
            sys.exit(1)

    return (
        system_paasta_config,
        service,
        cluster,
        soa_dir,
        instance,
        instance_type,
    )
def test_format_chronos_job_status_no_last_run():
    """A job with empty lastError/lastSuccess is reported as 'New' and '(never)'."""
    never_run_job = {'lastError': '', 'lastSuccess': ''}
    no_running_tasks = []
    actual = chronos_serviceinit.format_chronos_job_status(never_run_job, no_running_tasks, False)
    new_marker = PaastaColors.yellow('New')
    assert new_marker in actual
    assert '(never)' in actual
def _format_mesos_status(job, running_tasks):
    """Return a coloured Mesos status label based on the running tasks.

    :param job: not used by this function; kept for the callers' signature
    :param running_tasks: sized collection of running tasks
    :returns: grey "Not running" for no tasks, yellow "Running" for exactly
        one, and a red "Critical" message for more than one
    """
    # The former red "UNKNOWN" initial value was always overwritten by one
    # of the branches below, so it has been dropped.
    num_tasks = len(running_tasks)
    if num_tasks == 0:
        return PaastaColors.grey("Not running")
    if num_tasks == 1:
        return PaastaColors.yellow("Running")
    return PaastaColors.red("Critical - %d tasks running (expected 1)" % num_tasks)
def paasta_rollback(args):
    """Call mark_for_deployment with rollback parameters.

    :param args: contains all the arguments passed onto the script: service,
        cluster, instance and sha. These arguments will be verified and
        passed onto mark_for_deployment.

    Exits the process via sys.exit with 1 when the service is not deployed
    in the cluster or no valid instance was given, otherwise with the
    highest return code reported by mark_for_deployment.
    """
    service = figure_out_service_name(args)
    cluster = args.cluster
    git_url = get_git_url(service)
    commit = args.commit
    given_instances = args.instances.split(",")

    # print(...) with a single argument is valid on Python 2 and 3 alike,
    # unlike the former print statements.
    if cluster in list_clusters(service):
        service_instances = list_all_instances_for_service(service)
        instances, invalid = validate_given_instances(service_instances, given_instances)

        if len(invalid) > 0:
            print(PaastaColors.yellow(
                "These instances are not valid and will be skipped: %s.\n" % (",").join(invalid)))

        # Compare by value: "is 0" only worked through CPython's small-int
        # caching and is a SyntaxWarning on newer interpreters.
        if len(instances) == 0:
            print(PaastaColors.red(
                "ERROR: No valid instances specified for %s.\n" % (service)))
            returncode = 1
        else:
            returncode = 0

        for instance in instances:
            # Keep the worst return code so a failure for an earlier
            # instance is not hidden by a later success.
            returncode = max(
                mark_for_deployment(
                    git_url=git_url,
                    cluster=cluster,
                    instance=instance,
                    service=service,
                    commit=commit,
                ),
                returncode,
            )
    else:
        print(PaastaColors.red(
            "ERROR: The service %s is not deployed into cluster %s.\n" % (service, cluster)))
        returncode = 1

    sys.exit(returncode)
def status_mesos_tasks(
    service: str,
    instance: str,
    normal_instance_count: int,
    verbose: int,
) -> str:
    """Summarise how many Mesos tasks are running for a service instance.

    :param service: service name, used to build the job id
    :param instance: instance name, used to build the job id
    :param normal_instance_count: number of tasks expected to be running
    :param verbose: verbosity level; above 0 a verbose task listing is
        appended on a new line
    :returns: the status text, or an error message when a ReadTimeout is
        raised while building the summary
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        running_tasks = a_sync.block(get_cached_list_of_running_tasks_from_frameworks)
        count = len(select_tasks_by_id(running_tasks, filter_string))

        if count >= normal_instance_count:
            paint, label = PaastaColors.green, "Healthy"
        elif count == 0:
            paint, label = PaastaColors.red, "Critical"
        else:
            paint, label = PaastaColors.yellow, "Warning"

        status = paint(label)
        count_str = paint("(%d/%d)" % (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        output = f"Mesos: {status} - {count_str} tasks in the {running_string} state."
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        verbose_output = status_mesos_tasks_verbose(
            filter_string=filter_string,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        output = output + '\n' + verbose_output

    return output
def _format_schedule(job):
    """Render a job's schedule, time zone and epsilon as one line of text.

    :param job: dict of job info; the keys read are 'parents', 'schedule',
        'epsilon' and 'scheduleTimeZone'
    :returns: a string of the form "<schedule> (<time zone>) Epsilon: <epsilon>"
    """
    unknown = PaastaColors.red("UNKNOWN")

    # A job with parents is reported as a dependent job instead of showing a schedule.
    has_parents = job.get('parents') is not None
    schedule = PaastaColors.yellow("None (Dependent Job).") if has_parents else job.get("schedule", unknown)

    time_zone = job.get("scheduleTimeZone", "null")
    # "null" is what Chronos returns; it is displayed as UTC.
    time_zone = "UTC" if time_zone == "null" else time_zone

    return "%s (%s) Epsilon: %s" % (schedule, time_zone, job.get("epsilon", unknown))
def _cleanup_container(docker_client, container_id):
    """Stop and remove a container, reporting progress on stdout.

    :param docker_client: client object providing stop() and remove_container()
    :param container_id: id of the container to stop and remove

    An errors.APIError raised while stopping or removing is not propagated; a
    warning asking the user to clean up manually is written instead.
    """
    out = sys.stdout
    out.write("\nStopping and removing the old container %s...\n" % container_id)
    out.write("(Please wait or you may leave an orphaned container.)\n")
    # Flush so the "please wait" notice is emitted before the stop call starts.
    out.flush()
    try:
        docker_client.stop(container_id)
        docker_client.remove_container(container_id)
    except errors.APIError:
        warning = "Could not clean up container! You should stop and remove container '%s' manually.\n" % container_id
        out.write(PaastaColors.yellow(warning))
    else:
        out.write("...done\n")
def _format_schedule(job):
    """Build the one-line schedule description for a job.

    :param job: dict of job info; 'parents', 'schedule', 'epsilon' and
        'scheduleTimeZone' are looked up with .get()
    :returns: "<schedule> (<time zone>) Epsilon: <epsilon>", where missing
        schedule/epsilon values are shown as a red "UNKNOWN"
    """

    def _schedule_text():
        # Jobs that have parents are labelled as dependent jobs.
        if job.get('parents') is not None:
            return PaastaColors.yellow("None (Dependent Job).")
        return job.get("schedule", PaastaColors.red("UNKNOWN"))

    def _time_zone_text():
        zone = job.get("scheduleTimeZone", "null")
        # This is what Chronos returns; show it as UTC.
        if zone == "null":
            return "UTC"
        return zone

    schedule = _schedule_text()
    epsilon = job.get("epsilon", PaastaColors.red("UNKNOWN"))
    return "{} ({}) Epsilon: {}".format(schedule, _time_zone_text(), epsilon)
def paasta_fsm(args):
    """Write the initial PaaSTA configs for a service and print next steps.

    :param args: parsed command-line arguments; the attributes read are
        yelpsoa_config_root, srvname, auto, port, team, description and
        external_link

    The arguments are validated, the config stanzas are obtained from
    get_paasta_config and written with write_paasta_config, then a banner and
    the follow-up git commands are printed.
    """
    validate_args(args)
    (srvname, service_stanza, smartstack_stanza, monitoring_stanza,
     deploy_stanza, marathon_stanza, cluster_stanza, team) = (
        get_paasta_config(
            args.yelpsoa_config_root,
            args.srvname,
            args.auto,
            args.port,
            args.team,
            args.description,
            args.external_link,
        )
    )
    srv = Service(srvname, args.yelpsoa_config_root)
    write_paasta_config(
        srv,
        service_stanza,
        smartstack_stanza,
        monitoring_stanza,
        deploy_stanza,
        marathon_stanza,
        cluster_stanza,
    )

    # Every print below takes exactly one argument so the output is the same
    # whether print is a statement (Python 2) or a function (Python 3);
    # print("") stands in for a bare print and emits an empty line.
    # Raw strings keep the banner's backslashes without invalid escapes.
    print(PaastaColors.yellow(" _ _(o)_(o)_ _"))
    print(PaastaColors.red(r" ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r" / (`---'\ `-."))
    print(PaastaColors.cyan(" ,-` _) (_,"))
    print("With My Noodly Appendage I Have Written Configs For")
    print("")
    print(PaastaColors.bold(" %s" % srvname))
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
    print("cd %s" % join(args.yelpsoa_config_root, srvname))
    print("# Review And/Or Customize Files")
    print("git add .")
    print("git commit -m'Initial Commit For %s'" % srvname)
    print("git push origin HEAD # Pushmaster Or Ops Deputy Privs Required")
    print("")
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Summarize the Marathon state of one app as a colorized line of text.

    :param service: service name, used to build the displayed job id
    :param instance: instance name, used to build the displayed job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: number of running tasks at or above which
        the app is reported as Healthy
    :param client: Marathon client, used for get_app() and passed to the
        marathon_tools helpers
    :returns: "Marathon: <status> - up with <running/expected> instances.
        Status: <deploy status>" when the app is running in Marathon,
        otherwise a Critical message saying the app is NOT running
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running
    deploy_status = marathon_tools.get_marathon_app_deploy_status_human(app, app_id, client)
    tally = "(%d/%d)" % (running_instances, normal_instance_count)

    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(tally)
    elif running_instances == 0:
        # Critical is red, matching its tally and the not-running branch above
        # (it was previously painted yellow, the Warning colour).
        status = PaastaColors.red("Critical")
        instance_count = PaastaColors.red(tally)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(tally)

    return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
def get_bouncing_status(service, instance, client, job_config):
    """Classify a service instance by how many matching Marathon apps exist.

    :param service: service name
    :param instance: instance name
    :param client: Marathon client passed to marathon_tools.get_matching_appids
    :param job_config: config object providing get_bounce_method()
    :returns: a colorized string: "Stopped" for no matching apps, "Running"
        for exactly one, "Bouncing (<bounce method>)" for more than one
    """
    matching_apps = marathon_tools.get_matching_appids(service, instance, client)
    bounce_method = job_config.get_bounce_method()
    how_many = len(matching_apps)

    if how_many == 0:
        return PaastaColors.red("Stopped")
    if how_many == 1:
        return PaastaColors.green("Running")
    if how_many > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    # Fallback for a count that matched none of the cases above.
    return PaastaColors.red("Unknown (count: %s)" % how_many)
def test_format_chronos_job_status_no_last_run(mock_status):
    # A job with empty lastError and lastSuccess should be shown as a yellow
    # 'New' job whose output contains '(never)'.
    job_without_runs = dict(
        name='my_service my_instance',
        lastError='',
        lastSuccess='',
        schedule='foo',
    )
    no_running_tasks = []
    not_verbose = False
    client = mock.Mock()

    formatted = chronos_serviceinit.format_chronos_job_status(
        client,
        job_without_runs,
        no_running_tasks,
        not_verbose,
    )

    assert PaastaColors.yellow('New') in formatted
    assert '(never)' in formatted
def paasta_fsm(args):
    """Write PaaSTA configs from the system FSM template and print next steps.

    :param args: parsed command-line arguments; only yelpsoa_config_root is
        read, both as the input to get_paasta_config and as the destination

    The template comes from the system PaaSTA config's get_fsm_template();
    write_paasta_config does the writing, then a banner and reminder are
    printed.
    """
    variables = get_paasta_config(yelpsoa_config_root=args.yelpsoa_config_root)
    destination = args.yelpsoa_config_root

    paasta_config = load_system_paasta_config()
    template = paasta_config.get_fsm_template()

    write_paasta_config(
        variables=variables,
        template=template,
        destination=destination,
    )

    # Every print below takes exactly one argument so the output is the same
    # whether print is a statement (Python 2) or a function (Python 3);
    # print("") stands in for a bare print and emits an empty line.
    # Raw strings keep the banner's backslashes without invalid escapes.
    print(PaastaColors.yellow(" _ _(o)_(o)_ _"))
    print(PaastaColors.red(r" ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r" / (`---'\ `-."))
    print(PaastaColors.cyan(" ,-` _) (_,"))
    print("With My Noodly Appendage I Have Written Configs!")
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
def _prettify_status(status):
    """Turn a chronos_tools.LastRunState value into a colorized label.

    :param status: one of LastRunState.Fail, LastRunState.Success or
        LastRunState.NotRun
    :returns: red "Failed", green "OK" or yellow "New" respectively
    :raises ValueError: if status is not one of the three states above
    """
    states = chronos_tools.LastRunState
    # (state, colour function, label) in the order the states are checked.
    renderings = (
        (states.Fail, PaastaColors.red, "Failed"),
        (states.Success, PaastaColors.green, "OK"),
        (states.NotRun, PaastaColors.yellow, "New"),
    )

    known_states = tuple(state for state, _, _ in renderings)
    if status not in known_states:
        raise ValueError("Expected valid state, got %s" % status)

    for state, paint, label in renderings:
        if status == state:
            return paint(label)
def guess_cluster(service, args):
    """Returns the cluster from args if available, otherwise uses the "default" one

    :param service: service name used to look up the default cluster
    :param args: parsed arguments; args.cluster is used when it is truthy
    :returns: the chosen cluster name

    Exits with status 2 when no cluster was given and
    get_default_cluster_for_service raises NoConfigurationForServiceError.
    """
    # An explicitly requested cluster wins and needs no notice.
    if args.cluster:
        return args.cluster

    try:
        guessed = get_default_cluster_for_service(service)
    except NoConfigurationForServiceError:
        sys.stdout.write(PaastaColors.red(
            'Could not automatically detect cluster to emulate. Please specify one with the --cluster option.\n'))
        sys.exit(2)

    notice = 'Guesing cluster configuration for %s. To override, use the --cluster option.\n' % guessed
    sys.stdout.write(PaastaColors.yellow(notice))
    return guessed
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Summarize the Marathon state of one app as a colorized line of text.

    :param service: service name, used to build the displayed job id
    :param instance: instance name, used to build the displayed job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: number of running tasks at or above which
        the app is reported as Healthy
    :param client: Marathon client, used for get_app() and passed to the
        marathon_tools helpers
    :returns: "Marathon: <status> - up with <running/expected> instances.
        Status: <deploy status>" when the app is running in Marathon,
        otherwise a Critical message saying the app is NOT running
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    if len(app.deployments) == 0:
        deploy_status = PaastaColors.bold("Running")
    elif app.instances == 0 and app.tasks_running == 0:
        deploy_status = PaastaColors.grey("Stopped")
    else:
        # App is currently deploying so we should check the launch queue for more info
        is_overdue, backoff_seconds = marathon_tools.get_app_queue_status(client, app_id)
        if is_overdue:
            deploy_status = "%s (new tasks are not launching due to lack of capacity)" % PaastaColors.red("Waiting")
        elif backoff_seconds:
            deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                PaastaColors.red("Delayed"), backoff_seconds)
        else:
            deploy_status = PaastaColors.yellow("Deploying")

    tally = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(tally)
    elif running_instances == 0:
        # Critical is red, matching its tally and the not-running branch above
        # (it was previously painted yellow, the Warning colour).
        status = PaastaColors.red("Critical")
        instance_count = PaastaColors.red(tally)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(tally)

    return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
def _cleanup_container(docker_client, container_id):
    """Report an OOM kill if one happened, then stop and remove the container.

    :param docker_client: client object providing inspect_container(), stop()
        and remove_container()
    :param container_id: id of the container to inspect, stop and remove

    The OOM notice goes to stderr; progress messages go to stdout. An
    errors.APIError raised while stopping or removing is not propagated; a
    warning asking the user to clean up manually is written instead.
    """
    container_state = docker_client.inspect_container(container_id)['State']
    if container_state.get('OOMKilled', False):
        for oom_line in (
            "Your service was killed by the OOM Killer!\n",
            "You've exceeded the memory limit, try increasing the mem parameter in your soa_configs\n",
        ):
            sys.stderr.write(PaastaColors.red(oom_line))

    out = sys.stdout
    out.write("\nStopping and removing the old container %s...\n" % container_id)
    out.write("(Please wait or you may leave an orphaned container.)\n")
    # Flush so the "please wait" notice is emitted before the stop call starts.
    out.flush()
    try:
        docker_client.stop(container_id)
        docker_client.remove_container(container_id)
    except errors.APIError:
        warning = "Could not clean up container! You should stop and remove container '%s' manually.\n" % container_id
        out.write(PaastaColors.yellow(warning))
    else:
        out.write("...done\n")
def paasta_rollback(args):
    """Call mark_for_deployment with rollback parameters

    :param args: contains all the arguments passed onto the script: service,
        cluster, instance and sha. These arguments will be verified and passed
        onto mark_for_deployment.

    This function never returns: it always ends in sys.exit(). The exit status
    is 1 when the service is not deployed into the requested cluster or when no
    valid instance was given; otherwise it is the most recent non-zero value
    returned by mark_for_deployment, or 0 if every call returned a falsy value.
    """
    service = figure_out_service_name(args)
    cluster = args.cluster
    git_url = get_git_url(service)
    commit = args.commit
    # Drop empty names so a stray or trailing comma is not reported as an
    # invalid instance.
    given_instances = [name for name in args.instances.split(",") if name]

    if cluster not in list_clusters(service):
        print(PaastaColors.red("ERROR: The service %s is not deployed into cluster %s.\n" % (service, cluster)))
        sys.exit(1)

    service_instances = list_all_instances_for_service(service)
    instances, invalid = validate_given_instances(service_instances, given_instances)

    if len(invalid) > 0:
        print(PaastaColors.yellow("These instances are not valid and will be skipped: %s.\n" % (",").join(invalid)))

    returncode = 0
    # Compare by value: `is 0` only works by accident of CPython's int caching.
    if len(instances) == 0:
        print(PaastaColors.red("ERROR: No valid instances specified for %s.\n" % (service)))
        returncode = 1

    for instance in instances:
        result = mark_for_deployment(
            git_url=git_url,
            cluster=cluster,
            instance=instance,
            service=service,
            commit=commit,
        )
        # Remember failures so that a later successful instance cannot reset
        # the exit status back to 0.
        if result:
            returncode = result

    sys.exit(returncode)
def status_chronos_jobs(jobs, job_config, verbose):
    """Returns a formatted string of the status of a list of chronos jobs

    :param jobs: list of dicts of chronos job info as returned by the chronos client
    :param job_config: dict containing configuration about these jobs as
        provided by chronos_tools.load_chronos_job_config().
    :param verbose: passed through unchanged to format_chronos_job_status
    :returns: one formatted status per job joined by newlines, or a warning
        line when jobs is an empty list
    """
    if jobs == []:
        return "%s: chronos job is not set up yet" % PaastaColors.yellow("Warning")

    desired_state = job_config.get_desired_state_human()
    # Each job is formatted together with the tasks currently running under its name.
    per_job_status = [
        format_chronos_job_status(
            job,
            desired_state,
            get_running_tasks_from_active_frameworks(job["name"]),
            verbose,
        )
        for job in jobs
    ]
    return "\n".join(per_job_status)
def get_marathon_app_deploy_status_human(app, app_id, client):
    """Return a colorized, human-readable deploy status for a Marathon app.

    :param app: Marathon app object, passed to get_marathon_app_deploy_status
    :param app_id: Marathon app id, passed to get_marathon_app_deploy_status
    :param client: Marathon client, passed to get_marathon_app_deploy_status
    :returns: the status name from MarathonDeployStatus.tostring(), colored by
        status; Waiting and Delayed also carry an explanatory suffix
    """
    status, delay = get_marathon_app_deploy_status(app, app_id, client)
    label = MarathonDeployStatus.tostring(status)

    if status == MarathonDeployStatus.Waiting:
        return "%s (new tasks are not launching due to lack of capacity)" % PaastaColors.red(label)
    if status == MarathonDeployStatus.Delayed:
        return "%s (next task won't launch for %s seconds due to previous failures)" % (
            PaastaColors.red(label), delay)
    if status == MarathonDeployStatus.Deploying:
        return PaastaColors.yellow(label)
    if status == MarathonDeployStatus.Stopped:
        return PaastaColors.grey(label)
    # Any other status is shown in bold.
    return PaastaColors.bold(label)