def run_cleanup_marathon_job(context, flags, expected_return_code):
    """Run cleanup_marathon_jobs.py and assert on its exit code.

    :param context: object whose ``soa_dir`` attribute is passed as ``--soa-dir``
    :param flags: extra command line flags, appended verbatim to the command
    :param expected_return_code: expected exit code (converted with ``int``
        before the comparison)
    """
    command = '../paasta_tools/cleanup_marathon_jobs.py --soa-dir %s %s' % (
        context.soa_dir,
        flags,
    )
    paasta_print('Running cmd %s' % command)
    return_code, command_output = _run(command)
    paasta_print(command_output)
    assert return_code == int(expected_return_code)
def test_group_slaves_by_key_func():
    """Two slaves in two different habitats must end up in two groups of one."""
    first_slave = {
        'id': 'somenametest-slave',
        'hostname': 'test.somewhere.www',
        'resources': {
            'cpus': 75,
            'disk': 250,
            'mem': 100,
        },
        'attributes': {
            'habitat': 'somenametest-habitat',
        },
    }
    second_slave = {
        'id': 'somenametest-slave2',
        'hostname': 'test2.somewhere.www',
        'resources': {
            'cpus': 500,
            'disk': 200,
            'mem': 750,
        },
        'attributes': {
            'habitat': 'somenametest-habitat-2',
        },
    }
    slaves = [first_slave, second_slave]

    def habitat_of(slave):
        return slave['attributes']['habitat']

    actual = metastatus_lib.group_slaves_by_key_func(habitat_of, slaves)
    assert len(actual.items()) == 2
    for habitat, grouped_slaves in actual.items():
        paasta_print(habitat, grouped_slaves)
        assert len(list(grouped_slaves)) == 1
def validate_schema(file_path, file_type):
    """Check if the specified config file has a valid schema

    :param file_path: path to file to validate
    :param file_type: what schema type should we validate against
    :returns: True when the file validates against the schema, False when the
        file cannot be read or fails validation, None when no schema is
        available for ``file_type``
    """
    schema = get_schema(file_type)
    if schema is None:
        paasta_print('%s: %s' % (SCHEMA_NOT_FOUND, file_path))
        return

    validator = Draft4Validator(schema, format_checker=FormatChecker())
    basename = os.path.basename(file_path)
    extension = os.path.splitext(basename)[1]

    try:
        config_file = get_file_contents(file_path)
    except IOError:
        paasta_print('%s: %s' % (FAILED_READING_FILE, file_path))
        return False

    if extension == '.yaml':
        # safe_load only builds plain data types; yaml.load without an explicit
        # Loader can construct arbitrary Python objects and is rejected by
        # recent PyYAML releases.
        config_file_object = yaml.safe_load(config_file)
    elif extension == '.json':
        config_file_object = json.loads(config_file)
    else:
        # Unknown extension: validate the raw file contents as-is.
        config_file_object = config_file

    try:
        validator.validate(config_file_object)
    except ValidationError:
        paasta_print('%s: %s' % (SCHEMA_INVALID, file_path))
        errors = validator.iter_errors(config_file_object)
        paasta_print(' Validation Message: %s' % exceptions.best_match(errors).message)
        # Report the failure explicitly instead of falling off the end of the
        # function with an implicit None.
        return False

    paasta_print('%s: %s' % (SCHEMA_VALID, basename))
    return True
def start_chronos_job(service, instance, job_id, client, cluster, job_config, complete_job_config, emergency=False):
    """Trigger a manual run of a Chronos job.

    Uses the 'manual start' Chronos endpoint
    (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job) so
    the job runs now regardless of its 'schedule'; the schedule itself is left
    untouched. Nothing is started when the job is disabled.
    """
    name = PaastaColors.cyan(job_id)

    # A job that is disabled (via the 'disabled' key in soa-configs, or because
    # it was previously stopped) must not be run.
    if complete_job_config['disabled']:
        paasta_print(PaastaColors.red("You cannot emergency start a disabled job. Run `paasta start` first."))
        return

    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    else:
        log_reason = "Brutal bounce"

    _log(
        service=service,
        line="%s: Starting manual run of %s in Chronos" % (log_reason, name),
        component="deploy",
        level="event",
        cluster=cluster,
        instance=instance,
    )
    client.update(complete_job_config)
    client.run(job_id)
def do_GET(self):
    """Record the requested path and reply with ``self.status_code``.

    Any exception raised while recording or responding is printed rather than
    propagated.
    """
    requested_path = self.path
    paasta_print("Got GET for %s" % requested_path)
    try:
        FakeHTTPServer.paths.append(self.path)
        self.send_response(self.status_code)
    except Exception as error:
        paasta_print(error)
def main():
    """Print one composed job id per line for every chronos job of the cluster."""
    args = parse_args()
    jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster=args.cluster, soa_dir=args.soa_dir)
    # TODO use compose_job_id instead of constructing string once INTERNAL_SPACER deprecated
    composed = []
    for name, job in jobs:
        composed.append('%s%s%s' % (name, chronos_tools.INTERNAL_SPACER, job))
    paasta_print('\n'.join(composed))
    sys.exit(0)
def launch_jobs(context, num_jobs, state, service, job):
    """Create ``num_jobs`` disabled test jobs in chronos and remember their names.

    :param context: object providing ``chronos_client``; the created job names
        are stored on it according to ``state``
    :param num_jobs: number of job definitions to create (converted with ``int``)
    :param state: "configured" or "unconfigured"; any other value stores nothing
    :param service: service part of the composed job name
    :param job: job part of the composed job name
    """
    client = context.chronos_client
    job_name = None
    jobs = []
    for _ in range(0, int(num_jobs)):
        job_name = compose_job_id(service, job)
        jobs.append({
            'async': False,
            'command': 'echo 1',
            'epsilon': 'PT15M',
            'name': job_name,
            'owner': 'paasta',
            'disabled': True,
            'schedule': 'R/2014-01-01T00:00:00Z/PT60M',
        })

    for job_definition in jobs:
        try:
            paasta_print('attempting to create job %s' % job_definition['name'])
            client.add(job_definition)
        except Exception:
            paasta_print('Error creating test job: %s' % json.dumps(job_definition))
            raise

    # A 'configured' job is one whose soa configs are in place.
    # An 'unconfigured' job may once have been a configured chronos job but no
    # longer has the corresponding configuration in place; such jobs are the
    # target of cleanup_chronos_jobs.
    created_names = [job_definition['name'] for job_definition in jobs]
    if state == "configured":
        context.configured_job_names = created_names
    elif state == "unconfigured":
        context.unconfigured_job_names = created_names
def cmd(command):
    """Run ``command`` and capture its combined stdout/stderr.

    The command line is split with ``shlex.split`` and executed without a
    shell. A timer invokes ``_timeout`` on the process after 60 seconds.

    :param command: command line to run, as a single string
    :returns: a tuple ``(returncode, output)`` where ``output`` is the captured
        text joined with newlines. When the process cannot be started the
        tuple holds the ``OSError`` errno and its message instead.
    """
    stream = False
    timeout = 60
    output = []
    proctimer = None
    try:
        # universal_newlines=True makes the pipe yield text on both Python 2
        # and 3; with a bytes pipe the '' sentinel below never matches and
        # rstrip('\n') raises a TypeError.
        process = Popen(
            shlex.split(command),
            stdout=PIPE,
            stderr=STDOUT,
            stdin=None,
            universal_newlines=True,
        )
        process.name = command
        # start the timer if we specified a timeout
        if timeout:
            proctimer = threading.Timer(timeout, _timeout, (process,))
            proctimer.start()
        for line in iter(process.stdout.readline, ''):
            if stream:
                paasta_print(line.rstrip('\n'))
            output.append(line.rstrip('\n'))
        # when finished, get the exit code
        returncode = process.wait()
    except OSError as e:
        output.append(e.strerror.rstrip('\n'))
        returncode = e.errno
    finally:
        # Always stop the timer thread, whichever way we leave the try block
        # (success, OSError, KeyboardInterrupt/SystemExit or anything else).
        if proctimer is not None:
            proctimer.cancel()
    # NOTE(review): -9 presumably means _timeout killed the process with
    # SIGKILL -- confirm against _timeout.
    if returncode == -9:
        output.append("Command '%s' timed out (longer than %ss)" % (command, timeout))
    return returncode, '\n'.join(output)
def app_has_tasks(client, app_id, expected_tasks, exact_matches_only=False):
    """Tell whether an app has launched *at least* ``expected_tasks`` tasks.

    Raises a marathon.NotFoundError when no app with matching id is found.

    :param client: the marathon client
    :param app_id: the app_id to which the tasks should belong. The leading /
        that marathon appends to app_ids is added here.
    :param expected_tasks: the number of tasks to check for
    :param exact_matches_only: when true, require exactly ``expected_tasks``
        tasks instead of at least that many
    :returns: a boolean indicating whether the task count requirement is met
    """
    app_id = "/%s" % app_id
    try:
        tasks = client.list_tasks(app_id=app_id)
    except NotFoundError:
        paasta_print("no app with id %s found" % app_id)
        raise

    task_count = len(tasks)
    paasta_print("app %s has %d of %d expected tasks" % (app_id, task_count, expected_tasks))
    if exact_matches_only:
        return task_count == expected_tasks
    return task_count >= expected_tasks
def perform_http_healthcheck(url, timeout):
    """Run an HTTP healthcheck against url.

    :param url: the healthcheck url
    :param timeout: timeout in seconds
    :returns: a tuple of (bool, output string); the bool is True only when the
        request completes within ``timeout`` seconds with a 2xx/3xx status
        code and a single content-type header value
    """
    try:
        with Timeout(seconds=timeout):
            try:
                res = requests.get(url)
            except requests.ConnectionError:
                return (False, "http request failed: connection failed")
    except TimeoutError:
        return (False, "http request timed out after %d seconds" % timeout)

    if 'content-type' in res.headers and ',' in res.headers['content-type']:
        paasta_print(PaastaColors.yellow(
            "Multiple content-type headers detected in response."
            " The Mesos healthcheck system will treat this as a failure!"))
        return (False, "http request succeeded, code %d" % res.status_code)
    # check if response code is valid per https://mesosphere.github.io/marathon/docs/health-checks.html
    elif res.status_code >= 200 and res.status_code < 400:
        return (True, "http request succeeded, code %d" % res.status_code)
    else:
        # Everything outside 2xx/3xx is a failure. This includes codes below
        # 200, which previously fell through and returned None instead of the
        # (bool, str) tuple.
        return (False, "http request failed, code %d" % res.status_code)
def run_healthcheck_on_container(
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    timeout
):
    """Performs healthcheck on a container

    :param container_id: Docker container id
    :param healthcheck_mode: one of 'http', 'tcp', or 'cmd'
    :param healthcheck_data: a URL when healthcheck_mode is 'http' or 'tcp', a command if healthcheck_mode is 'cmd'
    :param timeout: timeout in seconds for individual check
    :returns: a tuple of (bool, output string)
    """
    if healthcheck_mode == 'cmd':
        return perform_cmd_healthcheck(docker_client, container_id, healthcheck_data, timeout)
    if healthcheck_mode == 'http':
        return perform_http_healthcheck(healthcheck_data, timeout)
    if healthcheck_mode == 'tcp':
        return perform_tcp_healthcheck(healthcheck_data, timeout)

    paasta_print(PaastaColors.yellow(
        "Healthcheck mode '%s' is not currently supported!" % healthcheck_mode))
    sys.exit(1)
    # Only reached when sys.exit does not actually exit (e.g. it is patched).
    return (False, "unknown")
def main():
    """Print a sorted ``name:proxy_port`` list for every namespace that has a proxy_port."""
    namespace_ports = sorted(
        '%s:%s' % (full_name, config['proxy_port'])
        for full_name, config in marathon_tools.get_all_namespaces()
        if 'proxy_port' in config
    )
    paasta_print("synapse_srv_namespaces=" + ','.join(namespace_ports))
    sys.exit(0)
def git_repo_check(service):
    """Print whether ``git ls-remote`` succeeds for the service's git url.

    :param service: service name used to look up the git url
    """
    git_url = get_git_url(service)
    returncode, _ = _run("git ls-remote %s" % git_url, timeout=5)
    if returncode != 0:
        paasta_print(PaastaCheckMessages.git_repo_missing(git_url))
    else:
        paasta_print(PaastaCheckMessages.GIT_REPO_FOUND)
def print_log(line, requested_levels, raw_mode=False):
    """Print a single log line, prettified unless ``raw_mode`` is set.

    Mostly a stub to ease testing.
    """
    if not raw_mode:
        paasta_print(prettify_log_line(line, requested_levels))
        return
    # suppress trailing newline since scribereader already attached one
    paasta_print(line, end=" ")
def docker_check():
    """Report whether a Dockerfile is found for the current working directory.

    Prints a suitable message depending on the outcome."""
    if is_file_in_dir("Dockerfile", os.getcwd()):
        message = PaastaCheckMessages.DOCKERFILE_FOUND
    else:
        message = PaastaCheckMessages.DOCKERFILE_MISSING
    paasta_print(message)
def service_dir_check(service, soa_dir):
    """Print whether the service name validates against ``soa_dir``.

    :param service: string of service name we wish to inspect
    :param soa_dir: soa configuration directory passed to the validation
    """
    try:
        validate_service_name(service, soa_dir)
    except NoSuchService:
        message = PaastaCheckMessages.service_dir_missing(service, soa_dir)
    else:
        message = PaastaCheckMessages.service_dir_found(service, soa_dir)
    paasta_print(message)
def deploy_check(service_path):
    """Print a success or error message depending on whether deploy.yaml is found.

    :param service_path: path to a directory containing deploy.yaml"""
    has_deploy_yaml = is_file_in_dir("deploy.yaml", service_path)
    if not has_deploy_yaml:
        paasta_print(PaastaCheckMessages.DEPLOY_YAML_MISSING)
    else:
        paasta_print(PaastaCheckMessages.DEPLOY_YAML_FOUND)
def figure_out_service_name(args, soa_dir=DEFAULT_SOA_DIR):
    """Return the validated service name, taken from ``args`` or guessed.

    Prints the validation error and exits with status 1 when the service name
    is not valid for ``soa_dir``.
    """
    service = args.service
    if not service:
        service = guess_service_name()
    try:
        validate_service_name(service, soa_dir=soa_dir)
    except NoSuchService as service_not_found:
        paasta_print(service_not_found)
        exit(1)
    return service
def deploy_has_performance_check(service, soa_dir):
    """Print and return whether the pipeline has a "performance-check" step.

    :returns: True when the step is present, False otherwise
    """
    pipeline = get_pipeline_config(service, soa_dir)
    step_names = [entry["step"] for entry in pipeline]
    found = "performance-check" in step_names
    if found:
        paasta_print(PaastaCheckMessages.DEPLOY_PERFORMANCE_FOUND)
    else:
        paasta_print(PaastaCheckMessages.DEPLOY_PERFORMANCE_MISSING)
    return found
def write_paasta_config(variables, template, destination):
    """Render the cookiecutter ``template`` into ``destination``.

    :param variables: extra context handed to cookiecutter
    :param template: cookiecutter template location
    :param destination: output directory; existing output is overwritten
    """
    paasta_print("Using cookiecutter template from %s" % template)
    # Only let cookiecutter prompt when stdout is attached to a terminal.
    interactive = sys.stdout.isatty()
    cookiecutter(
        template=template,
        extra_context=variables,
        output_dir=destination,
        overwrite_if_exists=True,
        no_input=not interactive,
    )
def deploy_has_security_check(service, soa_dir):
    """Print and return whether the pipeline has a "security-check" step.

    :returns: True when the step is present, False otherwise
    """
    pipeline = get_pipeline_config(service, soa_dir)
    step_names = [entry["step"] for entry in pipeline]
    found = "security-check" in step_names
    if found:
        paasta_print(PaastaCheckMessages.DEPLOY_SECURITY_FOUND)
    else:
        paasta_print(PaastaCheckMessages.DEPLOY_SECURITY_MISSING)
    return found
def check_exit_code(context, expected_exit_code):
    """Assert that ``context.exit_code`` equals ``expected_exit_code``.

    On a mismatch the captured ``context.output`` is printed before the
    assertion error is re-raised.
    """
    try:
        assert context.exit_code == expected_exit_code, (
            "expected %d, got %d" % (expected_exit_code, context.exit_code)
        )
    except AssertionError:
        # behave backs up by two lines before printing its own report, which
        # would clobber this output, hence the extra trailing newline.
        captured = "Output of setup_chronos_job:\n" + context.output + "\n"
        paasta_print(captured)
        raise
def is_safe_to_drain(hostname):
    """Tell whether no local tasks would be put in danger by draining the host.

    :param hostname: hostname to check; only the local host is supported
    :returns: True or False (always False for a non-local hostname)
    """
    if is_hostname_local(hostname):
        return not are_local_tasks_in_danger()
    paasta_print("Due to the way is_safe_to_drain is implemented, it can only work on localhost.")
    return False
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    """Block until every instance of the deploy group runs ``git_sha``.

    Polls ``instances_deployed`` for each cluster every 10 seconds and shows
    the overall progress in a progress bar.

    :param service: service name
    :param deploy_group: deploy group whose instances are waited for
    :param git_sha: sha that has to be deployed
    :param soa_dir: soa configuration directory
    :param timeout: number of seconds to wait before giving up
    :returns: True once everything is deployed
    :raises NoInstancesFound: when the deploy group has no instances
    :raises TimeoutError: re-raised after logging when the timeout expires
    """
    cluster_map = get_cluster_instance_map_for_service(soa_dir, service, deploy_group)
    if not cluster_map:
        not_found_line = "Couldn't find any instances for service {0} in deploy group {1}".format(
            service, deploy_group)
        _log(
            service=service,
            component='deploy',
            line=not_found_line,
            level='event'
        )
        raise NoInstancesFound

    paasta_print("Waiting for deployment of {0} for '{1}' complete...".format(git_sha, deploy_group))

    for entry in cluster_map.values():
        entry['deployed'] = 0

    try:
        with Timeout(seconds=timeout):
            total_instances = sum(len(entry["instances"]) for entry in cluster_map.values())
            with progressbar.ProgressBar(maxval=total_instances) as bar:
                while True:
                    for cluster, entry in cluster_map.items():
                        # Only poll clusters that are not fully deployed yet.
                        if entry['deployed'] != len(entry['instances']):
                            entry['deployed'] = instances_deployed(
                                cluster=cluster,
                                service=service,
                                instances=entry['instances'],
                                git_sha=git_sha)
                            if entry['deployed'] == len(entry['instances']):
                                instance_csv = ", ".join(entry['instances'])
                                paasta_print("Deploy to %s complete! (instances: %s)" % (cluster, instance_csv))
                        bar.update(sum(item["deployed"] for item in cluster_map.values()))
                    everything_deployed = all(
                        item['deployed'] == len(item["instances"])
                        for item in cluster_map.values()
                    )
                    if everything_deployed:
                        sys.stdout.flush()
                        break
                    time.sleep(10)
                    sys.stdout.flush()
    except TimeoutError:
        timeout_line = (
            "\n\nTimed out after {0} seconds, waiting for {2} in {1} to be deployed by PaaSTA. \n\n"
            "This probably means the deploy hasn't suceeded. The new service might not be healthy or one "
            "or more clusters could be having issues.\n\n"
            "To debug: try running:\n\n"
            " paasta status -s {2} -vv\n"
            " paasta logs -s {2}\n\n"
            "to determine the cause.\n\n"
            "If the service is known to be slow to start you may wish to increase "
            "the timeout on this step."
        ).format(timeout, deploy_group, service)
        _log(
            service=service,
            component='deploy',
            line=timeout_line,
            level='event'
        )
        raise
    return True
def check_container_exec_instances(context, num):
    """Assert that the running container has at most ``num`` ExecIDs.

    Modern docker versions remove ExecIDs after they finished, but older
    docker versions leave ExecIDs behind. This test asserts that the ExecIDs
    are cleaned up one way or another."""
    container_info = context.docker_client.inspect_container(context.running_container_id)
    exec_ids = container_info['ExecIDs']
    execs = [] if exec_ids is None else exec_ids
    paasta_print('Container info:\n%s' % container_info)
    assert len(execs) <= int(num)
def paasta_maintenance():
    """Manipulate the maintenance state of a PaaSTA host.

    The action and hostnames come from the parsed command line arguments. The
    result is printed and returned.

    :returns: "Done" for state-changing actions, the formatted report for
        'status' and 'schedule', the check result for the ``is_*`` actions, or
        None when hostnames are required but missing
    :raises NotImplementedError: for an unknown action
    """
    args = parse_args()

    if args.verbose >= 2:
        log_level = logging.DEBUG
    elif args.verbose == 1:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    action = args.action
    hostnames = args.hostname

    if action != 'status' and not hostnames:
        paasta_print("You must specify one or more hostnames")
        return

    start = args.start
    duration = args.duration

    # Actions that change state; their result is reported as "Done".
    state_changes = {
        'drain': lambda: mesos_maintenance.drain(hostnames, start, duration),
        'undrain': lambda: mesos_maintenance.undrain(hostnames),
        'down': lambda: mesos_maintenance.down(hostnames),
        'up': lambda: mesos_maintenance.up(hostnames),
    }
    # Actions whose result is formatted as a string.
    reports = {
        'status': lambda: mesos_maintenance.status(),
        'schedule': lambda: mesos_maintenance.schedule(),
    }
    # Actions that inspect the first hostname only.
    host_checks = {
        'is_safe_to_drain': lambda host: is_safe_to_drain(host),
        'is_safe_to_kill': lambda host: is_safe_to_kill(host),
        'is_host_drained': lambda host: mesos_maintenance.is_host_drained(host),
        'is_host_down': lambda host: mesos_maintenance.is_host_down(host),
        'is_host_draining': lambda host: mesos_maintenance.is_host_draining(host),
        'is_host_past_maintenance_start': lambda host: mesos_maintenance.is_host_past_maintenance_start(host),
        'is_host_past_maintenance_end': lambda host: mesos_maintenance.is_host_past_maintenance_end(host),
    }

    if action in state_changes:
        state_changes[action]()
        ret = "Done"
    elif action in reports:
        ret = "%s" % reports[action]()
    elif action in host_checks:
        ret = host_checks[action](hostnames[0])
    else:
        raise NotImplementedError("Action: '%s' is not implemented." % action)

    paasta_print(ret)
    return ret
def print_chronos_message(desired_state):
    """Print an explanation of what "start" or "stop" means for a Chronos job.

    Nothing is printed for any other ``desired_state``.
    """
    explanations = {
        "start": (
            "'Start' will tell Chronos to start scheduling the job. "
            "If you need the job to start regardless of the schedule, use 'paasta emergency-start'."
        ),
        "stop": (
            "'Stop' for a Chronos job will cause the job to be disabled until the "
            "next deploy or a 'start' command is issued."
        ),
    }
    if desired_state in explanations:
        paasta_print(explanations[desired_state])
def _clean_up_chronos_jobs(context):
    """Delete every chronos job until none is listed, then reset ``context.jobs``.

    Does nothing for the respective part when ``context`` has no
    ``chronos_client`` / ``jobs`` attribute.
    """
    if hasattr(context, 'chronos_client'):
        # Keep deleting and re-listing until chronos reports no jobs at all.
        while len(context.chronos_client.list()) > 0:
            for leftover in context.chronos_client.list():
                leftover_name = leftover['name']
                paasta_print("after_scenario: Job %s is present in chronos. Deleting." % leftover_name)
                context.chronos_client.delete(leftover_name)
            time.sleep(1)
    if hasattr(context, 'jobs'):
        context.jobs = {}
def check_cleanup_chronos_jobs_output(context, expected_return_code):
    """Run cleanup_chronos_jobs.py and check its exit code and output.

    The output must contain both summary headers and every name listed in
    ``context.unconfigured_job_names``.
    """
    command = '../paasta_tools/cleanup_chronos_jobs.py --soa-dir %s' % context.soa_dir
    exit_code, output = _run(command)
    paasta_print(context.unconfigured_job_names)
    paasta_print('Got exitcode %s with output:\n%s' % (exit_code, output))

    assert exit_code == int(expected_return_code)
    assert "Successfully Removed Tasks (if any were running) for:" in output
    assert "Successfully Removed Jobs:" in output
    for job_name in context.unconfigured_job_names:
        assert ' %s' % job_name in output
def _clean_up_paasta_native_frameworks(context):
    """Terminate every active mesos framework whose name starts with 'paasta '.

    HTTP errors raised while terminating a framework are printed and do not
    stop the cleanup of the remaining frameworks.
    """
    clear_mesos_tools_cache()
    # context.etc_paasta signals that we actually have configured the mesos-cli.json; without this, we don't know where
    # to connect to clean up paasta native frameworks.
    if not hasattr(context, 'etc_paasta'):
        return

    for framework in mesos_tools.get_mesos_master().frameworks(active_only=True):
        if not framework.name.startswith('paasta '):
            continue
        paasta_print("cleaning up framework %s" % framework.name)
        try:
            mesos_tools.terminate_framework(framework.id)
        except requests.exceptions.HTTPError as error:
            paasta_print("Got exception when terminating framework %s: %s" % (framework.id, error))
def confirm_to_continue(cluster_service_instances, desired_state):
    """List the affected instances and ask for confirmation on a terminal.

    :param cluster_service_instances: iterable of ``(cluster, mapping)`` pairs
        where the mapping goes from service to a mapping keyed by instance
    :param desired_state: the action about to be applied, used in the messages
    :returns: the answer of the prompt when stdin is a terminal, True otherwise
    """
    paasta_print(f"You are about to {desired_state} the following instances:")
    paasta_print(
        "Either --instances or --clusters not specified. Asking for confirmation."
    )

    instance_total = 0
    for cluster, services_instances in cluster_service_instances:
        for _service, instances in services_instances.items():
            for instance in instances.keys():
                paasta_print(f"cluster = {cluster}, instance = {instance}")
                instance_total += 1

    if not sys.stdin.isatty():
        return True
    prompt = choice.Binary(
        f"Are you sure you want to {desired_state} these {instance_total} instances?",
        False,
    )
    return prompt.ask()
def check_metastatus_return_code_with_flags(context, flags, expected_return_code, expected_output):
    """Run paasta_metastatus with ``flags`` and check exit code and output.

    ``expected_output`` is looked up in the output after ANSI escape sequences
    have been removed.
    """
    # The "paasta metastatus" wrapper is deliberately not used: by default it
    # checks every cluster. Invoking the module directly is also the way sensu
    # invokes this check.
    command = "python -m paasta_tools.paasta_metastatus%s" % flags
    paasta_print("Running cmd %s" % command)
    exit_code, output = _run(command)

    # we don't care about the colouring here, so remove any ansi escape sequences
    plain_output = remove_ansi_escape_sequences(output)
    paasta_print(f"Got exitcode {exit_code} with output:\n{output}")
    paasta_print()

    assert exit_code == int(expected_return_code)
    assert expected_output in plain_output
def verify_instances(args_instances: str, service: str, clusters: Sequence[str]) -> Sequence[str]:
    """Verify that a list of instances specified by user is correct for this service.

    Unknown instances are reported together with close-match suggestions; they
    are not removed from the result.

    :param args_instances: a comma separated string of instances.
    :param service: the service name
    :param clusters: a list of clusters
    :returns: a list of instances specified in args_instances without any exclusions.
    """
    unverified_instances = args_instances.split(",")
    service_instances: Set[str] = list_all_instances_for_service(
        service, clusters=clusters)

    misspelled_instances: Sequence[str] = [
        name for name in unverified_instances if name not in service_instances
    ]

    if misspelled_instances:
        # A set removes suggestions that match several misspelled names.
        suggestions: Set[str] = set()
        for misspelled in misspelled_instances:
            matches = difflib.get_close_matches(misspelled, service_instances, n=5, cutoff=0.5)
            suggestions.update(matches)  # type: ignore

        misspelled_csv = ", ".join(sorted(misspelled_instances))
        if clusters:
            message = "{} doesn't have any instances matching {} on {}.".format(
                service,
                misspelled_csv,
                ", ".join(sorted(clusters)),
            )
        else:
            message = "{} doesn't have any instances matching {}.".format(
                service, misspelled_csv)

        paasta_print(PaastaColors.red(message))

        if suggestions:
            paasta_print("Did you mean any of these?")
            for suggestion in sorted(suggestions):
                paasta_print(" %s" % suggestion)

    return unverified_instances
def validate_chronos(service_path):
    """Check that any chronos configurations are valid

    :param service_path: path from which soa dir and service name are derived
    :returns: True when every chronos instance passes, False otherwise
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'
    chronos_spacer = paasta_tools.chronos_tools.INTERNAL_SPACER

    all_valid = True

    if service.startswith(TMP_JOB_IDENTIFIER):
        paasta_print((
            "Services using scheduled tasks cannot be named %s, as it clashes with the "
            "identifier used for temporary jobs" % TMP_JOB_IDENTIFIER
        ))
        return False

    for cluster in list_clusters(service, soa_dir, instance_type):
        services_in_cluster = get_services_for_cluster(cluster=cluster, instance_type='chronos', soa_dir=soa_dir)
        valid_services = set()
        for name, known_instance in services_in_cluster:
            valid_services.add("%s%s%s" % (name, chronos_spacer, known_instance))

        cluster_instances = list_all_instances_for_service(
            service=service,
            clusters=[cluster],
            instance_type=instance_type,
            soa_dir=soa_dir,
        )
        for instance in cluster_instances:
            cjc = load_chronos_job_config(service, instance, cluster, False, soa_dir)
            parents = cjc.get_parents() or []
            checks_passed, check_msgs = cjc.validate()

            own_job_id = "%s%s%s" % (service, chronos_spacer, instance)
            for parent in parents:
                # Parents with an unexpected format are not checked further here.
                if not check_parent_format(parent):
                    continue
                if own_job_id == parent:
                    checks_passed = False
                    check_msgs.append("Job %s cannot depend on itself" % parent)
                elif parent not in valid_services:
                    checks_passed = False
                    check_msgs.append("Parent job %s could not be found" % parent)

            # Remove duplicate check_msgs
            unique_check_msgs = list(set(check_msgs))

            if checks_passed:
                paasta_print(valid_chronos_instance(cluster, instance))
            else:
                paasta_print(invalid_chronos_instance(cluster, instance, "\n ".join(unique_check_msgs)))
                all_valid = False

    return all_valid
def pick_default_log_mode(args, log_reader, service, levels, components, clusters, instances):
    """Print logs using the first mode the reader supports.

    The modes are tried in this order: last 100 lines, a generated time
    period, tailing. A notice about the chosen mode goes to stderr.

    :returns: 0 after a supported mode has run, None when the reader supports
        none of them
    """
    if log_reader.SUPPORTS_LINE_COUNT:
        notice = PaastaColors.cyan("Fetching the last 100 lines and applying filters...")
        paasta_print(notice, file=sys.stderr)
        log_reader.print_last_n_logs(
            service=service,
            line_count=100,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    if log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()
        notice = PaastaColors.cyan("Fetching a specific time period and applying filters...")
        paasta_print(notice, file=sys.stderr)
        log_reader.print_logs_by_time(
            service=service,
            start_time=start_time,
            end_time=end_time,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    if log_reader.SUPPORTS_TAILING:
        notice = PaastaColors.cyan("Tailing logs and applying filters...")
        paasta_print(notice, file=sys.stderr)
        log_reader.tail_logs(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0
def verify_instances(args_instances, service, clusters):
    """Verify that a list of instances specified by user is correct for this service.

    Unknown instances are reported together with close-match suggestions; they
    are not removed from the result.

    :param args_instances: a comma separated string of instances.
    :param service: the service name
    :param clusters: a list of clusters
    :returns: a list of instances specified in args_instances without any exclusions.
    """
    unverified_instances = args_instances.split(",")
    service_instances = list_all_instances_for_service(service, clusters=clusters)

    misspelled_instances = [
        name for name in unverified_instances if name not in service_instances
    ]
    if not misspelled_instances:
        return unverified_instances

    # A set removes suggestions that match several misspelled names.
    suggestions = {
        match
        for misspelled in misspelled_instances
        for match in difflib.get_close_matches(misspelled, service_instances, n=5, cutoff=0.5)
    }

    misspelled_csv = ', '.join(sorted(misspelled_instances))
    if clusters:
        message = "%s doesn't have any instances matching %s on %s." % (
            service,
            misspelled_csv,
            ', '.join(sorted(clusters)),
        )
    else:
        message = "%s doesn't have any instances matching %s." % (service, misspelled_csv)

    paasta_print(PaastaColors.red(message))

    if suggestions:
        paasta_print("Did you mean any of these?")
        for suggestion in sorted(suggestions):
            paasta_print(" %s" % suggestion)

    return unverified_instances
def paasta_status_on_api_endpoint(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    system_paasta_config: SystemPaastaConfig,
    verbose: int,
) -> int:
    """Fetch the instance status from the paasta api and render it.

    Header lines are appended to ``output``; the rest is delegated to the
    printer matching the first populated section of the status, checked in the
    order marathon, kubernetes, tron, adhoc, flink.

    :returns: the HTTP status code when the api call fails, the printer's
        return value when a section is populated, 0 otherwise
    """
    client = get_paasta_api_client(cluster, system_paasta_config)
    if not client:
        paasta_print('Cannot get a paasta-api client')
        exit(1)

    try:
        status = client.service.status_instance(service=service, instance=instance).result()
    except HTTPError as exc:
        paasta_print(exc.response.text)
        return exc.status_code

    output.append(' instance: %s' % PaastaColors.blue(instance))
    if status.git_sha != '':
        output.append(' Git sha: %s (desired)' % status.git_sha)

    if status.marathon is not None:
        return print_marathon_status(service, instance, output, status.marathon)
    if status.kubernetes is not None:
        return print_kubernetes_status(service, instance, output, status.kubernetes)
    if status.tron is not None:
        return print_tron_status(service, instance, output, status.tron, verbose)
    if status.adhoc is not None:
        return print_adhoc_status(cluster, service, instance, output, status.adhoc, verbose)
    if status.flink is not None:
        return print_flink_status(cluster, service, instance, output, status.flink.get('status'), verbose)

    paasta_print(
        "Not implemented: Looks like %s is not a Marathon or Kubernetes instance" % instance)
    return 0
def build_and_push_docker_image(args):
    """Build an image and push it to the docker registry.

    Used when the default Spark service image is not preferred. The image
    needs to be pushed to a registry for the Spark executors to pull.

    The image tag comes from the ``DOCKER_TAG`` environment variable, falling
    back to a per-user default; the chosen tag is written back to
    ``os.environ['DOCKER_TAG']``.

    :param args: parsed arguments providing ``service``,
        ``yelpsoa_config_root`` and ``docker_registry``
    :returns: the pushed docker url, or None when the Makefile has no
        'cook-image' target or any build/tag/push step fails
    """
    if not makefile_responds_to('cook-image'):
        paasta_print(
            "A local Makefile with a 'cook-image' target is required for --build",
            file=sys.stderr,
        )
        return None

    default_tag = '{}-{}'.format(DEFAULT_SPARK_DOCKER_IMAGE_PREFIX, get_username())
    docker_tag = os.environ.get('DOCKER_TAG', default_tag)
    os.environ['DOCKER_TAG'] = docker_tag

    cook_return = paasta_cook_image(
        args=None,
        service=args.service,
        soa_dir=args.yelpsoa_config_root,
    )
    # Compare return codes by value: `is not 0` tests object identity, which
    # only works by accident of CPython's small-int cache and triggers a
    # SyntaxWarning on Python 3.8+.
    if cook_return != 0:
        return None

    docker_url = f'{args.docker_registry}/{docker_tag}'
    command = f'docker tag {docker_tag} {docker_url}'
    paasta_print(PaastaColors.grey(command))
    retcode, _ = _run(command, stream=True)
    if retcode != 0:
        return None

    # Pushing to a registry other than the default one is done through sudo.
    if args.docker_registry != DEFAULT_SPARK_DOCKER_REGISTRY:
        command = 'sudo -H docker push %s' % docker_url
    else:
        command = 'docker push %s' % docker_url

    paasta_print(PaastaColors.grey(command))
    retcode, _ = _run(command, stream=True)
    if retcode != 0:
        return None

    return docker_url
def extract_args(args):
    """Resolve service, cluster and instance for a remote-run invocation.

    Prints an error, emits a ``paasta.remote_run.<action>.failed`` counter and
    exits with status 1 when no cluster can be determined, when the instance
    has no configuration, or when the instance is not an adhoc instance.

    :param args: parsed arguments providing ``yelpsoa_config_root``,
        ``cluster``, ``instance`` and ``action``
    :returns: a tuple ``(system_paasta_config, service, cluster, soa_dir,
        instance, instance_type)``
    """
    system_paasta_config = get_system_paasta_config()
    soa_dir = args.yelpsoa_config_root
    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)

    cluster = args.cluster or system_paasta_config.get_remote_run_config().get(
        "default_cluster", None)
    if not cluster:
        # The literals below are concatenated, so the first one needs a
        # trailing space to keep the two sentences apart.
        paasta_print(
            PaastaColors.red(
                "PaaSTA on this machine has not been configured with a default cluster. "
                "Please pass one using '-c'."))
        emit_counter_metric("paasta.remote_run." + args.action + ".failed", service, "UNKNOWN")
        sys.exit(1)

    instance = args.instance
    if instance is None:
        # No instance requested: fall back to the generic adhoc "remote" instance.
        instance_type = "adhoc"
        instance = "remote"
    else:
        try:
            instance_type = validate_service_instance(service, instance, cluster, soa_dir)
        except NoConfigurationForServiceError as e:
            paasta_print(e)
            emit_counter_metric("paasta.remote_run." + args.action + ".failed", service, instance)
            sys.exit(1)

        if instance_type != "adhoc":
            paasta_print(
                PaastaColors.red(
                    "Please use instance declared in adhoc.yaml for use "
                    f"with remote-run, {instance} is declared as {instance_type}"
                ))
            emit_counter_metric("paasta.remote_run." + args.action + ".failed", service, instance)
            sys.exit(1)

    return (system_paasta_config, service, cluster, soa_dir, instance, instance_type)
def remote_run_list_report(service, instance, cluster, frameworks=None):
    """Print launch time, run id and framework id of matching remote-run frameworks.

    The frameworks are selected with ``remote_run_filter_frameworks`` and
    listed sorted by name, followed by a hint on how to stop them (or
    "Nothing found." when there are none).

    :param service: service name
    :param instance: instance name
    :param cluster: cluster name, only used in the printed hint
    :param frameworks: optional frameworks passed through to the filter
    """
    filtered = remote_run_filter_frameworks(
        service,
        instance,
        frameworks=frameworks,
    )
    filtered.sort(key=lambda x: x.name)
    for f in filtered:
        # Raw string: \s and \w are regex classes, not string escapes. In a
        # plain literal they are invalid escape sequences that newer Python
        # versions warn about.
        # NOTE(review): assumes every filtered framework name matches this
        # pattern; a non-matching name raises AttributeError here.
        launch_time, run_id = re.match(
            r'paasta-remote [^\s]+ (\w+) (\w+)',
            f.name,
        ).groups()
        paasta_print("Launch time: %s, run id: %s, framework id: %s" %
                     (launch_time, run_id, f.id))
    if len(filtered) > 0:
        paasta_print(
            (
                "Use `paasta remote-run stop -s {} -c {} -i {} [-R <run id> "
                "| -F <framework id>]` to stop."
            ).format(service, cluster, instance),
        )
    else:
        paasta_print("Nothing found.")
def print_marathon_status(
    service: str,
    instance: str,
    marathon_status,
) -> int:
    """Print a human readable summary of a marathon instance status.

    :returns: 1 when the status carries an error message (which is printed),
        0 otherwise
    """
    if marathon_status.error_message:
        paasta_print(marathon_status.error_message)
        return 1

    bouncing_status = bouncing_status_human(
        marathon_status.app_count,
        marathon_status.bounce_method,
    )
    desired_state = desired_state_human(
        marathon_status.desired_state,
        marathon_status.expected_instance_count,
    )
    paasta_print(
        f"State: {bouncing_status} - Desired state: {desired_state}")

    status = MarathonDeployStatus.fromstring(marathon_status.deploy_status)
    if status == MarathonDeployStatus.NotRunning:
        deploy_status = 'NotRunning'
    elif status == MarathonDeployStatus.Delayed:
        # Only a delayed deploy carries backoff information.
        deploy_status = marathon_app_deploy_status_human(
            status, marathon_status.backoff_seconds)
    else:
        deploy_status = marathon_app_deploy_status_human(status)

    job_summary = status_marathon_job_human(
        service=service,
        instance=instance,
        deploy_status=deploy_status,
        desired_app_id=marathon_status.app_id,
        app_count=marathon_status.app_count,
        running_instances=marathon_status.running_instance_count,
        normal_instance_count=marathon_status.expected_instance_count,
    )
    paasta_print(job_summary)
    return 0
def check_offer_constraints(offer, constraints, state):
    """Tell whether the offer's attributes satisfy every constraint.

    Returns False as soon as an attribute is missing or a constraint fails.
    Prints an error message and re-raises if an error was thrown.

    :param offer: offer whose ``attributes`` are matched by name
    :param constraints: iterable of ``(attr, op, val)`` triples; ``op`` selects
        the check from ``CONS_OPS``
    :param state: passed through to the constraint check
    """
    for (attr, op, val) in constraints:
        try:
            matching = (candidate for candidate in offer.attributes if candidate.name == attr)
            offer_attr = next(matching, None)
            if offer_attr is None:
                paasta_print("Attribute not found for a constraint: %s" % attr)
                return False

            satisfied = CONS_OPS[op](val, offer_attr.text.value, offer_attr.name, state)
            if not satisfied:
                paasta_print("Constraint not satisfied: [%s %s %s] for %s with %s" % (
                    attr, op, val, offer_attr.text.value, state))
                return False
        except Exception as err:
            paasta_print("Error while mathing constraint: [%s %s %s] %s" % (
                attr, op, val, str(err)))
            raise err

    return True
def check_mesos_no_duplicate_frameworks():
    """Check the framework count against the marathon framework ids.

    Prints an "OK: ..." line and exits 0 when ``assert_framework_count``
    reports a healthy result; prints a "CRITICAL: ..." line and exits 2 when
    it does not, or when the mesos master is not available.
    """
    master = get_mesos_master()
    try:
        state = master.state
    except MasterNotAvailableException as e:
        # Exceptions have no standard `.message` attribute on Python 3; fall
        # back to the exception itself so this handler cannot raise an
        # AttributeError of its own.
        paasta_print("CRITICAL: %s" % getattr(e, 'message', e))
        sys.exit(2)

    system_paasta_config = load_system_paasta_config()
    marathon_servers = get_marathon_servers(system_paasta_config)
    marathon_clients = get_marathon_clients(marathon_servers)
    marathon_framework_ids = get_marathon_framework_ids(marathon_clients)
    result = assert_framework_count(
        state=state,
        marathon_framework_ids=marathon_framework_ids,
    )
    if result.healthy:
        paasta_print("OK: " + result.message)
        sys.exit(0)
    else:
        paasta_print("CRITICAL: %s" % result.message)
        sys.exit(2)
def validate_autoscaling_configs(service_path):
    """Validate new autoscaling configurations that are not validated by jsonschema.

    Only non-symlink ``*.yaml`` files directly under service_path whose name
    starts with ``kubernetes`` are inspected. For every instance in those
    files, each entry of its ``new_autoscaling`` mapping is checked:

    * the metric name must not be longer than 63 characters
    * for the ``http`` and ``uwsgi`` metrics, each ``dimensions`` key must not
      be longer than 128 characters and each value not longer than 256

    Every violation is printed; checking continues so all are reported.

    :param service_path: Path to directory containing soa conf yaml files for service
    :returns: True if no violation was found, False otherwise
    """
    path = os.path.join(service_path, "*.yaml")
    returncode = True
    instances = {}
    cluster_prefix = "kubernetes-"

    # Read and store all instance configuration in instances dict
    for file_name in glob(path):
        if os.path.islink(file_name):
            continue
        basename = os.path.basename(file_name)
        if basename.startswith("kubernetes"):
            # "kubernetes-<cluster>.yaml" -> "<cluster>". The previous slice
            # searched for a misspelled prefix, so the key was always the
            # whole file name.
            cluster = os.path.splitext(basename)[0]
            if cluster.startswith(cluster_prefix):
                cluster = cluster[len(cluster_prefix):]
            instances[cluster] = get_config_file_dict(file_name)

    # Validate autoscaling configurations for all instances
    for cluster_name, cluster in instances.items():
        for instance_name, instance in cluster.items():
            for metric, params in instance.get("new_autoscaling", {}).items():
                if len(metric) > 63:
                    returncode = False
                    paasta_print(f"length of metric name {metric} exceeds 63")
                    continue
                if metric in {"http", "uwsgi"} and "dimensions" in params:
                    for k, v in params["dimensions"].items():
                        if len(k) > 128:
                            returncode = False
                            paasta_print(
                                f"length of dimension key {k} of instance {instance_name} in {cluster_name} cannot exceed 128"
                            )
                        if len(v) > 256:
                            returncode = False
                            paasta_print(
                                f"length of dimension value {v} of instance {instance_name} in {cluster_name} cannot exceed 256"
                            )
    return returncode
def generate_pipeline(service, soa_dir):
    """Run the fab_repo commands that set up the Jenkins jobs for a service.

    The owner passed to fab_repo is the local part of the team email address
    when one is configured, otherwise the team name.

    :param service: service name
    :param soa_dir: soa-configs directory used to look up team and repo
    :returns: the return code of the last command that was run
    """
    email_address = get_team_email_address(service=service, soa_dir=soa_dir)
    repo = get_git_repo_for_fab_repo(service, soa_dir)
    if email_address:
        # fab_repo tacks on the domain, so we only want the first
        # part of the email.
        owner = re.sub('@.*', '', email_address)
    else:
        owner = get_team(overrides={}, service=service, soa_dir=soa_dir)

    substitutions = (service, owner, repo)
    main_job_cmd = (
        'fab_repo setup_jenkins:services/%s,'
        'profile=paasta,job_disabled=False,owner=%s,repo=%s' % substitutions
    )
    boilerplate_job_cmd = (
        'fab_repo setup_jenkins:services/%s,'
        'profile=paasta_boilerplate,owner=%s,repo=%s' % substitutions
    )

    print_warning()
    for cmd in (main_job_cmd, boilerplate_job_cmd):
        paasta_print("INFO: Executing %s" % cmd)
        returncode, output = _run(cmd, timeout=90)
        if returncode != 0:
            paasta_print("ERROR: Failed to generate Jenkins pipeline")
            paasta_print(output)
    return returncode
def simulate_healthcheck_on_service(
    instance_config, docker_client, container_id, healthcheck_mode, healthcheck_data, healthcheck_enabled
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    When enabled, the container is polled in a loop: failures that occur
    before the grace period has elapsed are printed but not counted; after
    that, each failure is counted and the loop stops once the count equals
    the configured maximum. The loop also stops as soon as a check passes or
    the container is no longer running.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean (always True when healthchecks are disabled)
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)
    if healthcheck_enabled:
        grace_period = instance_config.get_healthcheck_grace_period_seconds()
        timeout = instance_config.get_healthcheck_timeout_seconds()
        interval = instance_config.get_healthcheck_interval_seconds()
        max_failures = instance_config.get_healthcheck_max_consecutive_failures()

        paasta_print('\nStarting health check via %s (waiting %s seconds before '
                     'considering failures due to grace period):' % (healthcheck_link, grace_period))

        # silently start performing health checks until grace period ends or first check succeeds
        graceperiod_end_time = time.time() + grace_period
        after_grace_period_attempts = 0
        while True:
            # First inspect the container for early exits
            container_state = docker_client.inspect_container(container_id)
            if not container_state['State']['Running']:
                paasta_print(
                    PaastaColors.red('Container exited with code {}'.format(
                        container_state['State']['ExitCode'],
                    ))
                )
                healthcheck_passed = False
                break

            healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
                docker_client, container_id, healthcheck_mode, healthcheck_data, timeout,
            )

            # Yay, we passed the healthcheck
            if healthcheck_passed:
                paasta_print("{}'{}' (via {})".format(
                    PaastaColors.green("Healthcheck succeeded!: "),
                    healthcheck_output,
                    healthcheck_link,
                ))
                break

            # Otherwise, print why we failed
            if time.time() < graceperiod_end_time:
                # Still inside the grace period: report, but do not count it.
                color = PaastaColors.grey
                msg = '(disregarded due to grace period)'
                extra_msg = ' (via: {}. Output: {})'.format(healthcheck_link, healthcheck_output)
            else:
                # If we've exceeded the grace period, we start incrementing attempts
                after_grace_period_attempts += 1
                color = PaastaColors.red
                msg = '(Attempt {} of {})'.format(
                    after_grace_period_attempts, max_failures,
                )
                extra_msg = ' (via: {}. Output: {})'.format(healthcheck_link, healthcheck_output)

            paasta_print('{}{}'.format(
                color('Healthcheck failed! {}'.format(msg)),
                extra_msg,
            ))

            # Give up once the counted failures reach the configured maximum.
            if after_grace_period_attempts == max_failures:
                break

            time.sleep(interval)
    else:
        paasta_print('\nPaaSTA would have healthchecked your service via\n%s' % healthcheck_link)
        healthcheck_passed = True
    return healthcheck_passed
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_hash,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    user_port,
    instance_config,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    :param user_port: port requested by the user; when falsy a random port is
        picked, when set but not free the process exits with status 1
    :param dry_run: when true, print the docker command (or, with json_dict,
        the argument dict) as JSON and return 0 without running anything
    :returns: 0 for dry runs and interactive runs; otherwise the container's
        exit code once it has started, or 3 if the run was interrupted before
        that
    """
    if user_port:
        if check_if_port_free(user_port):
            chosen_port = user_port
        else:
            paasta_print(
                PaastaColors.red(
                    "The chosen port is already in use!\n"
                    "Try specifying another one, or omit (--port|-o) and paasta will find a free one for you"
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        chosen_port = pick_random_port()
    environment = instance_config.get_env_dictionary()
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config,
        port0=chosen_port,
        framework=framework,
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()

    try:
        container_port = instance_config.get_container_port()
    except AttributeError:
        container_port = None

    docker_run_args = dict(
        memory=memory,
        chosen_port=chosen_port,
        container_port=container_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        docker_hash=docker_hash,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = ' '.join(docker_run_cmd)
    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service, instance, instance_config, chosen_port, soa_dir=soa_dir)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print('Running docker command:\n%s' % PaastaColors.grey(joined_docker_run_cmd))

    if interactive:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        execlp('paasta_docker_wrapper', *docker_run_cmd)
        # For testing, when execlp is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd)
        if returncode != 0:
            # Pass each line as its own argument so that sep='\n' actually
            # separates them; adjacent string literals would be glued
            # together into one run-on line.
            paasta_print(
                'Failure trying to start your container!',
                'Returncode: %d' % returncode,
                'Output:',
                '%s' % output,
                '',
                'Fix that problem and try again.',
                'http://y/paasta-troubleshooting',
                sep='\n',
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print('Found our container running with CID %s' % container_id)

        # If the service has a healthcheck, simulate it
        if healthcheck_mode is not None:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_stdout_and_exit_code():
            returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
            paasta_print('Container exited: %d' % returncode)
            paasta_print('Here is the stdout and stderr:\n\n')
            paasta_print(
                docker_client.attach(container_id, stderr=True, stream=False, logs=True)
            )

        if healthcheck_only:
            if container_started:
                _output_stdout_and_exit_code()
                _cleanup_container(docker_client, container_id)
            # healthcheck_result is only bound when healthcheck_mode is set,
            # so the None case must be tested first.
            if healthcheck_mode is None:
                paasta_print('--healthcheck-only, but no healthcheck is defined for this instance!')
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(container_id)['State']['Running']
        if running:
            paasta_print('Your service is now running! Tailing stdout and stderr:')
            for line in docker_client.attach(container_id, stderr=True, stream=True, logs=True):
                paasta_print(line)
        else:
            _output_stdout_and_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
        _cleanup_container(docker_client, container_id)
    return returncode
def _clean_up_soa_dir(context): """If a yelpsoa-configs directory was written, clean it up.""" if hasattr(context, "soa_dir"): paasta_print("Cleaning up %s" % context.soa_dir) shutil.rmtree(context.soa_dir) del context.soa_dir
def _clean_up_etc_paasta(context): if hasattr(context, "etc_paasta"): paasta_print("Cleaning up %s" % context.etc_paasta) shutil.rmtree(context.etc_paasta) del context.etc_paasta
def configure_and_run_docker_container(
    docker_client,
    docker_url,
    docker_sha,
    service,
    instance,
    cluster,
    system_paasta_config,
    args,
    pull_image=False,
    dry_run=False,
):
    """
    Run Docker container by image hash with args set in command line.
    Function prints the output of run command in stdout.

    Resolves the instance configuration, the image to run, the volume
    bindings and the command, then delegates to run_docker_container.

    :param docker_client: Docker client object, passed through
    :param docker_url: image to run; when None it is taken from the instance config
    :param docker_sha: when not None, overrides the instance config's branch_dict
    :param service: service name
    :param instance: instance name; None selects the default interactive config
    :param cluster: cluster name
    :param system_paasta_config: source of volumes, vault and secret provider settings
    :param args: parsed command-line arguments
    :param pull_image: when true, pull docker_url before running
    :param dry_run: passed through to run_docker_container
    :returns: 1 on any configuration error reported here, otherwise whatever
        run_docker_container returns
    """
    if instance is None and args.healthcheck_only:
        paasta_print(
            "With --healthcheck-only, --instance MUST be provided!", file=sys.stderr
        )
        return 1
    if instance is None and not sys.stdin.isatty():
        paasta_print(
            "--instance and --cluster must be specified when using paasta local-run without a tty!",
            file=sys.stderr,
        )
        return 1

    soa_dir = args.yelpsoa_config_root
    volumes = list()
    # Deployments are only loaded when the image must come from the instance
    # config (no url given, or a pull was requested) and no sha was supplied.
    load_deployments = (docker_url is None or pull_image) and not docker_sha
    interactive = args.interactive

    try:
        if instance is None:
            # No instance given: fall back to the default interactive config
            # and force an interactive session.
            instance_type = "adhoc"
            instance = "interactive"
            instance_config = get_default_interactive_config(
                service=service,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=load_deployments,
            )
            interactive = True
        else:
            instance_type = validate_service_instance(
                service, instance, cluster, soa_dir
            )
            instance_config = get_instance_config(
                service=service,
                instance=instance,
                cluster=cluster,
                load_deployments=load_deployments,
                soa_dir=soa_dir,
            )
    except NoConfigurationForServiceError as e:
        paasta_print(str(e), file=sys.stderr)
        return 1
    except NoDeploymentsAvailable:
        paasta_print(
            PaastaColors.red(
                "Error: No deployments.json found in %(soa_dir)s/%(service)s. "
                "You can generate this by running: "
                "generate_deployments_for_service -d %(soa_dir)s -s %(service)s"
                % {"soa_dir": soa_dir, "service": service}
            ),
            sep="\n",
            file=sys.stderr,
        )
        return 1

    if docker_sha is not None:
        # An explicit sha replaces whatever deployment information was loaded.
        instance_config.branch_dict = {
            "git_sha": docker_sha,
            "docker_image": build_docker_image_name(service=service, sha=docker_sha),
            "desired_state": "start",
            "force_bounce": None,
        }

    if docker_url is None:
        try:
            docker_url = instance_config.get_docker_url()
        except NoDockerImageError:
            # Distinguish "no deploy group configured" from "deploy group has
            # nothing marked for deployment" so the user knows what to fix.
            if instance_config.get_deploy_group() is None:
                paasta_print(
                    PaastaColors.red(
                        f"Error: {service}.{instance} has no 'deploy_group' set. Please set one so "
                        "the proper image can be used to run for this service."
                    ),
                    sep="",
                    file=sys.stderr,
                )
            else:
                paasta_print(
                    PaastaColors.red(
                        "Error: No sha has been marked for deployment for the %s deploy group.\n"
                        "Please ensure this service has either run through a jenkins pipeline "
                        "or paasta mark-for-deployment has been run for %s\n"
                        % (instance_config.get_deploy_group(), service)
                    ),
                    sep="",
                    file=sys.stderr,
                )
            return 1

    if pull_image:
        docker_pull_image(docker_url)

    # Only bind volumes whose host path exists; warn about the others.
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append(
                "{}:{}:{}".format(
                    volume["hostPath"], volume["containerPath"], volume["mode"].lower()
                )
            )
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]
                ),
                file=sys.stderr,
            )

    # Command precedence: bash for interactive runs without --cmd, then an
    # explicit --cmd, then the configured cmd, then the configured args.
    if interactive is True and args.cmd is None:
        command = "bash"
    elif args.cmd:
        command = args.cmd
    else:
        command_from_config = instance_config.get_cmd()
        if command_from_config:
            command = format_command_for_type(
                command=command_from_config, instance_type=instance_type, date=args.date
            )
        else:
            command = instance_config.get_args()

    secret_provider_kwargs = {
        "vault_cluster_config": system_paasta_config.get_vault_cluster_config(),
        "vault_auth_method": args.vault_auth_method,
        "vault_token_file": args.vault_token_file,
    }

    return run_docker_container(
        docker_client=docker_client,
        service=service,
        instance=instance,
        docker_url=docker_url,
        volumes=volumes,
        interactive=interactive,
        command=command,
        healthcheck=args.healthcheck,
        healthcheck_only=args.healthcheck_only,
        user_port=args.user_port,
        instance_config=instance_config,
        soa_dir=args.yelpsoa_config_root,
        dry_run=dry_run,
        json_dict=args.dry_run_json_dict,
        framework=instance_type,
        secret_provider_name=system_paasta_config.get_secret_provider_name(),
        secret_provider_kwargs=secret_provider_kwargs,
        skip_secrets=args.skip_secrets,
    )
def _clean_up_mesos_cli_config(context): """If a mesos cli config file was written, clean it up.""" if hasattr(context, "mesos_cli_config_filename"): paasta_print("Cleaning up %s" % context.mesos_cli_config_filename) os.unlink(context.mesos_cli_config_filename) del context.mesos_cli_config_filename
def _output_exit_code():
    """Print the exit code Docker reports for the container.

    ``docker_client`` and ``container_id`` are not parameters; they are
    resolved from the surrounding scope when this function is called.
    """
    returncode = docker_client.inspect_container(container_id)["State"]["ExitCode"]
    # The message previously ended with an unmatched ")".
    paasta_print(f"Container exited: {returncode}")
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_url,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    user_port,
    instance_config,
    secret_provider_name,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
    secret_provider_kwargs=None,
    skip_secrets=False,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    :param user_port: port requested by the user; when falsy a random port is
        picked, when set but not free the process exits with status 1
    :param secret_provider_kwargs: extra keyword arguments for secret
        decryption; None (the default) is treated as an empty dict
    :param skip_secrets: when true, secret environment variables are not decrypted
    :param dry_run: when true, print the docker command (or, with json_dict,
        the argument dict) as JSON and return 0 without running anything
    :returns: 0 for dry runs and for runs handed over to the docker wrapper;
        otherwise the container's exit code once it has started, or 3 if the
        run was interrupted before that
    """
    # A literal {} default would be one dict object shared across every call.
    if secret_provider_kwargs is None:
        secret_provider_kwargs = {}

    if user_port:
        if check_if_port_free(user_port):
            chosen_port = user_port
        else:
            paasta_print(
                PaastaColors.red(
                    "The chosen port is already in use!\n"
                    "Try specifying another one, or omit (--port|-o) and paasta will find a free one for you"
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        chosen_port = pick_random_port(service)
    environment = instance_config.get_env_dictionary()
    if not skip_secrets:
        secret_environment = decrypt_secret_environment_variables(
            secret_provider_name=secret_provider_name,
            environment=environment,
            soa_dir=soa_dir,
            service_name=service,
            cluster_name=instance_config.cluster,
            secret_provider_kwargs=secret_provider_kwargs,
        )
        environment.update(secret_environment)
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config, port0=chosen_port, framework=framework
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()

    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service, instance, instance_config, chosen_port, soa_dir=soa_dir
    )
    if healthcheck_mode is None:
        # No healthcheck defined: no port is exposed and the run is forced
        # to be interactive.
        container_port = None
        interactive = True
    elif not user_port and not healthcheck and not healthcheck_only:
        container_port = None
    else:
        try:
            container_port = instance_config.get_container_port()
        except AttributeError:
            container_port = None

    simulate_healthcheck = (
        healthcheck_only or healthcheck
    ) and healthcheck_mode is not None

    docker_run_args = dict(
        memory=memory,
        chosen_port=chosen_port,
        container_port=container_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        detach=simulate_healthcheck,
        docker_hash=docker_url,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = " ".join(docker_run_cmd)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print(
            "Running docker command:\n%s" % PaastaColors.grey(joined_docker_run_cmd)
        )

    merged_env = {**os.environ, **environment}

    if interactive or not simulate_healthcheck:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        wrapper_path = shutil.which("paasta_docker_wrapper")
        # To properly simulate mesos, we pop the PATH, which is not available to
        # The executor. Use a default so a missing PATH is not a KeyError.
        merged_env.pop("PATH", None)
        execlpe(wrapper_path, *docker_run_cmd, merged_env)
        # For testing, when execlpe is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd, env=merged_env)
        if returncode != 0:
            # Pass each line as its own argument so that sep="\n" actually
            # separates them; adjacent string literals would be glued
            # together into one run-on line.
            paasta_print(
                "Failure trying to start your container!",
                "Returncode: %d" % returncode,
                "Output:",
                "%s" % output,
                "",
                "Fix that problem and try again.",
                "http://y/paasta-troubleshooting",
                sep="\n",
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print("Found our container running with CID %s" % container_id)

        if simulate_healthcheck:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_exit_code():
            returncode = docker_client.inspect_container(container_id)["State"][
                "ExitCode"
            ]
            paasta_print(f"Container exited: {returncode}")

        if healthcheck_only:
            if container_started:
                _output_exit_code()
                _cleanup_container(docker_client, container_id)
            if healthcheck_mode is None:
                paasta_print(
                    "--healthcheck-only, but no healthcheck is defined for this instance!"
                )
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(container_id)["State"]["Running"]
        if running:
            paasta_print("Your service is now running! Tailing stdout and stderr:")
            for line in docker_client.attach(
                container_id, stderr=True, stream=True, logs=True
            ):
                paasta_print(line)
        else:
            _output_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(container_id)["State"]["ExitCode"]
        _cleanup_container(docker_client, container_id)
    return returncode
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled,
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    When enabled, container logs are streamed from a daemon thread while the
    container is polled in a loop: failures that occur before the grace
    period has elapsed are printed but not counted; after that, each failure
    is counted and the loop stops once the count equals the configured
    maximum. The loop also stops as soon as a check passes or the container
    is no longer running.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean (always True when healthchecks are disabled)
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)
    if healthcheck_enabled:
        grace_period = instance_config.get_healthcheck_grace_period_seconds()
        timeout = instance_config.get_healthcheck_timeout_seconds()
        interval = instance_config.get_healthcheck_interval_seconds()
        max_failures = instance_config.get_healthcheck_max_consecutive_failures()

        paasta_print(
            "\nStarting health check via %s (waiting %s seconds before "
            "considering failures due to grace period):"
            % (healthcheck_link, grace_period)
        )

        # silently start performing health checks until grace period ends or first check succeeds
        graceperiod_end_time = time.time() + grace_period
        after_grace_period_attempts = 0
        # Flag read by the log-streaming thread below; set to False once the
        # polling loop has finished.
        healthchecking = True

        def _stream_docker_logs(container_id, generator):
            # Print container log lines for as long as healthchecking is true.
            while healthchecking:
                try:
                    # the generator will block until another log line is available
                    log_line = next(generator).decode("utf-8").rstrip("\n")
                    if healthchecking:
                        paasta_print(f"container [{container_id[:12]}]: {log_line}")
                    else:
                        # stop streaming at first opportunity, since generator.close()
                        # cant be used until the container is dead
                        break
                except StopIteration:  # natural end of logs
                    break

        docker_logs_generator = docker_client.logs(
            container_id, stderr=True, stream=True
        )
        # Daemon thread, so it does not keep the process alive on its own.
        threading.Thread(
            target=_stream_docker_logs,
            daemon=True,
            args=(container_id, docker_logs_generator),
        ).start()

        while True:
            # First inspect the container for early exits
            container_state = docker_client.inspect_container(container_id)
            if not container_state["State"]["Running"]:
                paasta_print(
                    PaastaColors.red(
                        "Container exited with code {}".format(
                            container_state["State"]["ExitCode"]
                        )
                    )
                )
                healthcheck_passed = False
                break

            healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
                docker_client, container_id, healthcheck_mode, healthcheck_data, timeout
            )

            # Yay, we passed the healthcheck
            if healthcheck_passed:
                paasta_print(
                    "{}'{}' (via {})".format(
                        PaastaColors.green("Healthcheck succeeded!: "),
                        healthcheck_output,
                        healthcheck_link,
                    )
                )
                break

            # Otherwise, print why we failed
            if time.time() < graceperiod_end_time:
                # Still inside the grace period: report, but do not count it.
                color = PaastaColors.grey
                msg = "(disregarded due to grace period)"
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"
            else:
                # If we've exceeded the grace period, we start incrementing attempts
                after_grace_period_attempts += 1
                color = PaastaColors.red
                msg = "(Attempt {} of {})".format(
                    after_grace_period_attempts, max_failures
                )
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"

            paasta_print("{}{}".format(color(f"Healthcheck failed! {msg}"), extra_msg))

            # Give up once the counted failures reach the configured maximum.
            if after_grace_period_attempts == max_failures:
                break

            time.sleep(interval)
        healthchecking = False  # end docker logs stream
    else:
        paasta_print(
            "\nPaaSTA would have healthchecked your service via\n%s" % healthcheck_link
        )
        healthcheck_passed = True
    return healthcheck_passed
def configure_and_run_docker_container(
    docker_client,
    docker_hash,
    service,
    instance,
    cluster,
    system_paasta_config,
    args,
    pull_image=False,
    dry_run=False
):
    """
    Run Docker container by image hash with args set in command line.
    Function prints the output of run command in stdout.

    Resolves the instance configuration, the image to run, the volume
    bindings and the command, then delegates to run_docker_container.

    :param docker_client: Docker client object, passed through
    :param docker_hash: image to run; when None it is taken from the instance config
    :param service: service name
    :param instance: instance name; None selects the default interactive config
    :param cluster: cluster name
    :param system_paasta_config: source of the system-wide volumes
    :param args: parsed command-line arguments
    :param pull_image: when true, pull the image before running
    :param dry_run: passed through to run_docker_container
    :returns: 1 on any configuration error reported here, otherwise whatever
        run_docker_container returns
    """
    if instance is None and args.healthcheck_only:
        paasta_print(
            "With --healthcheck-only, --instance MUST be provided!",
            file=sys.stderr,
        )
        return 1
    if instance is None and not sys.stdin.isatty():
        paasta_print(
            "--instance and --cluster must be specified when using paasta local-run without a tty!",
            file=sys.stderr,
        )
        return 1

    soa_dir = args.yelpsoa_config_root
    volumes = list()
    load_deployments = docker_hash is None or pull_image
    interactive = args.interactive

    try:
        if instance is None:
            # No instance given: fall back to the default interactive config
            # and force an interactive session.
            instance_type = 'adhoc'
            instance = 'interactive'
            instance_config = get_default_interactive_config(
                service=service,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=load_deployments,
            )
            interactive = True
        else:
            instance_type = validate_service_instance(service, instance, cluster, soa_dir)
            instance_config = get_instance_config(
                service=service,
                instance=instance,
                cluster=cluster,
                load_deployments=load_deployments,
                soa_dir=soa_dir,
            )
    except NoConfigurationForServiceError as e:
        paasta_print(str(e), file=sys.stderr)
        return 1
    except NoDeploymentsAvailable:
        # Adjacent literals are concatenated with no separator, so each
        # sentence carries its own trailing space.
        paasta_print(
            PaastaColors.red(
                "Error: No deployments.json found in %(soa_dir)s/%(service)s. "
                "You can generate this by running: "
                "generate_deployments_for_service -d %(soa_dir)s -s %(service)s" % {
                    'soa_dir': soa_dir,
                    'service': service,
                }
            ),
            sep='\n',
            file=sys.stderr,
        )
        return 1

    if docker_hash is None:
        try:
            docker_hash = instance_config.get_docker_url()
        except NoDockerImageError:
            paasta_print(
                PaastaColors.red(
                    "Error: No sha has been marked for deployment for the %s deploy group.\n"
                    "Please ensure this service has either run through a jenkins pipeline "
                    "or paasta mark-for-deployment has been run for %s\n" % (
                        instance_config.get_deploy_group(), service,
                    )
                ),
                sep='',
                file=sys.stderr,
            )
            return 1

    if pull_image:
        # docker_hash is always bound here, whether it was passed in or
        # resolved above; a separate docker_url local was unbound whenever
        # the caller supplied docker_hash.
        docker_pull_image(docker_hash)

    # if only one volume specified, extra_volumes should be converted to a list
    extra_volumes = instance_config.get_extra_volumes()
    if isinstance(extra_volumes, dict):
        extra_volumes = [extra_volumes]

    for volume in system_paasta_config.get_volumes() + extra_volumes:
        volumes.append('%s:%s:%s' % (volume['hostPath'], volume['containerPath'], volume['mode'].lower()))

    # Command precedence: bash for interactive runs without --cmd, then an
    # explicit --cmd, then the configured cmd, then the configured args.
    if interactive is True and args.cmd is None:
        command = 'bash'
    elif args.cmd:
        command = args.cmd
    else:
        command_from_config = instance_config.get_cmd()
        if command_from_config:
            command_modifier = command_function_for_framework(instance_type)
            command = command_modifier(command_from_config)
        else:
            command = instance_config.get_args()

    return run_docker_container(
        docker_client=docker_client,
        service=service,
        instance=instance,
        docker_hash=docker_hash,
        volumes=volumes,
        interactive=interactive,
        command=command,
        healthcheck=args.healthcheck,
        healthcheck_only=args.healthcheck_only,
        user_port=args.user_port,
        instance_config=instance_config,
        soa_dir=args.yelpsoa_config_root,
        dry_run=dry_run,
        json_dict=args.dry_run_json_dict,
        framework=instance_type,
    )
def cleanup_file(path_to_file):
    """Announce and delete the file at path_to_file.

    :param path_to_file: path of the file to remove
    """
    message = "Removing generated file: %s" % path_to_file
    paasta_print(message)
    os.remove(path_to_file)
def paasta_local_run(args):
    """Entry point for ``paasta local-run``.

    Depending on ``args.action`` the image is built locally ('build'),
    nothing is pulled or run for real ('dry_run'), or the image is pulled
    (any other action). The container is then configured and run through
    configure_and_run_docker_container.

    :param args: parsed command-line arguments
    :returns: 1 on a configuration or Docker API error, the non-zero result
        of paasta_cook_image if the build fails, otherwise whatever
        configure_and_run_docker_container returns
    """
    if args.action == 'build' and not makefile_responds_to('cook-image'):
        paasta_print("A local Makefile with a 'cook-image' target is required for --build", file=sys.stderr)
        paasta_print("If you meant to pull the docker image from the registry, explicitly pass --pull", file=sys.stderr)
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        # Adjacent literals are concatenated with no separator, so each
        # fragment carries its own trailing space.
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates "
                "PaaSTA is not configured locally on this host, and local-run may not behave "
                "the same way it would behave on a server configured for PaaSTA."
            ),
            sep='\n',
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, '/etc/paasta')

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config['default_cluster']
        except KeyError:
            paasta_print(
                PaastaColors.red(
                    "PaaSTA on this machine has not been configured with a default cluster. "
                    "Please pass one to local-run using '-c'."
                ),
                sep='\n',
                file=sys.stderr,
            )
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    if args.action == 'build':
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        # Export the tag so the image build uses the same name.
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.action == 'dry_run':
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        return configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == 'dry_run',
        )
    except errors.APIError as e:
        paasta_print(
            'Can\'t run Docker container. Error: %s' % str(e),
            file=sys.stderr,
        )
        return 1
def main():
    """Print the value returned by get_mesos_leader."""
    leader = get_mesos_leader()
    paasta_print(leader)
def clear_mesos_tools_cache():
    """Drop the ``_cache`` attribute of ``mesos_tools.master.CURRENT``.

    Prints a confirmation when the attribute was deleted; an AttributeError
    raised along the way is silently ignored.
    """
    try:
        current_master = mesos_tools.master.CURRENT
        del current_master._cache
        paasta_print("cleared mesos_tools.master.CURRENT._cache")
    except AttributeError:
        # Nothing cached (or nothing to clear it on): that is fine.
        return
def paasta_local_run(args):
    """Entry point for ``paasta local-run``.

    When pulling as a non-root user without a usable docker config, the
    command re-executes itself under sudo. Otherwise, depending on
    ``args.action`` the image is built locally ("build"), nothing is pulled
    ("dry_run"), or the image is pulled (any other action). The container is
    then configured and run through configure_and_run_docker_container.

    :param args: parsed command-line arguments
    :returns: 1 on a configuration or Docker API error, the non-zero result
        of paasta_cook_image if the build fails, otherwise whatever
        configure_and_run_docker_container returns
    """
    if args.action == "pull" and os.geteuid() != 0 and not docker_config_available():
        paasta_print("Re-executing paasta local-run --pull with sudo..")
        # execvp replaces the current process; nothing below runs on success.
        os.execvp("sudo", ["sudo", "-H"] + sys.argv)
    if args.action == "build" and not makefile_responds_to("cook-image"):
        paasta_print(
            "A local Makefile with a 'cook-image' target is required for --build",
            file=sys.stderr,
        )
        paasta_print(
            "If you meant to pull the docker image from the registry, explicitly pass --pull",
            file=sys.stderr,
        )
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        # Adjacent literals are concatenated with no separator, so each
        # fragment carries its own trailing space.
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates "
                "PaaSTA is not configured locally on this host, and local-run may not behave "
                "the same way it would behave on a server configured for PaaSTA."
            ),
            sep="\n",
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, "/etc/paasta")

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config["default_cluster"]
        except KeyError:
            paasta_print(
                PaastaColors.red(
                    "PaaSTA on this machine has not been configured with a default cluster. "
                    "Please pass one to local-run using '-c'."
                ),
                sep="\n",
                file=sys.stderr,
            )
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    docker_sha = None
    docker_url = None

    if args.action == "build":
        default_tag = "paasta-local-run-{}-{}".format(service, get_username())
        docker_url = os.environ.get("DOCKER_TAG", default_tag)
        # Export the tag so the image build uses the same name.
        os.environ["DOCKER_TAG"] = docker_url
        pull_image = False
        cook_return = paasta_cook_image(
            args=None, service=service, soa_dir=args.yelpsoa_config_root
        )
        if cook_return != 0:
            return cook_return
    elif args.action == "dry_run":
        pull_image = False
        docker_url = None
        docker_sha = args.sha
    else:
        pull_image = True
        docker_url = None
        docker_sha = args.sha

    try:
        return configure_and_run_docker_container(
            docker_client=docker_client,
            docker_url=docker_url,
            docker_sha=docker_sha,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == "dry_run",
        )
    except errors.APIError as e:
        paasta_print("Can't run Docker container. Error: %s" % str(e), file=sys.stderr)
        return 1