def get_invalid_registrations(self) -> List[str]:
    """Return every configured registration whose job id fails to parse."""
    bad: List[str] = []
    for reg in self.config_dict.get("registrations", []):
        try:
            decompose_job_id(reg)
        except InvalidJobNameError:
            bad.append(reg)
    return bad
def test_main_sends_event_if_no_deployments(self):
    """main() should send a CRITICAL sensu event and still exit 0 when no
    deployments are available for the requested service.instance.

    NOTE(review): contextlib.nested exists only on Python 2 -- confirm this
    test module still targets py2.
    """
    fake_client = mock.MagicMock()
    with contextlib.nested(
        mock.patch("paasta_tools.setup_marathon_job.parse_args", return_value=self.fake_args, autospec=True),
        mock.patch(
            "paasta_tools.setup_marathon_job.get_main_marathon_config",
            return_value=self.fake_marathon_config,
            autospec=True,
        ),
        mock.patch("paasta_tools.marathon_tools.get_marathon_client", return_value=fake_client, autospec=True),
        mock.patch(
            "paasta_tools.marathon_tools.load_marathon_service_config",
            side_effect=NoDeploymentsAvailable(),
            autospec=True,
        ),
        mock.patch("paasta_tools.setup_marathon_job.setup_service", return_value=(1, "NEVER"), autospec=True),
        mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        mock.patch("paasta_tools.setup_marathon_job.send_event", autospec=True),
    ) as (
        parse_args_patch,
        get_main_conf_patch,
        get_client_patch,
        read_service_conf_patch,
        setup_service_patch,
        load_system_paasta_config_patch,
        sensu_patch,
    ):
        load_system_paasta_config_patch.return_value.get_cluster = mock.Mock(return_value=self.fake_cluster)
        # main() terminates via sys.exit on every path, so trap SystemExit.
        with raises(SystemExit) as exc_info:
            setup_marathon_job.main()
        parse_args_patch.assert_called_once_with()
        get_main_conf_patch.assert_called_once_with()
        get_client_patch.assert_called_once_with(
            self.fake_marathon_config.get_url(),
            self.fake_marathon_config.get_username(),
            self.fake_marathon_config.get_password(),
        )
        read_service_conf_patch.assert_called_once_with(
            decompose_job_id(self.fake_args.service_instance)[0],
            decompose_job_id(self.fake_args.service_instance)[1],
            self.fake_cluster,
            soa_dir=self.fake_args.soa_dir,
        )
        expected_string = "No deployments found for %s in cluster %s" % (
            self.fake_args.service_instance,
            self.fake_cluster,
        )
        sensu_patch.assert_called_once_with(
            decompose_job_id(self.fake_args.service_instance)[0],
            decompose_job_id(self.fake_args.service_instance)[1],
            self.fake_args.soa_dir,
            Status.CRITICAL,
            expected_string,
        )
        # "No deployments" is treated as "nothing to do", hence exit code 0.
        assert exc_info.value.code == 0
def test_main_success(self):
    """Happy path for main(): service config loads, setup_service succeeds,
    and main() exits 0.

    NOTE(review): contextlib.nested exists only on Python 2 -- confirm this
    test module still targets py2.
    """
    fake_client = mock.MagicMock()
    with contextlib.nested(
        mock.patch("paasta_tools.setup_marathon_job.parse_args", return_value=self.fake_args, autospec=True),
        mock.patch(
            "paasta_tools.setup_marathon_job.get_main_marathon_config",
            return_value=self.fake_marathon_config,
            autospec=True,
        ),
        mock.patch("paasta_tools.marathon_tools.get_marathon_client", return_value=fake_client, autospec=True),
        mock.patch(
            "paasta_tools.marathon_tools.load_marathon_service_config",
            return_value=self.fake_marathon_service_config,
            autospec=True,
        ),
        mock.patch(
            "paasta_tools.setup_marathon_job.setup_service", return_value=(0, "it_is_finished"), autospec=True
        ),
        mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        mock.patch("paasta_tools.setup_marathon_job.send_event", autospec=True),
        mock.patch("sys.exit", autospec=True),
    ) as (
        parse_args_patch,
        get_main_conf_patch,
        get_client_patch,
        read_service_conf_patch,
        setup_service_patch,
        load_system_paasta_config_patch,
        sensu_patch,
        sys_exit_patch,
    ):
        load_system_paasta_config_patch.return_value.get_cluster = mock.Mock(return_value=self.fake_cluster)
        setup_marathon_job.main()
        parse_args_patch.assert_called_once_with()
        get_main_conf_patch.assert_called_once_with()
        get_client_patch.assert_called_once_with(
            self.fake_marathon_config.get_url(),
            self.fake_marathon_config.get_username(),
            self.fake_marathon_config.get_password(),
        )
        read_service_conf_patch.assert_called_once_with(
            decompose_job_id(self.fake_args.service_instance)[0],
            decompose_job_id(self.fake_args.service_instance)[1],
            self.fake_cluster,
            soa_dir=self.fake_args.soa_dir,
        )
        setup_service_patch.assert_called_once_with(
            decompose_job_id(self.fake_args.service_instance)[0],
            decompose_job_id(self.fake_args.service_instance)[1],
            fake_client,
            self.fake_marathon_config,
            self.fake_marathon_service_config,
            "no_more",
        )
        # Success path exits 0 (sys.exit itself is mocked out above).
        sys_exit_patch.assert_called_once_with(0)
def get_registrations(self):
    """Return the registrations configured for this instance.

    Falls back to the deprecated ``nerve_ns`` key, then to
    ``service.instance``, when no registrations are configured.
    Invalid registrations are logged but still returned.
    """
    # Copy so the nerve_ns fallback append below cannot mutate the list
    # stored inside config_dict (the previous code appended in place,
    # leaking the derived registration back into shared config state).
    registrations = list(self.config_dict.get("registrations", []))
    for registration in registrations:
        try:
            decompose_job_id(registration)
        except InvalidJobNameError:
            log.error("Provided registration {0} for service "
                      "{1} is invalid".format(registration, self.service))
    # Backwards compatibility with nerve_ns
    # FIXME(jlynch|2016-08-02, PAASTA-4964): DEPRECATE nerve_ns and remove it
    if not registrations and "nerve_ns" in self.config_dict:
        registrations.append(compose_job_id(self.service, self.config_dict["nerve_ns"]))
    return registrations or [compose_job_id(self.service, self.instance)]
def get_registrations(self) -> List[str]:
    """We register in vitess.main"""
    configured = self.config_dict.get("registrations", [])
    for reg in configured:
        try:
            decompose_job_id(reg)
        except InvalidJobNameError:
            log.error("Provided registration {} for service "
                      "{} is invalid".format(reg, self.service))
    if configured:
        return configured
    # Default: register under the smartstack service's "main" namespace.
    return [compose_job_id(self.get_service_name_smartstack(), "main")]
def get_marathon_services_running_here_for_nerve(cluster, soa_dir):
    """Collect (registration, nerve config) pairs for every smartstack
    registration of every marathon service running on this host."""
    if not cluster:
        try:
            cluster = load_system_paasta_config().get_cluster()
        except PaastaNotConfiguredError:
            # No cluster configured (or no paasta config file at all) means
            # nothing can be running here, so report an empty list.
            return []
    results = []
    for name, instance, port in marathon_services_running_here():
        try:
            all_registrations = read_all_registrations_for_service_instance(
                name, instance, cluster, soa_dir
            )
            for registration in all_registrations:
                reg_service, reg_namespace, _, __ = decompose_job_id(registration)
                nerve_dict = load_service_namespace_config(
                    service=reg_service,
                    namespace=reg_namespace,
                    soa_dir=soa_dir,
                )
                if nerve_dict.is_in_smartstack():
                    nerve_dict['port'] = port
                    results.append((registration, nerve_dict))
        except KeyError:
            continue  # SOA configs got deleted for this app, it'll get cleaned up
    return results
def write_soa_dir_dependent_chronos_instance(
    context, service, disabled, instance, parent
):
    """Behave step: write a chronos yaml with a parent job and a dependent
    job for the given service into a fake soa_dir."""
    soa_dir = "/nail/etc/services/"
    desired_disabled = disabled == "disabled"
    service_dir = os.path.join(soa_dir, service)
    if not os.path.exists(service_dir):
        os.makedirs(service_dir)
    (_, parent_instance, _, __) = decompose_job_id(parent)
    instances_config = {
        parent_instance: {
            "schedule": "R0/2000-01-01T16:20:00Z/PT60S",  # R0 prevents the job from being scheduled automatically
            "cmd": 'echo "Taking a nap..." && sleep 60m && echo "Nap time over, back to work"',
            "monitoring": {"team": "fake_team"},
            "disabled": desired_disabled,
        },
        instance: {
            "parents": [parent],
            "cmd": 'echo "Taking a nap..." && sleep 1m && echo "Nap time over, back to work"',
            "monitoring": {"team": "fake_team"},
            "disabled": desired_disabled,
        },
    }
    yaml_path = os.path.join(service_dir, "chronos-%s.yaml" % context.cluster)
    with open(yaml_path, "w") as f:
        f.write(yaml.safe_dump(instances_config))
    context.soa_dir = soa_dir
def run_marathon_app(context, job_id, instances):
    """Behave step: launch a sleeping busybox marathon app for job_id with
    the requested number of instances."""
    (service, instance, _, __) = decompose_job_id(job_id)
    job_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=load_system_paasta_config().get_cluster(),
        soa_dir=context.soa_dir,
    )
    app_id = job_config.format_marathon_app_dict()["id"]
    docker_settings = {"network": "BRIDGE", "image": "busybox"}
    app_config = {
        "id": app_id,
        "cmd": "/bin/sleep 1m",
        "container": {"type": "DOCKER", "docker": docker_settings},
        "instances": instances,
        "constraints": [["hostname", "UNIQUE"]],
    }
    client = context.marathon_clients.get_current_client_for_service(job_config)
    paasta_tools.bounce_lib.create_marathon_app(
        app_id=app_id,
        config=app_config,
        client=client,
    )
def get_proxy_port_for_instance(name, instance, cluster=None, soa_dir=DEFAULT_SOA_DIR):
    """Look up the proxy_port for a service instance.

    The instance's registration is resolved first; the proxy_port is then
    read from the smartstack namespace that registration points at.

    :param name: The service name
    :param instance: The instance of the service
    :param cluster: The cluster to read the configuration for
    :param soa_dir: The SOA config directory to read from
    :returns: The proxy_port for the service instance, or None if not defined
    """
    registration = read_registration_for_service_instance(
        name, instance, cluster, soa_dir)
    reg_service, reg_namespace, _, __ = decompose_job_id(registration)
    namespace_config = load_service_namespace_config(
        service=reg_service,
        namespace=reg_namespace,
        soa_dir=soa_dir,
    )
    return namespace_config.get('proxy_port')
def create_app_with_instances(context, job_id, number):
    """Behave step: record job_id on the context, compute the app id if not
    already known, then create the complete app with `number` instances."""
    context.job_id = job_id
    if 'app_id' not in context:
        (service, instance, _, __) = decompose_job_id(job_id)
        complete_config = marathon_tools.create_complete_config(
            service, instance, None, soa_dir=context.soa_dir)
        context.app_id = complete_config['id']
    set_number_instances(context, number)
    create_complete_app(context)
def service_instance_status_error(context, error_code, job_id):
    """Behave step: hit the instance_status API view for job_id and assert
    it raises InstanceFailure with the expected error code.

    Fixes: the original used the Python-2-only ``print exc.msg`` statement
    (a SyntaxError on py3) and carried a redundant bare ``except: raise``
    clause, which is the default propagation behavior anyway.
    """
    marathon_config = marathon_tools.load_marathon_config()
    settings.marathon_client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password(),
    )
    settings.cluster = load_system_paasta_config().get_cluster()
    settings.soa_dir = context.soa_dir
    (service, instance, _, __) = decompose_job_id(job_id)
    request = testing.DummyRequest()
    request.matchdict = {'service': service, 'instance': instance}
    response = None
    try:
        response = instance_status(request)
    except InstanceFailure as exc:
        # print() call form works on both py2 and py3.
        print(exc.msg)
        assert exc.err == int(error_code)
    # The view must have failed: no successful response expected.
    assert not response
def setup_kube_deployments(
    kube_client: KubeClient,
    service_instances: Sequence[str],
    soa_dir: str = DEFAULT_SOA_DIR,
) -> bool:
    """Create or update a Kubernetes deployment for each service.instance.

    :param kube_client: client used to list and mutate deployments
    :param service_instances: compound job ids ("service.instance") to set up
    :param soa_dir: SOA config directory to read service configs from
    :returns: True only when every name was valid and every application
        object was built successfully (no (False, None) placeholders).
    """
    if service_instances:
        existing_kube_deployments = set(list_all_deployments(kube_client))
        existing_apps = {
            (deployment.service, deployment.instance)
            for deployment in existing_kube_deployments
        }
    # Keep only the names that parse; invalid ones are dropped here and
    # reflected in the boolean returned below.
    service_instances_with_valid_names = [
        decompose_job_id(service_instance)
        for service_instance in service_instances
        if validate_job_name(service_instance)
    ]
    applications = [
        create_application_object(
            kube_client=kube_client,
            service=service_instance[0],
            instance=service_instance[1],
            soa_dir=soa_dir,
        )
        for service_instance in service_instances_with_valid_names
    ]
    for _, app in applications:
        # New (service, instance) pair -> create; known pair whose full
        # deployment record changed -> update; otherwise nothing to do.
        if (app and (app.kube_deployment.service, app.kube_deployment.instance)
                not in existing_apps):
            app.create(kube_client)
        elif app and app.kube_deployment not in existing_kube_deployments:
            app.update(kube_client)
        else:
            log.debug(f"{app} is up to date, no action taken")
    return (False, None) not in applications and len(
        service_instances_with_valid_names) == len(service_instances)
def run_marathon_app(context, job_id, instances):
    """Behave step: launch a sleeping busybox marathon app for job_id with
    the requested number of instances."""
    (service, instance, _, __) = decompose_job_id(job_id)
    job_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=load_system_paasta_config().get_cluster(),
        soa_dir=context.soa_dir,
    )
    app_id = job_config.format_marathon_app_dict()['id']
    container = {
        'type': 'DOCKER',
        'docker': {'network': 'BRIDGE', 'image': 'busybox'},
    }
    app_config = {
        'id': app_id,
        'cmd': '/bin/sleep 1m',
        'container': container,
        'instances': instances,
        'constraints': [["hostname", "UNIQUE"]],
    }
    paasta_tools.bounce_lib.create_marathon_app(
        app_id=app_id,
        config=app_config,
        client=context.marathon_clients.get_current_client_for_service(job_config),
    )
def status_marathon_job(context, status, job_id):
    """Behave step: assert that `status` appears in the output of
    marathon_serviceinit.status_marathon_job for the given job."""
    normal_instance_count = 1
    (service, instance, _, __) = decompose_job_id(job_id)
    job_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=load_system_paasta_config().get_cluster(),
        soa_dir=context.soa_dir,
    )
    desired_app_id = job_config.format_marathon_app_dict()['id']
    # Disable the requests cache so we see the live marathon state.
    with requests_cache.disabled():
        tasks, output = marathon_serviceinit.status_marathon_job(
            service=service,
            instance=instance,
            cluster=load_system_paasta_config().get_cluster(),
            soa_dir=context.soa_dir,
            dashboards=None,
            normal_instance_count=normal_instance_count,
            clients=context.marathon_clients,
            job_config=job_config,
            desired_app_id=desired_app_id,
            verbose=0,
        )
    assert status in output, f"{status!r} not found in {output!r}"
def marathon_app_task_count(context, job_id, task_count):
    """Behave step: assert the marathon app for job_id is running exactly
    task_count tasks."""
    (service, instance, _, __) = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)
    running = context.marathon_client.get_app(complete_config['id']).tasks
    assert len(running) == task_count
def get_marathon_services_running_here_for_nerve(cluster, soa_dir):
    """Build the (registration, nerve config) list for every smartstack
    registration of every marathon service running on this host."""
    if not cluster:
        try:
            cluster = load_system_paasta_config().get_cluster()
        except PaastaNotConfiguredError:
            # Missing cluster or missing paasta config entirely: nothing
            # can be running here, so return an empty list.
            return []
    nerve_entries = []
    for svc_name, svc_instance, svc_port in marathon_services_running_here():
        try:
            for registration in read_all_registrations_for_service_instance(
                    svc_name, svc_instance, cluster, soa_dir):
                reg_service, reg_namespace, _, __ = decompose_job_id(
                    registration)
                nerve_dict = load_service_namespace_config(
                    service=reg_service,
                    namespace=reg_namespace,
                    soa_dir=soa_dir,
                )
                if not nerve_dict.is_in_smartstack():
                    continue
                nerve_dict['port'] = svc_port
                nerve_entries.append((registration, nerve_dict))
        except KeyError:
            continue  # SOA configs got deleted for this app, it'll get cleaned up
    return nerve_entries
def get_registrations(self) -> List[str]:
    """Return the registrations configured for this instance.

    Falls back to the deprecated ``nerve_ns`` key, then to
    ``service.instance``, when no registrations are configured.
    Invalid registrations are logged but still returned.
    """
    # Copy so the nerve_ns fallback append below cannot mutate the list
    # stored inside config_dict (the previous code appended in place,
    # leaking the derived registration back into shared config state).
    registrations = list(self.config_dict.get("registrations", []))
    for registration in registrations:
        try:
            decompose_job_id(registration)
        except InvalidJobNameError:
            log.error("Provided registration {} for service "
                      "{} is invalid".format(registration, self.service))
    # Backwards compatibility with nerve_ns
    # FIXME(jlynch|2016-08-02, PAASTA-4964): DEPRECATE nerve_ns and remove it
    if not registrations and "nerve_ns" in self.config_dict:
        registrations.append(
            compose_job_id(self.service, self.config_dict["nerve_ns"]))
    return registrations or [compose_job_id(self.service, self.instance)]
def check_sha_changed(context, service_instance):
    """Behave step: reload the marathon config with caches cleared and
    assert the resulting app id differs from the previously recorded one."""
    service, instance, _, _ = decompose_job_id(service_instance)
    # Drop the yaml cache so the reload really re-reads the config files.
    service_configuration_lib._yaml_cache = {}
    context.marathon_config = load_marathon_service_config_no_cache(
        service, instance, context.cluster)
    reloaded_app_id = context.marathon_config.format_marathon_app_dict()['id']
    assert context.app_id != reloaded_app_id
def write_soa_dir_marathon_job(context, job_id, shard=None, previous_shard=None):
    """Behave step: write a minimal marathon yaml for job_id into the
    context's soa_dir (defaulting to /nail/etc/services/)."""
    (service, instance, _, __) = decompose_job_id(job_id)
    soa_dir = getattr(context, 'soa_dir', '/nail/etc/services/')
    service_dir = os.path.join(soa_dir, service)
    if not os.path.exists(service_dir):
        os.makedirs(service_dir)
    instance_config = {
        'cpus': 0.1,
        'mem': 100,
        'marathon_shard': shard,
        'previous_marathon_shards': [previous_shard] if previous_shard else None,
    }
    soa = {str(instance): instance_config}
    if hasattr(context, "cmd"):
        soa[instance]['cmd'] = context.cmd
    yaml_path = os.path.join(service_dir, 'marathon-%s.yaml' % context.cluster)
    with open(yaml_path, 'w') as f:
        f.write(yaml.safe_dump(soa))
    context.soa_dir = soa_dir
def synapse_replication_is_low(service, instance, system_paasta_config, local_backends):
    """Return True when synapse reports the registered namespace as
    under-replicated against an 80% criticality threshold."""
    crit_threshold = 80
    registration = read_registration_for_service_instance(
        service=service, instance=instance
    )
    reg_svc, reg_namespace, _, __ = utils.decompose_job_id(registration)
    # We only actually care about the replication of where we're registering
    service, namespace = reg_svc, reg_namespace
    smartstack_replication_info = load_smartstack_info_for_service(
        service=service,
        namespace=namespace,
        blacklist=[],
        system_paasta_config=system_paasta_config,
    )
    expected_count = get_expected_instance_count_for_namespace(
        service=service, namespace=namespace)
    expected_count_per_location = int(
        expected_count / len(smartstack_replication_info))
    synapse_name = utils.compose_job_id(service, namespace)
    local_replication = get_replication_for_services(
        synapse_host=system_paasta_config.get_default_synapse_host(),
        synapse_port=system_paasta_config.get_synapse_port(),
        synapse_haproxy_url_format=system_paasta_config.get_synapse_haproxy_url_format(),
        services=[synapse_name],
    )
    num_available = local_replication.get(synapse_name, 0)
    under_replicated, _ratio = utils.is_under_replicated(
        num_available, expected_count_per_location, crit_threshold)
    log.info('Service %s.%s has %d out of %d expected instances' % (
        service, instance, num_available, expected_count_per_location))
    return under_replicated
def write_soa_dir_marathon_job(context, job_id, shard=None, previous_shard=None):
    """Behave step: write a minimal marathon yaml for job_id into the
    context's soa_dir (defaulting to /nail/etc/services/)."""
    (service, instance, _, __) = decompose_job_id(job_id)
    soa_dir = getattr(context, "soa_dir", "/nail/etc/services/")
    service_dir = os.path.join(soa_dir, service)
    if not os.path.exists(service_dir):
        os.makedirs(service_dir)
    instance_config = {
        "cpus": 0.1,
        "mem": 100,
        "marathon_shard": shard,
        "previous_marathon_shards": [previous_shard] if previous_shard else None,
    }
    soa = {str(instance): instance_config}
    if hasattr(context, "cmd"):
        soa[instance]["cmd"] = context.cmd
    yaml_path = os.path.join(service_dir, "marathon-%s.yaml" % context.cluster)
    with open(yaml_path, "w") as f:
        f.write(yaml.safe_dump(soa))
    context.soa_dir = soa_dir
def get_registrations(self):
    """Return the registrations configured for this instance.

    Falls back to the deprecated ``nerve_ns`` key, then to
    ``service.instance``, when no registrations are configured.
    Invalid registrations are logged but still returned.
    """
    # Copy so the nerve_ns fallback append below cannot mutate the list
    # stored inside config_dict (the previous code appended in place,
    # leaking the derived registration back into shared config state).
    registrations = list(self.config_dict.get('registrations', []))
    for registration in registrations:
        try:
            decompose_job_id(registration)
        except InvalidJobNameError:
            log.error('Provided registration {0} for service '
                      '{1} is invalid'.format(registration, self.service))
    # Backwards compatibility with nerve_ns
    # FIXME(jlynch|2016-08-02, PAASTA-4964): DEPRECATE nerve_ns and remove it
    if not registrations and 'nerve_ns' in self.config_dict:
        registrations.append(
            compose_job_id(self.service, self.config_dict['nerve_ns']))
    return registrations or [compose_job_id(self.service, self.instance)]
def synapse_replication_is_low(service, instance, system_paasta_config, local_backends):
    """Return True when synapse reports the registered namespace as
    under-replicated against an 80% criticality threshold."""
    crit_threshold = 80
    reg_svc, reg_namespace, _, __ = utils.decompose_job_id(
        read_registration_for_service_instance(service=service, instance=instance))
    # We only actually care about the replication of where we're registering
    service, namespace = reg_svc, reg_namespace
    smartstack_replication_info = load_smartstack_info_for_service(
        service=service,
        namespace=namespace,
        blacklist=[],
        system_paasta_config=system_paasta_config,
    )
    expected_count = get_expected_instance_count_for_namespace(
        service=service, namespace=namespace)
    expected_count_per_location = int(
        expected_count / len(smartstack_replication_info))
    synapse_name = utils.compose_job_id(service, namespace)
    haproxy_url_format = system_paasta_config.get_synapse_haproxy_url_format()
    local_replication = get_replication_for_services(
        synapse_host=system_paasta_config.get_default_synapse_host(),
        synapse_port=system_paasta_config.get_synapse_port(),
        synapse_haproxy_url_format=haproxy_url_format,
        services=[synapse_name],
    )
    num_available = local_replication.get(synapse_name, 0)
    under_replicated, _ratio = utils.is_under_replicated(
        num_available, expected_count_per_location, crit_threshold)
    log.info('Service %s.%s has %d out of %d expected instances' %
             (service, instance, num_available, expected_count_per_location))
    return under_replicated
def setup_kube_deployments(
    kube_client: KubeClient,
    service_instances: Sequence[str],
    soa_dir: str = DEFAULT_SOA_DIR,
) -> bool:
    """Reconcile a kubernetes deployment for every service.instance given.

    Returns False if any name failed to parse or any reconciliation
    reported a failure; True otherwise.
    """
    succeeded = True
    if service_instances:
        deployments = list_all_deployments(kube_client)
        for service_instance in service_instances:
            try:
                service, instance, _, __ = decompose_job_id(service_instance)
            except InvalidJobNameError:
                log.error(
                    "Invalid service instance specified. Format is service%sinstance."
                    % SPACER)
                succeeded = False
                continue
            failed = reconcile_kubernetes_deployment(
                kube_client=kube_client,
                service=service,
                instance=instance,
                kube_deployments=deployments,
                soa_dir=soa_dir,
            )[0]
            if failed:
                succeeded = False
    return succeeded
def marathon_app_task_count(context, job_id, task_count):
    """Behave step: assert marathon lists exactly task_count tasks for the
    app belonging to job_id."""
    (service, instance, _, __) = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, None, soa_dir=context.soa_dir)
    matching_tasks = context.marathon_client.list_tasks(
        app_id=complete_config['id'])
    assert len(matching_tasks) == task_count
def marathon_restart_gets_new_task_ids(context, job_id):
    """Behave step: restart the marathon job and assert the restart produced
    a different set of tasks."""
    (service, instance, _, __) = decompose_job_id(job_id)
    app_id = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)['id']
    normal_instance_count = 1
    cluster = context.system_paasta_config['cluster']
    tasks_before = context.marathon_client.get_app(app_id).tasks
    # Silence the service log during the restart itself.
    with mock.patch('paasta_tools.marathon_serviceinit._log', autospec=True):
        marathon_serviceinit.restart_marathon_job(
            service,
            instance,
            app_id,
            normal_instance_count,
            context.marathon_client,
            cluster,
        )
    paasta_print("Sleeping 5 seconds to wait for %s to be restarted." % service)
    time.sleep(5)
    tasks_after = context.marathon_client.get_app(app_id).tasks
    paasta_print("Tasks before the restart: %s" % tasks_before)
    paasta_print("Tasks after the restart: %s" % tasks_after)
    paasta_print()  # sacrificial line for behave to eat instead of our output
    assert tasks_before != tasks_after
def write_soa_dir_dependent_chronos_instance(context, service, disabled,
                                             instance, parent):
    """Behave step: write a chronos yaml with a parent job and a dependent
    job for the given service into a fake soa_dir."""
    soa_dir = '/nail/etc/services/'
    desired_disabled = (disabled == 'disabled')
    service_dir = os.path.join(soa_dir, service)
    if not os.path.exists(service_dir):
        os.makedirs(service_dir)
    (_, parent_instance, _, __) = decompose_job_id(parent)
    instances_config = {
        parent_instance: {
            'schedule': 'R0/2000-01-01T16:20:00Z/PT60S',  # R0 prevents the job from being scheduled automatically
            'cmd': 'echo "Taking a nap..." && sleep 60m && echo "Nap time over, back to work"',
            'monitoring': {'team': 'fake_team'},
            'disabled': desired_disabled,
        },
        instance: {
            'parents': [parent],
            'cmd': 'echo "Taking a nap..." && sleep 1m && echo "Nap time over, back to work"',
            'monitoring': {'team': 'fake_team'},
            'disabled': desired_disabled,
        },
    }
    yaml_path = os.path.join(service_dir, 'chronos-%s.yaml' % context.cluster)
    with open(yaml_path, 'w') as f:
        f.write(yaml.safe_dump(instances_config))
    context.soa_dir = soa_dir
def kill_marathon_app(full_appid, cluster, client, soa_dir):
    """Gracefully kill a running marathon app by bouncing it down.

    Tasks are drained first, then a "down" bounce is applied repeatedly
    (sleeping between rounds) until marathon no longer reports the app id
    as running.

    :param full_appid: the full marathon app id (with '--' escaping)
    :param cluster: the cluster the app runs in
    :param client: marathon client used for all API calls
    :param soa_dir: SOA config directory to read service configs from
    """
    # Marathon app ids escape '_' as '--'; undo that to recover the
    # service and instance names.
    service, instance, _, __ = (s.replace("--", "_") for s in decompose_job_id(full_appid))
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service, instance=instance, cluster=cluster, soa_dir=soa_dir)
    complete_config = service_instance_config.format_marathon_app_dict()
    registrations = service_instance_config.get_registrations()
    # The drain method is configured against the first registration's
    # smartstack namespace.
    service_namespace_config = marathon_tools.load_service_namespace_config(
        service=service, namespace=registrations[0])
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        registrations=registrations,
        drain_method_params=service_instance_config.get_drain_method_params(
            service_namespace_config),
    )
    bounce_func = bounce_lib.get_bounce_method_func("down")
    # Keep bouncing down until marathon no longer reports the app running.
    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        app_to_kill = client.get_app(full_appid)
        (
            old_app_live_happy_tasks,
            old_app_live_unhappy_tasks,
            old_app_draining_tasks,
            old_app_at_risk_tasks,
        ) = get_tasks_by_state(
            other_apps=[app_to_kill],
            drain_method=drain_method,
            service=service,
            nerve_ns=registrations[0],
            bounce_health_params=service_instance_config.get_bounce_health_params(
                service_namespace_config),
        )
        # "down" bounce with no new app: drain and remove the old tasks.
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running="",
            happy_new_tasks=[],
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            serviceinstance=f"{service}.{instance}",
            bounce_method="down",
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )
        paasta_print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)
    paasta_print(f"Successfully killed {full_appid}")
def main():
    """Attempt to set up the marathon service instance given.
    Exits 1 if the deployment failed.
    This is done in the following order:

    - Load the marathon configuration
    - Connect to marathon
    - Load the service instance's configuration
    - Create the complete marathon job configuration
    - Deploy/bounce the service
    - Emit an event about the deployment to sensu"""
    args = parse_args()
    soa_dir = args.soa_dir
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)
    try:
        service, instance, _, __ = decompose_job_id(args.service_instance)
    except InvalidJobNameError:
        log.error("Invalid service instance specified. Format is service%sinstance." % SPACER)
        sys.exit(1)
    marathon_config = get_main_marathon_config()
    client = marathon_tools.get_marathon_client(marathon_config.get_url(),
                                                marathon_config.get_username(),
                                                marathon_config.get_password())
    try:
        service_instance_config = marathon_tools.load_marathon_service_config(
            service,
            instance,
            load_system_paasta_config().get_cluster(),
            soa_dir=soa_dir,
        )
    except NoDeploymentsAvailable:
        # No deployments yet is "nothing to do", not a failure.
        log.debug("No deployments found for %s in cluster %s. Skipping." %
                  (args.service_instance, load_system_paasta_config().get_cluster()))
        sys.exit(0)
    except NoConfigurationForServiceError:
        error_msg = "Could not read marathon configuration file for %s in cluster %s" % \
            (args.service_instance, load_system_paasta_config().get_cluster())
        log.error(error_msg)
        sys.exit(1)
    try:
        status, output = setup_service(service, instance, client, marathon_config,
                                       service_instance_config, soa_dir)
        # Non-zero status from setup_service maps to a CRITICAL sensu event.
        sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
        send_event(service, instance, soa_dir, sensu_status, output)
        # We exit 0 because the script finished ok and the event was sent to the right team.
        sys.exit(0)
    except (KeyError, TypeError, AttributeError, InvalidInstanceConfig):
        import traceback
        error_str = traceback.format_exc()
        log.error(error_str)
        send_event(service, instance, soa_dir, pysensu_yelp.Status.CRITICAL, error_str)
        # We exit 0 because the script finished ok and the event was sent to the right team.
        sys.exit(0)
def kill_marathon_app(full_appid, cluster, client, soa_dir):
    """Gracefully kill a running marathon app by bouncing it down.

    Tasks are drained first, then a "down" bounce is applied repeatedly
    (sleeping between rounds) until marathon no longer reports the app id
    as running.

    :param full_appid: the full marathon app id (with '--' escaping)
    :param cluster: the cluster the app runs in
    :param client: marathon client used for all API calls
    :param soa_dir: SOA config directory to read service configs from
    """
    # Marathon app ids escape '_' as '--'; undo that to recover the
    # service and instance names.
    service, instance, _, __ = (s.replace('--', '_') for s in decompose_job_id(full_appid))
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    complete_config = service_instance_config.format_marathon_app_dict()
    nerve_ns = service_instance_config.get_nerve_namespace()
    service_namespace_config = marathon_tools.load_service_namespace_config(service=service, namespace=nerve_ns)
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        nerve_ns=nerve_ns,
        drain_method_params=service_instance_config.get_drain_method_params(service_namespace_config),
    )
    bounce_func = bounce_lib.get_bounce_method_func('down')
    # Keep bouncing down until marathon no longer reports the app running.
    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        app_to_kill = client.get_app(full_appid)
        (
            old_app_live_happy_tasks,
            old_app_live_unhappy_tasks,
            old_app_draining_tasks,
            old_app_at_risk_tasks,
        ) = get_tasks_by_state(
            other_apps=[app_to_kill],
            drain_method=drain_method,
            service=service,
            nerve_ns=nerve_ns,
            bounce_health_params=service_instance_config.get_bounce_health_params(service_namespace_config),
        )
        # "down" bounce with no new app: drain and remove the old tasks.
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running='',
            happy_new_tasks=[],
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            serviceinstance="{}.{}".format(service, instance),
            bounce_method='down',
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )
        paasta_print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)
    # NOTE(review): "Sucessfully" typo in this user-facing message; left
    # as-is here since this edit only adds documentation.
    paasta_print("Sucessfully killed {}".format(full_appid))
def service_instance_status(context, app_count, job_id):
    """Behave step: assert the paasta API reports the expected marathon
    app_count for job_id."""
    (service, instance, _, __) = decompose_job_id(job_id)
    result = context.paasta_api_client.service.status_instance(
        instance=instance,
        service=service,
    ).result()
    assert result['marathon']['app_count'] == int(app_count), result
def chronos_service_instance_status(context, desired_state, job_id):
    """Behave step: assert the paasta API reports the expected chronos
    desired_state for job_id."""
    (service, instance, _, __) = decompose_job_id(job_id)
    result = context.paasta_api_client.service.status_instance(
        instance=instance,
        service=service,
    ).result()
    assert result['chronos']['desired_state'] == desired_state, result
def main():
    """Attempt to set up the marathon service instance given.
    Exits 1 if the deployment failed.
    This is done in the following order:

    - Load the marathon configuration
    - Connect to marathon
    - Load the service instance's configuration
    - Create the complete marathon job configuration
    - Deploy/bounce the service
    - Emit an event about the deployment to sensu"""
    args = parse_args()
    soa_dir = args.soa_dir
    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)
    try:
        service, instance, _, __ = decompose_job_id(args.service_instance)
    except InvalidJobNameError:
        log.error("Invalid service instance specified. Format is service%sinstance." % SPACER)
        sys.exit(1)
    marathon_config = get_main_marathon_config()
    client = marathon_tools.get_marathon_client(marathon_config.get_url(),
                                                marathon_config.get_username(),
                                                marathon_config.get_password())
    try:
        service_instance_config = marathon_tools.load_marathon_service_config(
            service,
            instance,
            load_system_paasta_config().get_cluster(),
            soa_dir=soa_dir,
        )
    except NoDeploymentsAvailable:
        # No deployments yet is "nothing to do", not a failure.
        log.debug("No deployments found for %s in cluster %s. Skipping." %
                  (args.service_instance, load_system_paasta_config().get_cluster()))
        sys.exit(0)
    except NoConfigurationForServiceError:
        error_msg = "Could not read marathon configuration file for %s in cluster %s" % \
            (args.service_instance, load_system_paasta_config().get_cluster())
        log.error(error_msg)
        sys.exit(1)
    try:
        status, output = setup_service(service, instance, client, marathon_config,
                                       service_instance_config, soa_dir)
        # Non-zero status from setup_service maps to a CRITICAL sensu event.
        sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
        send_event(service, instance, soa_dir, sensu_status, output)
        # We exit 0 because the script finished ok and the event was sent to the right team.
        sys.exit(0)
    except (KeyError, TypeError, AttributeError, InvalidInstanceConfig):
        import traceback
        error_str = traceback.format_exc()
        log.error(error_str)
        send_event(service, instance, soa_dir, pysensu_yelp.Status.CRITICAL, error_str)
        # We exit 0 because the script finished ok and the event was sent to the right team.
        sys.exit(0)
def wait_launch_tasks(context, job_id, task_count):
    """Behave step: block until the app for job_id has launched exactly
    task_count tasks."""
    (service, instance, _, __) = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)
    marathon_tools.wait_for_app_to_launch_tasks(
        context.marathon_client,
        complete_config['id'],
        task_count,
        exact_matches_only=True,
    )
def run_marathon_app(context, job_id):
    """Behave step: create a bare sleeping marathon app for job_id,
    bypassing the app-creation lock."""
    (service, instance, _, __) = decompose_job_id(job_id)
    app_id = marathon_tools.create_complete_config(
        service, instance, None, soa_dir=context.soa_dir)['id']
    minimal_app = {
        'id': app_id,
        'cmd': '/bin/sleep 1m',
    }
    with mock.patch('paasta_tools.bounce_lib.create_app_lock'):
        paasta_tools.bounce_lib.create_marathon_app(
            app_id, minimal_app, context.marathon_client)
def status_marathon_job(context, status, job_id):
    """Behave step: assert `status` appears in marathon_serviceinit's
    status output for the given job."""
    normal_instance_count = 1
    (service, instance, _, __) = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, None, soa_dir=context.soa_dir)
    output = marathon_serviceinit.status_marathon_job(
        service,
        instance,
        complete_config["id"],
        normal_instance_count,
        context.marathon_client,
    )
    assert status in output
def validate_job_name(service_instance: str) -> bool:
    """Return True when ``service_instance`` parses as a valid job id, else log and return False."""
    try:
        decompose_job_id(service_instance)
    except InvalidJobNameError:
        log.error(
            "Invalid service instance specified. Format is service%sinstance." % SPACER)
        return False
    return True
def main() -> None:
    """Attempt to set up a list of marathon service instances given.
    Exits 1 if any service.instance deployment failed.
    This is done in the following order:
    - Load the marathon configuration
    - Connect to marathon
    - Do the following for each service.instance:
    - Load the service instance's configuration
    - Create the complete marathon job configuration
    - Deploy/bounce the service
    - Emit an event about the deployment to sensu"""
    args = parse_args()
    soa_dir = args.soa_dir
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)
    # Setting up transparent cache for http API calls
    requests_cache.install_cache("setup_marathon_jobs", backend="memory")
    system_paasta_config = load_system_paasta_config()
    clients = marathon_tools.get_marathon_clients(
        marathon_tools.get_marathon_servers(system_paasta_config)
    )
    unique_clients = clients.get_all_clients()
    # Fetch all apps (with tasks embedded) once, up front, so each deploy below
    # can reuse the snapshot instead of hitting the marathon API per instance.
    marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        unique_clients, embed_tasks=True
    )
    num_failed_deployments = 0
    for service_instance in args.service_instance_list:
        try:
            service, instance, _, __ = decompose_job_id(service_instance)
        except InvalidJobNameError:
            log.error(
                "Invalid service instance specified. Format is service%sinstance."
                % SPACER
            )
            num_failed_deployments = num_failed_deployments + 1
        else:
            # deploy_marathon_service returns a tuple; a truthy first element
            # marks the deployment as failed.
            if deploy_marathon_service(
                service, instance, clients, soa_dir, marathon_apps_with_clients
            )[0]:
                num_failed_deployments = num_failed_deployments + 1
    requests_cache.uninstall_cache()
    log.debug(
        "%d out of %d service.instances failed to deploy."
        % (num_failed_deployments, len(args.service_instance_list))
    )
    sys.exit(1 if num_failed_deployments else 0)
def create_app_with_instances(context, job_id, number):
    """Configure the behave context for ``job_id`` with ``number`` instances and deploy it."""
    set_number_instances(context, number)
    service, instance, _, __ = decompose_job_id(job_id)
    context.job_id = job_id
    context.service = service
    context.instance = instance
    zookeeper = get_service_connection_string('zookeeper')
    context.zk_hosts = '%s/mesos-testcluster' % zookeeper
    update_context_marathon_config(context)
    context.app_id = context.marathon_complete_config['id']
    run_setup_marathon_job(context)
def paasta_serviceinit_command_appid(context, command, job_id):
    """Run paasta_serviceinit.py with an explicit --appid and assert it exits 0.

    NOTE: this block uses Python 2 ``print`` statements; it must stay on the
    interpreter the rest of this behave suite runs under.
    """
    (service, instance, _, __) = decompose_job_id(job_id)
    app_id = marathon_tools.create_complete_config(service, instance, None, soa_dir=context.soa_dir)['id']
    cmd = '../paasta_tools/paasta_serviceinit.py --soa-dir %s --appid %s %s %s' \
        % (context.soa_dir, app_id, job_id, command)
    print 'Running cmd %s' % cmd
    # _run executes the command and returns its exit code plus combined output.
    (exit_code, output) = _run(cmd)
    print 'Got exitcode %s with output:\n%s' % (exit_code, output)
    print  # sacrificial line for behave to eat instead of our output
    assert exit_code == 0
def create_app_with_instances_constraints(context, job_id, number, constraints):
    """Like create_app_with_instances, but also records placement ``constraints``."""
    set_number_instances(context, number)
    service, instance, _, __ = decompose_job_id(job_id)
    context.job_id = job_id
    context.service = service
    context.instance = instance
    zookeeper = get_service_connection_string("zookeeper")
    context.zk_hosts = "%s/mesos-testcluster" % zookeeper
    context.constraints = constraints
    update_context_marathon_config(context)
    context.app_id = context.marathon_complete_config["id"]
    run_setup_marathon_job(context)
def service_instance_status(context, app_count, job_id):
    """Query the instance_status API and check marathon app and instance counts."""
    settings.cluster = context.cluster
    settings.marathon_client = context.marathon_client
    settings.soa_dir = context.soa_dir
    service, instance, _, __ = decompose_job_id(job_id)
    request = testing.DummyRequest()
    request.swagger_data = {'service': service, 'instance': instance}
    response = instance_status(request)
    marathon_status = response['marathon']
    assert marathon_status['app_count'] == int(app_count), response
    assert marathon_status['running_instance_count'] == marathon_status['expected_instance_count'], response
def write_soa_dir_marathon_job(context, job_id):
    """Write a minimal marathon-<cluster>.yaml for ``job_id`` into a fresh soa_dir."""
    service, instance, _, __ = decompose_job_id(job_id)
    soa_dir = mkdtemp()
    service_dir = os.path.join(soa_dir, service)
    if not os.path.exists(service_dir):
        os.makedirs(service_dir)
    config_path = os.path.join(service_dir, 'marathon-%s.yaml' % context.cluster)
    with open(config_path, 'w') as f:
        f.write(yaml.dump({
            "%s" % instance: {
                'cpus': 0.1,
                'mem': 100,
            }
        }))
    context.soa_dir = soa_dir
def service_instance_status_error(context, error_code, job_id):
    """Expect instance_status to raise a 'not found' ApiFailure carrying ``error_code``."""
    service, instance, _, __ = decompose_job_id(job_id)
    request = testing.DummyRequest()
    request.swagger_data = {'service': service, 'instance': instance}
    response = None
    try:
        response = instance_status(request)
    except ApiFailure as exc:
        assert 'not found' in exc.msg
        assert exc.err == int(error_code)
    # On the expected failure path `response` was never assigned a value.
    assert not response
def delete_apps(context, job_id, cluster_name):
    """Tear down the soa_dir config written for ``job_id`` and record its ids on context.

    Removes the per-cluster marathon yaml, the deployments.json, and the then-empty
    service directory (os.rmdir fails loudly if anything else is left behind).
    """
    context.job_id = job_id
    service, instance, _, __ = decompose_job_id(job_id)
    context.service = service
    context.instance = instance
    context.zk_hosts = '%s/mesos-testcluster' % get_service_connection_string('zookeeper')
    update_context_marathon_config(context)
    context.app_id = context.marathon_complete_config['id']
    service_dir = os.path.join(context.soa_dir, service)
    os.remove(os.path.join(service_dir, "marathon-{0}.yaml".format(cluster_name)))
    # BUG FIX: the original passed a stray, unused cluster_name argument to
    # this format() call (copy-paste leftover) — deployments.json is not
    # cluster-specific.
    os.remove(os.path.join(service_dir, "deployments.json"))
    os.rmdir(service_dir)
def main():
    """Dispatch a serviceinit command for one service.instance or a list of instances.

    Exits with the worst (max) return code across all instances processed.
    """
    args = parse_args()
    if args.debug:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)
    command = args.command
    instances = []
    if args.service_instance:
        # A combined service.instance wins over separate --service/--instances.
        service, instance, _, __ = decompose_job_id(args.service_instance)
        instances.append(instance)
    elif args.service and args.instances:
        service = args.service
        instances = args.instances.split(',')
    else:
        log.error("The name of service or the name of instance to inspect is missing. Exiting.")
        sys.exit(1)
    cluster = load_system_paasta_config().get_cluster()
    return_codes = []
    for instance in instances:
        instance_type = validate_service_instance(service, instance, cluster, args.soa_dir)
        if instance_type == 'marathon':
            rc = marathon_serviceinit.perform_command(
                command=command,
                service=service,
                instance=instance,
                cluster=cluster,
                verbose=args.verbose,
                soa_dir=args.soa_dir,
                app_id=args.app_id,
                delta=args.delta,
            )
        elif instance_type == 'chronos':
            rc = chronos_serviceinit.perform_command(
                command=command,
                service=service,
                instance=instance,
                cluster=cluster,
                verbose=args.verbose,
                soa_dir=args.soa_dir,
            )
        else:
            log.error("I calculated an instance_type of %s for %s which I don't know how to handle. Exiting."
                      % (instance_type, compose_job_id(service, instance)))
            rc = 1
        return_codes.append(rc)
    sys.exit(max(return_codes))
def status_marathon_job(context, status, job_id):
    """Assert that ``status`` appears in serviceinit output, with the HTTP cache disabled."""
    expected_instances = 1
    service, instance, _, __ = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)
    # Disable requests_cache so we observe live marathon state, not a cached reply.
    with requests_cache.disabled():
        actual = marathon_serviceinit.status_marathon_job(
            service,
            instance,
            complete_config['id'],
            expected_instances,
            context.marathon_client,
        )
    assert status in actual
def service_instance_status_error(context, error_code, job_id):
    """Expect instance_status to raise a 'not found' InstanceFailure carrying ``error_code``.

    Any other exception type propagates to the caller unchanged.
    """
    service, instance, _, __ = decompose_job_id(job_id)
    request = testing.DummyRequest()
    request.swagger_data = {"service": service, "instance": instance}
    response = None
    try:
        response = instance_status(request)
    except InstanceFailure as exc:
        assert "not found" in exc.msg
        assert exc.err == int(error_code)
    # FIX: removed a redundant bare `except: raise` clause — re-raising
    # unhandled exceptions is already the default behavior, and bare except
    # is a lint error.
    assert not response
def run_marathon_app(context, job_id):
    """Launch a minimal busybox sleep app for ``job_id`` (app lock patched out)."""
    service, instance, _, __ = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)
    app_id = complete_config['id']
    sleeper_app = {
        'id': app_id,
        'cmd': '/bin/sleep 1m',
        'container': {
            'type': 'DOCKER',
            'docker': {
                'network': 'BRIDGE',
                'image': 'busybox',
            },
        },
    }
    with mock.patch('paasta_tools.bounce_lib.create_app_lock'):
        paasta_tools.bounce_lib.create_marathon_app(
            app_id, sleeper_app, context.marathon_client)
def marathon_restart_gets_new_task_ids(context, job_id):
    """Restart the marathon job and assert its tasks were replaced with new ones.

    NOTE: this block uses Python 2 ``print`` statements; it must stay on the
    interpreter the rest of this behave suite runs under.
    """
    (service, instance, _, __) = decompose_job_id(job_id)
    app_id = marathon_tools.create_complete_config(service, instance, None, soa_dir=context.soa_dir)["id"]
    normal_instance_count = 1
    cluster = context.system_paasta_config["cluster"]
    # Snapshot the task list before the restart so we can compare afterwards.
    old_tasks = context.marathon_client.get_app(app_id).tasks
    marathon_serviceinit.restart_marathon_job(
        service,
        instance,
        app_id,
        normal_instance_count,
        context.marathon_client,
        cluster
    )
    print "Sleeping 5 seconds to wait for %s to be restarted." % service
    # Fixed sleep gives marathon time to relaunch tasks — presumably tuned for
    # the test cluster; TODO confirm 5s is always enough.
    time.sleep(5)
    new_tasks = context.marathon_client.get_app(app_id).tasks
    print "Tasks before the restart: %s" % old_tasks
    print "Tasks after the restart: %s" % new_tasks
    print # sacrificial line for behave to eat instead of our output
    assert old_tasks != new_tasks
def get_proxy_port_for_instance(name, instance, cluster=None, soa_dir=DEFAULT_SOA_DIR):
    """Look up the smartstack proxy_port for a service instance.

    The instance's registration determines which namespace to consult; the
    proxy_port is then read from that namespace's smartstack configuration.

    :param name: The service name
    :param instance: The instance of the service
    :param cluster: The cluster to read the configuration for
    :param soa_dir: The SOA config directory to read from
    :returns: The proxy_port for the service instance, or None if not defined
    """
    registration = read_registration_for_service_instance(name, instance, cluster, soa_dir)
    reg_service, reg_namespace, _, __ = decompose_job_id(registration)
    namespace_config = load_service_namespace_config(reg_service, reg_namespace, soa_dir)
    return namespace_config.get('proxy_port')
def main():
    """Attempt to set up a list of marathon service instances given.
    Exits 1 if any service.instance deployment failed.
    This is done in the following order:
    - Load the marathon configuration
    - Connect to marathon
    - Do the following for each service.instance:
    - Load the service instance's configuration
    - Create the complete marathon job configuration
    - Deploy/bounce the service
    - Emit an event about the deployment to sensu"""
    args = parse_args()
    soa_dir = args.soa_dir
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)
    # Setting up transparent cache for http API calls
    requests_cache.install_cache("setup_marathon_jobs", backend="memory")
    marathon_config = get_main_marathon_config()
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    # Fetch the full app list (including failures) once so every deploy below
    # reuses the same snapshot instead of re-querying marathon.
    marathon_apps = marathon_tools.get_all_marathon_apps(client, embed_failures=True)
    num_failed_deployments = 0
    for service_instance in args.service_instance_list:
        try:
            service, instance, _, __ = decompose_job_id(service_instance)
        except InvalidJobNameError:
            log.error("Invalid service instance specified. Format is service%sinstance." % SPACER)
            num_failed_deployments = num_failed_deployments + 1
        else:
            # deploy_marathon_service returns a truthy value on failure.
            if deploy_marathon_service(service, instance, client, soa_dir, marathon_config, marathon_apps):
                num_failed_deployments = num_failed_deployments + 1
    requests_cache.uninstall_cache()
    log.debug("%d out of %d service.instances failed to deploy." %
              (num_failed_deployments, len(args.service_instance_list)))
    sys.exit(1 if num_failed_deployments else 0)
def service_instance_status(context, app_count, job_id):
    """Query the instance_status API (matchdict variant) and check app/instance counts."""
    marathon_config = marathon_tools.load_marathon_config()
    settings.marathon_client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password()
    )
    settings.cluster = load_system_paasta_config().get_cluster()
    settings.soa_dir = context.soa_dir
    service, instance, _, __ = decompose_job_id(job_id)
    request = testing.DummyRequest()
    request.matchdict = {'service': service, 'instance': instance}
    response = instance_status(request)
    assert response['app_count'] == int(app_count), response
    marathon_status = response['marathon']
    assert marathon_status['running_instance_count'] == marathon_status['expected_instance_count'], response
def main():
    """Dispatch a serviceinit command for a single service.instance.

    Exits with the command's return code, or 1 for an unknown instance type.
    """
    args = parse_args()
    if args.debug:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)
    service, instance, _, __ = decompose_job_id(args.service_instance)
    cluster = load_system_paasta_config().get_cluster()
    instance_type = validate_service_instance(service, instance, cluster, args.soa_dir)
    if instance_type == 'marathon':
        sys.exit(marathon_serviceinit.perform_command(
            command=args.command,
            service=service,
            instance=instance,
            cluster=cluster,
            verbose=args.verbose,
            soa_dir=args.soa_dir,
            app_id=args.app_id,
            delta=args.delta,
        ))
    if instance_type == 'chronos':
        sys.exit(chronos_serviceinit.perform_command(
            command=args.command,
            service=service,
            instance=instance,
            cluster=cluster,
            verbose=args.verbose,
            soa_dir=args.soa_dir,
        ))
    log.error("I calculated an instance_type of %s for %s which I don't know how to handle. Exiting."
              % (instance_type, compose_job_id(service, instance)))
    sys.exit(1)
def marathon_restart_gets_new_task_ids(context, job_id):
    """Restart the marathon job and assert its tasks were replaced with new ones."""
    service, instance, _, __ = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, soa_dir=context.soa_dir)
    app_id = complete_config['id']
    expected_instances = 1
    cluster = context.system_paasta_config['cluster']
    # Snapshot tasks before the restart so we can compare afterwards.
    tasks_before = context.marathon_client.get_app(app_id).tasks
    with mock.patch('paasta_tools.marathon_serviceinit._log', autospec=True):
        marathon_serviceinit.restart_marathon_job(
            service,
            instance,
            app_id,
            expected_instances,
            context.marathon_client,
            cluster,
        )
    paasta_print("Sleeping 5 seconds to wait for %s to be restarted." % service)
    time.sleep(5)
    tasks_after = context.marathon_client.get_app(app_id).tasks
    paasta_print("Tasks before the restart: %s" % tasks_before)
    paasta_print("Tasks after the restart: %s" % tasks_after)
    paasta_print()  # sacrificial line for behave to eat instead of our output
    assert tasks_before != tasks_after
def main():
    """Set up the chronos job for the given service.instance.

    Order of operations:
    - Parse args and configure logging
    - Decompose the service.instance job id
    - Connect to chronos and build the complete job config
    - Deploy the job and report the result to sensu

    Exits 1 only for an invalid job id; config/deployment problems are
    reported to the owning team via sensu and exit 0.
    """
    configure_log()
    args = parse_args()
    soa_dir = args.soa_dir
    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)
    try:
        service, instance, _, __ = decompose_job_id(args.service_instance, spacer=chronos_tools.INTERNAL_SPACER)
        # FIX: removed two leftover Python 2 debug print statements that dumped
        # service/instance to stdout on every run.
    except InvalidJobNameError:
        log.error("Invalid service instance '%s' specified. Format is service%sinstance." %
                  (args.service_instance, SPACER))
        sys.exit(1)
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = load_system_paasta_config().get_cluster()
    try:
        complete_job_config = chronos_tools.create_complete_config(
            service=service,
            job_name=instance,
            soa_dir=soa_dir,
        )
    except (NoDeploymentsAvailable, NoDockerImageError):
        error_msg = "No deployment found for %s in cluster %s. Has Jenkins run for it?" % (
            args.service_instance, cluster)
        send_event(
            service=service,
            instance=None,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=error_msg,
        )
        log.error(error_msg)
        # Exit 0: the problem was reported to the right team via sensu.
        sys.exit(0)
    except chronos_tools.UnknownChronosJobError as e:
        error_msg = (
            "Could not read chronos configuration file for %s in cluster %s\n" % (args.service_instance, cluster) +
            "Error was: %s" % str(e))
        send_event(
            service=service,
            instance=instance,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=error_msg,
        )
        log.error(error_msg)
        sys.exit(0)
    except chronos_tools.InvalidParentError:
        log.warn("Skipping %s.%s: Parent job could not be found" % (service, instance))
        sys.exit(0)
    status, output = setup_job(
        service=service,
        instance=instance,
        cluster=cluster,
        complete_job_config=complete_job_config,
        client=client,
    )
    sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
    send_event(
        service=service,
        instance=instance,
        soa_dir=soa_dir,
        status=sensu_status,
        output=output,
    )
    # We exit 0 because the script finished ok and the event was sent to the right team.
    sys.exit(0)
def wait_launch_tasks(context, job_id, task_count):
    """Block until the marathon app for ``job_id`` has launched ``task_count`` tasks."""
    service, instance, _, __ = decompose_job_id(job_id)
    complete_config = marathon_tools.create_complete_config(
        service, instance, None, soa_dir=context.soa_dir)
    marathon_tools.wait_for_app_to_launch_tasks(
        context.marathon_client,
        complete_config['id'],
        task_count,
        exact_matches_only=True,
    )