def test_get_all_running_tasks():
    """get_all_running_tasks filters framework tasks plus the master's orphans.

    The current-task lookup yields two tasks and the patched mesos master
    reports one orphan task; all three must be handed to
    filter_running_tasks, whose result is what the function returns.
    """
    framework_tasks = [mock.Mock(), mock.Mock()]
    orphan_task = mock.Mock()

    with mock.patch(
        'paasta_tools.mesos_tools.get_current_tasks', autospec=True,
    ) as patched_current_tasks, mock.patch(
        'paasta_tools.mesos_tools.filter_running_tasks', autospec=True,
    ) as patched_filter, mock.patch(
        'paasta_tools.mesos_tools.get_mesos_master', autospec=True,
    ) as patched_master:
        patched_current_tasks.return_value = list(framework_tasks)
        patched_master.return_value = mock.Mock(
            orphan_tasks=mock.Mock(return_value=[orphan_task]),
        )

        result = mesos_tools.get_all_running_tasks()

        patched_current_tasks.assert_called_with('')
        patched_filter.assert_called_with(framework_tasks + [orphan_task])
        assert result == patched_filter.return_value
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    """Run one autoscaling pass over every service configured to scale.

    Does nothing (beyond logging a warning) when ``autoscaling_is_paused()``
    is truthy, or when the autoscaling lock is already held
    (``LockHeldException``).

    While holding the lock, the marathon apps and running mesos tasks are
    fetched once, then each config is autoscaled in turn. A failure while
    handling one config is written to that config's log and does not stop
    the remaining configs from being processed.

    :param soa_dir: directory passed to ``get_configs_of_services_to_scale``
    """
    if autoscaling_is_paused():
        log.warning("Skipping autoscaling because autoscaler paused")
        return

    try:
        with create_autoscaling_lock():
            system_paasta_config = load_system_paasta_config()
            cluster = system_paasta_config.get_cluster()
            configs = get_configs_of_services_to_scale(
                cluster=cluster, soa_dir=soa_dir)

            marathon_clients = get_marathon_clients(
                get_marathon_servers(system_paasta_config))
            apps_with_clients = get_marathon_apps_with_clients(
                marathon_clients.get_all_clients(), embed_tasks=True)
            all_mesos_tasks = get_all_running_tasks()

            if configs:
                # The app list does not depend on the config being processed,
                # so build it once instead of once per loop iteration.
                marathon_apps = [app for (app, client) in apps_with_clients]
                with ZookeeperPool():
                    for config in configs:
                        try:
                            marathon_tasks, mesos_tasks = filter_autoscaling_tasks(
                                marathon_apps,
                                all_mesos_tasks,
                                config,
                            )
                            autoscale_marathon_instance(
                                config,
                                list(marathon_tasks.values()),
                                mesos_tasks,
                            )
                        except Exception as e:
                            # Deliberately broad: one broken service must not
                            # prevent the others from being autoscaled.
                            write_to_log(
                                config=config, line='Caught Exception %s' % e)
    except LockHeldException:
        log.warning(
            "Skipping autoscaling run for services because the lock is held")
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    """Autoscale every configured service using a single marathon client.

    Takes the autoscaling lock, loads the configs to scale for the current
    cluster and, if there are any, lists all marathon and running mesos
    tasks once. For each config it selects the marathon tasks considered
    healthy, matches them to mesos tasks by id and calls
    ``autoscale_marathon_instance``. Errors for a single config are written
    to that config's log. If the lock is held, a warning is logged instead.

    :param soa_dir: directory passed to ``get_configs_of_services_to_scale``
    """
    try:
        with create_autoscaling_lock():
            cluster = load_system_paasta_config().get_cluster()
            configs = get_configs_of_services_to_scale(
                cluster=cluster, soa_dir=soa_dir)
            if configs:
                marathon_config = load_marathon_config()
                marathon_client = get_marathon_client(
                    url=marathon_config.get_url(),
                    user=marathon_config.get_username(),
                    passwd=marathon_config.get_password(),
                )
                all_marathon_tasks = marathon_client.list_tasks()
                all_mesos_tasks = get_all_running_tasks()
                with ZookeeperPool():
                    for config in configs:
                        try:
                            job_id = config.format_marathon_app_dict()['id']
                            log.info("Inspecting %s for autoscaling" % job_id)

                            # Collect the healthy tasks of this job, keyed by
                            # task id. A task counts as healthy when:
                            #   * its healthcheck results say so, or
                            #   * its app defines no healthchecks at all, or
                            #   * it is "old" and missing healthcheck results
                            #     (we assume marathon stopped healthchecking
                            #     it but that it is healthy).
                            # A task with a defined healthcheck but no results
                            # is otherwise treated as unhealthy.
                            marathon_tasks = {}
                            for candidate in all_marathon_tasks:
                                if not candidate.id.startswith(job_id):
                                    continue
                                if (
                                    is_task_healthy(candidate)
                                    or not marathon_client.get_app(
                                        candidate.app_id).health_checks
                                    or is_old_task_missing_healthchecks(
                                        candidate, marathon_client)
                                ):
                                    marathon_tasks[candidate.id] = candidate

                            if not marathon_tasks:
                                raise MetricsProviderNoDataError(
                                    "Couldn't find any healthy marathon tasks")

                            mesos_tasks = [
                                mesos_task
                                for mesos_task in all_mesos_tasks
                                if mesos_task['id'] in marathon_tasks
                            ]
                            autoscale_marathon_instance(
                                config,
                                list(marathon_tasks.values()),
                                mesos_tasks,
                            )
                        except Exception as e:
                            write_to_log(
                                config=config, line='Caught Exception %s' % e)
    except LockHeldException:
        log.warning(
            "Skipping autoscaling run for services because the lock is held")
def get_autoscaling_info(marathon_clients, service_config):
    """Compute a dry-run autoscaling summary for one service instance.

    Returns ``None`` unless the service has a truthy max-instances setting
    and its desired state is ``'start'``. Otherwise the utilization and the
    instance count the autoscaler would choose are computed with ``noop``
    set in the autoscaling params. If the metrics provider has no data,
    both the utilization and the target are reported as ``"Exception"``.

    :param marathon_clients: object whose ``get_all_clients()`` supplies the
        clients used to fetch marathon apps
    :param service_config: the service's marathon config
    :returns: a ``ServiceAutoscalingInfo`` of strings, or ``None``
    """
    # Guard clause: nothing to report for services that are not autoscaled
    # or are not meant to be running.
    if (
        not service_config.get_max_instances()
        or service_config.get_desired_state() != 'start'
    ):
        return None

    apps_with_clients = get_marathon_apps_with_clients(
        marathon_clients.get_all_clients(), embed_tasks=True)
    all_mesos_tasks = get_all_running_tasks()
    params = service_config.get_autoscaling_params()
    params['noop'] = True
    system_paasta_config = load_system_paasta_config()

    try:
        healthy_marathon_tasks, mesos_tasks = filter_autoscaling_tasks(
            [app for app, _client in apps_with_clients],
            all_mesos_tasks,
            service_config,
        )
        utilization = get_utilization(
            marathon_service_config=service_config,
            system_paasta_config=system_paasta_config,
            autoscaling_params=params,
            log_utilization_data={},
            marathon_tasks=list(healthy_marathon_tasks.values()),
            mesos_tasks=mesos_tasks,
        )
        error = get_error_from_utilization(
            utilization=utilization,
            setpoint=params['setpoint'],
            current_instances=service_config.get_instances(),
        )
        target = get_new_instance_count(
            utilization=utilization,
            error=error,
            autoscaling_params=params,
            current_instances=service_config.get_instances(),
            marathon_service_config=service_config,
            num_healthy_instances=len(healthy_marathon_tasks),
        )
        utilization_text = "{:.1f}%".format(utilization * 100)
    except MetricsProviderNoDataError:
        utilization_text = "Exception"
        target = "Exception"

    return ServiceAutoscalingInfo(
        current_instances=str(service_config.get_instances()),
        max_instances=str(service_config.get_max_instances()),
        min_instances=str(service_config.get_min_instances()),
        current_utilization=utilization_text,
        target_instances=str(target),
    )
def get_all_marathon_mesos_tasks(marathon_client):
    """Fetch marathon tasks and running mesos tasks in one call.

    :param marathon_client: client whose ``list_tasks()`` is queried
    :returns: tuple ``(all_marathon_tasks, all_mesos_tasks)``
    """
    # Tuple elements are evaluated left to right, so marathon is still
    # queried before mesos.
    return marathon_client.list_tasks(), get_all_running_tasks()