Example #1
def get_workload_from_kubernetes(identifier) -> Optional[KubernetesWorkload]:
    if not managers_are_initialized():
        log.error(
            "Cannot get workload from kubernetes because managers aren't initialized"
        )
        return None

    retry_count = get_config_manager().get_int(
        GET_WORKLOAD_RETRY_COUNT, DEFAULT_GET_WORKLOAD_RETRY_COUNT)
    retry_interval = get_config_manager().get_float(
        GET_WORKLOAD_RETRY_INTERVAL_SEC,
        DEFAULT_GET_WORKLOAD_RETRY_INTERVAL_SEC)

    pod_manager = get_pod_manager()
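    # Poll Kubernetes for the pod, sleeping between attempts, until it appears
    # or the configured number of retries is exhausted.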
    for i in range(retry_count):
        log.info("Getting pod from kubernetes: %s", identifier)
        pod = pod_manager.get_pod(identifier)
        if pod is not None:
            log.info("Got pod from kubernetes: %s", identifier)
            return KubernetesWorkload(pod)

        log.info("Retrying getting pod from kubernetes in %s seconds",
                 retry_interval)
        time.sleep(retry_interval)

    log.error("Failed to get pod from kubernetes: %s", identifier)
    return None
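
The function above is essentially a bounded retry loop around the pod manager. As a rough, self-contained sketch of the same pattern (the helper name, defaults, and the fetch_fn callable are hypothetical, not part of titus-isolate):

import logging
import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")
log = logging.getLogger(__name__)


def get_with_retries(fetch_fn: Callable[[], Optional[T]],
                     retry_count: int = 3,
                     retry_interval_sec: float = 1.0) -> Optional[T]:
    # Call fetch_fn until it returns a value or the attempts are exhausted.
    for attempt in range(retry_count):
        result = fetch_fn()
        if result is not None:
            return result
        log.info("Attempt %d failed, retrying in %s seconds",
                 attempt + 1, retry_interval_sec)
        time.sleep(retry_interval_sec)
    return None

With such a helper, get_workload_from_kubernetes would amount to roughly get_with_retries(lambda: pod_manager.get_pod(identifier), ...) plus the KubernetesWorkload wrapping and logging.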
Example #2
    def get_cpu_predictions(
            self, workloads: List[Workload],
            resource_usage: GlobalResourceUsage) -> Optional[Dict[str, float]]:
        pod_manager = get_pod_manager()
        if pod_manager is None:
            return None

        pods = []
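        # Resolve a pod for each workload; workloads whose pods cannot be
        # found are logged and skipped.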
        for w in workloads:
            pod = pod_manager.get_pod(w.get_id())
            if pod is None:
                log.warning("Failed to get pod for workload: %s", w.get_id())
            else:
                pods.append(pod)

        resource_usage_predictions = self.get_predictions(pods, resource_usage)

        predictions = {}
        if resource_usage_predictions is None:
            log.error("Got no resource usage predictions")
            return predictions
        else:
            log.info("Got resource usage predictions: %s",
                     json.dumps(resource_usage_predictions.raw))

        for w_id, prediction in resource_usage_predictions.predictions.items():
            predictions[w_id] = get_first_window_cpu_prediction(prediction)

        return predictions
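
The final loop reduces each workload's prediction to the CPU value of its first window. A minimal sketch of that reduction, assuming, purely for illustration, that predictions arrive as plain lists of windowed CPU values rather than titus-isolate's prediction objects:

from typing import Dict, List


def first_window_values(windowed: Dict[str, List[float]]) -> Dict[str, float]:
    # Keep only the first predicted window per workload id, dropping
    # workloads that have no windows at all.
    return {w_id: windows[0] for w_id, windows in windowed.items() if windows}

For example, first_window_values({"w1": [1.5, 2.0], "w2": []}) returns {"w1": 1.5}.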
Example #3
    def test_free_cpu_on_container_die(self):
        registry = Registry()
        test_pod = get_simple_test_pod()
        get_pod_manager().set_pod(test_pod)
        workload_name = test_pod.metadata.name

        events = [
            get_container_create_event(DEFAULT_CPU_COUNT, STATIC,
                                       workload_name, workload_name),
            get_container_die_event(workload_name)
        ]
        event_count = len(events)
        event_iterable = MockEventProvider(
            events, 1)  # Force in order event processing for the test

        test_context = TestContext()
        manager = EventManager(event_iterable,
                               test_context.get_event_handlers(),
                               DEFAULT_TEST_EVENT_TIMEOUT_SECS)
        manager.set_registry(registry, {})
        manager.start_processing_events()

        wait_until(lambda: event_count == manager.get_processed_count())
        self.assertEqual(0, manager.get_queue_depth())
        self.assertEqual(DEFAULT_TOTAL_THREAD_COUNT,
                         len(test_context.get_cpu().get_empty_threads()))
        self.assertEqual(
            1,
            test_context.get_create_event_handler().get_handled_event_count())
        self.assertEqual(
            1,
            test_context.get_free_event_handler().get_handled_event_count())

        manager.stop_processing_events()

        manager.report_metrics({})
        self.assertTrue(gauge_value_equals(registry, QUEUE_DEPTH_KEY, 0))
        self.assertTrue(
            counter_value_equals(
                registry, EVENT_SUCCEEDED_KEY,
                event_count * len(test_context.get_event_handlers())))
        self.assertTrue(counter_value_equals(registry, EVENT_FAILED_KEY, 0))
        self.assertTrue(
            counter_value_equals(registry, EVENT_PROCESSED_KEY, event_count))
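
The test blocks on wait_until until the EventManager reports that both events have been processed. A minimal sketch of such a polling helper, with a timeout and poll interval that are assumptions rather than values taken from the project's test utilities:

import time
from typing import Callable


def wait_until(condition: Callable[[], bool],
               timeout_sec: float = 10.0,
               poll_interval_sec: float = 0.1) -> None:
    # Poll the condition until it returns True or the timeout elapses.
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        if condition():
            return
        time.sleep(poll_interval_sec)
    raise TimeoutError("Condition not met within {} seconds".format(timeout_sec))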
Example #4
    log.info("Setting up event handlers...")
    reconciler = Reconciler(cgroup_manager, exit_handler)
    create_event_handler = CreateEventHandler(workload_manager)
    free_event_handler = FreeEventHandler(workload_manager)
    rebalance_event_handler = RebalanceEventHandler(workload_manager)
    reconcile_event_handler = ReconcileEventHandler(reconciler)
    oversub_event_handler = None
    predicted_usage_handler = None
    if is_kubernetes():
        oversub_event_handler = OversubscribeEventHandler(
            workload_manager,
            KubernetesOpportunisticWindowPublisher(exit_handler))
        predicted_usage_handler = ResourceUsagePredictionHandler(
            KubernetesPredictedUsagePublisher(
                resource_usage_predictor=ResourceUsagePredictor(),
                pod_manager=get_pod_manager(),
                workload_monitor_manager=workload_monitor_manager))

    event_handlers = [
        h for h in [
            create_event_handler, free_event_handler, rebalance_event_handler,
            reconcile_event_handler, oversub_event_handler,
            predicted_usage_handler
        ] if h is not None
    ]

    # Start event processing
    log.info("Starting Docker event handling...")
    event_manager = EventManager(docker.from_env().events(), event_handlers)
    set_event_manager(event_manager)
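
The stream handed to EventManager here is the raw Docker event generator from docker-py. For context, a standalone sketch of consuming that same stream directly; the container filter below is illustrative only, not what the registered handlers actually do:

import docker


def tail_container_events() -> None:
    # Stream decoded Docker events and print container lifecycle actions.
    # EventManager consumes the same generator, dispatching to its handlers instead.
    client = docker.from_env()
    for event in client.events(decode=True):
        if event.get("Type") == "container":
            print(event.get("Action"), event.get("id", "")[:12])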