def test_handle_k8s_events_job_statuses_for_non_existing_job(self):
    """Handling a job state without creating a job first must store no status.

    No job object is set up in this test, so the status table is expected to
    be empty both before and after the handler runs.
    """
    assert self.STATUS_MODEL.objects.count() == 0

    event_type = self.EVENT['type']  # pylint:disable=unsubscriptable-object
    event_object = self.EVENT['object']  # pylint:disable=unsubscriptable-object
    job_state = get_job_state(
        event_type=event_type,
        event=event_object,
        job_container_names=(self.CONTAINER_NAME, ),
        experiment_type_label=settings.TYPE_LABELS_RUNNER)

    payload = job_state.to_dict()
    self.STATUS_HANDLER(payload)  # pylint:disable=not-callable

    assert self.STATUS_MODEL.objects.count() == 0
def test_handle_k8s_events_job_statuses_for_non_existing_job(self):
    """Handling a job state without creating a job first must store no status.

    The state is stamped with a ``created_at`` one day in the future. No job
    object is set up in this test, so the status table is expected to be empty
    both before and after the handler runs.
    """
    assert self.STATUS_MODEL.objects.count() == 0

    event_type = self.EVENT['type']  # pylint:disable=unsubscriptable-object
    event_object = self.EVENT['object']  # pylint:disable=unsubscriptable-object
    one_day_ahead = timezone.now() + datetime.timedelta(days=1)
    job_state = get_job_state(
        event_type=event_type,
        event=event_object,
        created_at=one_day_ahead,
        job_container_names=(self.CONTAINER_NAME, ),
        experiment_type_label=conf.get('TYPE_LABELS_RUNNER'))

    payload = job_state.to_dict()
    self.STATUS_HANDLER(payload)  # pylint:disable=not-callable

    assert self.STATUS_MODEL.objects.count() == 0
def run(k8s_manager):
    """Stream pod events and forward the resulting job states to celery.

    Pods are listed through ``k8s_manager.k8s_api.list_namespaced_pod`` in
    ``k8s_manager.namespace``, filtered by ``get_label_selector()``. For every
    event that yields a job state, the celery task is picked from the container
    name found in the state's ``container_statuses``: experiment job, plugin
    job, or dockerizer job. Events matching none of them are skipped.
    """
    watcher = watch.Watch()
    pod_events = watcher.stream(k8s_manager.k8s_api.list_namespaced_pod,
                                namespace=k8s_manager.namespace,
                                label_selector=get_label_selector())
    for event in pod_events:
        logger.debug("Received event: %s", event['type'])
        pod = event['object'].to_dict()
        state = get_job_state(
            event_type=event['type'],
            event=pod,
            job_container_names=(settings.CONTAINER_NAME_JOB,
                                 settings.CONTAINER_NAME_PLUGIN_JOB,
                                 settings.CONTAINER_NAME_DOCKERIZER_JOB),
            experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)
        if not state:
            continue

        status = state.status
        if state.details and state.details.labels:
            labels = state.details.labels.to_dict()
        else:
            labels = None
        logger.info("Updating job container %s, %s", status, labels)
        logger.debug(pod)

        payload = state.to_dict()
        logger.debug(payload)

        container_statuses = payload['details']['container_statuses']
        is_experiment_job = settings.CONTAINER_NAME_JOB in container_statuses
        is_plugin_job = settings.CONTAINER_NAME_PLUGIN_JOB in container_statuses
        is_dockerizer_job = settings.CONTAINER_NAME_DOCKERIZER_JOB in container_statuses

        if is_experiment_job:
            # Only experiment jobs get their job containers updated; plugin
            # and dockerizer jobs are merely forwarded to their handlers.
            update_job_containers(pod, status, settings.CONTAINER_NAME_JOB)
            task_name = EventsCeleryTasks.EVENTS_HANDLE_EXPERIMENT_JOB_STATUSES
        elif is_plugin_job:
            task_name = EventsCeleryTasks.EVENTS_HANDLE_PLUGIN_JOB_STATUSES
        elif is_dockerizer_job:
            task_name = EventsCeleryTasks.EVENTS_HANDLE_BUILD_JOB_STATUSES
        else:
            continue

        celery_app.send_task(task_name, kwargs={'payload': payload})
def test_handle_events_job_statuses_for_existing_job_with_known_conditions(self):
    """An event carrying pod conditions yields CREATED and FAILED statuses."""
    assert self.STATUS_MODEL.objects.count() == 0

    event_type = self.EVENT_WITH_CONDITIONS['type']  # pylint:disable=unsubscriptable-object
    event_object = self.EVENT_WITH_CONDITIONS['object']  # pylint:disable=unsubscriptable-object
    job_state = get_job_state(
        event_type=event_type,
        event=event_object,
        job_container_names=(self.CONTAINER_NAME,),
        experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)

    # NOTE(review): get_job_object presumably creates the job matching this
    # state (which would account for the CREATED status) - confirm.
    job = self.get_job_object(job_state)

    payload = job_state.to_dict()
    self.STATUS_HANDLER(payload)  # pylint:disable=not-callable

    assert self.STATUS_MODEL.objects.count() == 2
    recorded = self.STATUS_MODEL.objects.filter(job=job).values_list('status', flat=True)
    assert set(recorded) == {JobLifeCycle.CREATED, JobLifeCycle.FAILED}
def test_handle_k8s_events_job_statuses_for_existing_job_with_unknown_conditions(self):
    """An event without known conditions yields CREATED and UNKNOWN statuses.

    The state is stamped with a ``created_at`` one day in the future.
    """
    assert self.STATUS_MODEL.objects.count() == 0

    event_type = self.EVENT['type']  # pylint:disable=unsubscriptable-object
    event_object = self.EVENT['object']  # pylint:disable=unsubscriptable-object
    one_day_ahead = timezone.now() + datetime.timedelta(days=1)
    job_state = get_job_state(
        event_type=event_type,
        event=event_object,
        created_at=one_day_ahead,
        job_container_names=(self.CONTAINER_NAME,),
        experiment_type_label=settings.TYPE_LABELS_RUNNER)

    # NOTE(review): get_job_object presumably creates the job matching this
    # state (which would account for the CREATED status) - confirm.
    job = self.get_job_object(job_state)

    payload = job_state.to_dict()
    self.STATUS_HANDLER(payload)  # pylint:disable=not-callable

    assert self.STATUS_MODEL.objects.count() == 2
    recorded = self.STATUS_MODEL.objects.filter(job=job).values_list('status', flat=True)
    assert set(recorded) == {JobLifeCycle.CREATED, JobLifeCycle.UNKNOWN}
def test_get_failed_job_state(self):
    """The event fixture with pod conditions maps to a FAILED job state."""
    fixture = status_experiment_job_event_with_conditions
    job_state = get_job_state(
        event_type=fixture['type'],
        event=fixture['object'],
        job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,),
        experiment_type_label=settings.TYPE_LABELS_RUNNER)

    assert isinstance(job_state, JobStateConfig)
    details = job_state.details
    assert isinstance(details, PodStateConfig)

    # Pod-level details parsed from the event.
    assert details.event_type == EventTypes.ADDED
    assert details.phase == PodLifeCycle.FAILED
    expected_labels = fixture['object']['metadata']['labels']
    assert details.labels.to_dict() == expected_labels
    assert details.deletion_timestamp is None
    assert set(details.pod_conditions.keys()) == set(PodConditions.VALUES)
    expected_containers = {settings.CONTAINER_NAME_EXPERIMENT_JOB, }
    assert set(details.container_statuses.keys()) == expected_containers

    # Job-level outcome derived from those details.
    assert job_state.status == JobLifeCycle.FAILED
    assert job_state.message is None
def test_get_pending_job_state(self):
    """A pending pod without conditions maps to an UNKNOWN job state."""
    fixture = status_experiment_job_event
    job_state = get_job_state(
        event_type=fixture['type'],
        event=fixture['object'],
        job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,),
        experiment_type_label=settings.TYPE_LABELS_RUNNER)

    assert isinstance(job_state, JobStateConfig)
    details = job_state.details
    assert isinstance(details, PodStateConfig)

    # Pod-level details parsed from the event.
    assert details.event_type == EventTypes.ADDED
    assert details.phase == PodLifeCycle.PENDING
    expected_labels = fixture['object']['metadata']['labels']
    assert details.labels.to_dict() == expected_labels
    assert details.deletion_timestamp is None
    assert details.pod_conditions is None
    assert details.container_statuses == {}

    # Job-level outcome derived from those details.
    assert job_state.status == JobLifeCycle.UNKNOWN
    assert job_state.message == 'Unknown pod conditions'
def test_get_failed_job_state(self):
    """The event fixture with pod conditions maps to a FAILED job state."""
    fixture = status_experiment_job_event_with_conditions
    job_state = get_job_state(
        event_type=fixture['type'],
        event=fixture['object'],
        job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,),
        experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)

    assert isinstance(job_state, JobStateConfig)
    details = job_state.details
    assert isinstance(details, PodStateConfig)

    # Pod-level details parsed from the event.
    assert details.event_type == EventTypes.ADDED
    assert details.phase == PodLifeCycle.FAILED
    expected_labels = fixture['object']['metadata']['labels']
    assert details.labels.to_dict() == expected_labels
    assert details.deletion_timestamp is None
    assert set(details.pod_conditions.keys()) == set(PodConditions.VALUES)
    expected_containers = {settings.CONTAINER_NAME_EXPERIMENT_JOB, }
    assert set(details.container_statuses.keys()) == expected_containers

    # Job-level outcome derived from those details.
    assert job_state.status == JobLifeCycle.FAILED
    assert job_state.message is None
def test_get_pending_job_state(self):
    """A pending pod without conditions maps to an UNKNOWN job state."""
    fixture = status_experiment_job_event
    job_state = get_job_state(
        event_type=fixture['type'],
        event=fixture['object'],
        job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,),
        experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)

    assert isinstance(job_state, JobStateConfig)
    details = job_state.details
    assert isinstance(details, PodStateConfig)

    # Pod-level details parsed from the event.
    assert details.event_type == EventTypes.ADDED
    assert details.phase == PodLifeCycle.PENDING
    expected_labels = fixture['object']['metadata']['labels']
    assert details.labels.to_dict() == expected_labels
    assert details.deletion_timestamp is None
    assert details.pod_conditions is None
    assert details.container_statuses == {}

    # Job-level outcome derived from those details.
    assert job_state.status == JobLifeCycle.UNKNOWN
    assert job_state.message == 'Unknown pod conditions'
def run(k8s_manager):
    """Stream pod events and forward the resulting job states to celery.

    Pods are listed through ``k8s_manager.k8s_api.list_namespaced_pod`` in
    ``k8s_manager.namespace``, filtered by ``get_label_selector()``. For every
    event that yields a job state, the celery task is picked from the container
    name found in the state's ``container_statuses``: experiment job, generic
    job, plugin job, or dockerizer job, in that order of precedence. Events
    matching none of them are skipped.
    """
    watcher = watch.Watch()
    pod_events = watcher.stream(k8s_manager.k8s_api.list_namespaced_pod,
                                namespace=k8s_manager.namespace,
                                label_selector=get_label_selector())
    for event in pod_events:
        logger.debug("Received event: %s", event['type'])
        pod = event['object'].to_dict()
        state = get_job_state(
            event_type=event['type'],
            event=pod,
            job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,
                                 settings.CONTAINER_NAME_PLUGIN_JOB,
                                 settings.CONTAINER_NAME_DOCKERIZER_JOB),
            experiment_type_label=settings.TYPE_LABELS_EXPERIMENT)
        if not state:
            continue

        status = state.status
        if state.details and state.details.labels:
            labels = state.details.labels.to_dict()
        else:
            labels = None
        logger.info("Updating job container %s, %s", status, labels)
        logger.debug(pod)

        payload = state.to_dict()
        logger.debug(payload)

        container_statuses = payload['details']['container_statuses']
        is_experiment_job = settings.CONTAINER_NAME_EXPERIMENT_JOB in container_statuses
        is_job = settings.CONTAINER_NAME_JOB in container_statuses
        is_plugin_job = settings.CONTAINER_NAME_PLUGIN_JOB in container_statuses
        is_dockerizer_job = settings.CONTAINER_NAME_DOCKERIZER_JOB in container_statuses

        # Only experiment jobs and generic jobs get their job containers
        # updated; plugin and dockerizer jobs are merely forwarded.
        if is_experiment_job:
            update_job_containers(pod, status, settings.CONTAINER_NAME_EXPERIMENT_JOB)
            task_name = EventsCeleryTasks.EVENTS_HANDLE_EXPERIMENT_JOB_STATUSES
        elif is_job:
            update_job_containers(pod, status, settings.CONTAINER_NAME_JOB)
            task_name = EventsCeleryTasks.EVENTS_HANDLE_JOB_STATUSES
        elif is_plugin_job:
            task_name = EventsCeleryTasks.EVENTS_HANDLE_PLUGIN_JOB_STATUSES
        elif is_dockerizer_job:
            task_name = EventsCeleryTasks.EVENTS_HANDLE_BUILD_JOB_STATUSES
        else:
            continue

        celery_app.send_task(task_name, kwargs={'payload': payload})
def run(k8s_manager):
    """Stream pod events and forward the resulting job states to celery.

    Pods are listed through ``k8s_manager.k8s_api.list_namespaced_pod`` in
    ``k8s_manager.namespace``, filtered by ``get_label_selector()``. Each event
    is timestamped on receipt and turned into a job state. The state is routed
    to a celery task by, in order of precedence: experiment job, generic job,
    plugin job (tensorboard / notebook), dockerizer job. A state matches a kind
    when that kind's container name appears in ``container_statuses`` or, as a
    fallback when a status is set, when the pod's ``app`` label matches that
    kind's app label. States matching nothing are logged as lost.
    """
    w = watch.Watch()
    for event in w.stream(k8s_manager.k8s_api.list_namespaced_pod,
                          namespace=k8s_manager.namespace,
                          label_selector=get_label_selector()):
        created_at = timezone.now()
        logger.debug("Received event: %s", event['type'])
        event_object = event['object'].to_dict()
        logger.debug(event_object)
        job_state = get_job_state(
            event_type=event['type'],
            event=event_object,
            job_container_names=(settings.CONTAINER_NAME_EXPERIMENT_JOB,
                                 settings.CONTAINER_NAME_PLUGIN_JOB,
                                 settings.CONTAINER_NAME_JOB,
                                 settings.CONTAINER_NAME_DOCKERIZER_JOB),
            experiment_type_label=settings.TYPE_LABELS_RUNNER,
            created_at=created_at)
        if not job_state:
            continue

        status = job_state.status
        labels = None
        if job_state.details and job_state.details.labels:
            labels = job_state.details.labels.to_dict()
        logger.info("Updating job container %s, %s", status, labels)
        logger.debug(event_object)

        job_state = job_state.to_dict()
        logger.debug(job_state)

        container_statuses = job_state['details']['container_statuses']
        # `labels` may be None or lack an 'app' key. Read it defensively so a
        # single odd pod falls through to the "Lost state" branch instead of
        # raising TypeError/KeyError and killing the whole watch loop.
        app_label = labels.get('app') if labels else None
        # The label fallback only applies when both a status and an app label
        # are present.
        can_match_label = bool(status) and app_label is not None

        experiment_job_condition = (
            settings.CONTAINER_NAME_EXPERIMENT_JOB in container_statuses or
            (can_match_label and app_label == settings.APP_LABELS_EXPERIMENT))
        job_condition = (
            settings.CONTAINER_NAME_JOB in container_statuses or
            (can_match_label and app_label == settings.APP_LABELS_JOB))
        plugin_job_condition = (
            settings.CONTAINER_NAME_PLUGIN_JOB in container_statuses or
            (can_match_label and app_label in (settings.APP_LABELS_TENSORBOARD,
                                               settings.APP_LABELS_NOTEBOOK)))
        dockerizer_job_condition = (
            settings.CONTAINER_NAME_DOCKERIZER_JOB in container_statuses or
            (can_match_label and app_label == settings.APP_LABELS_DOCKERIZER))

        if experiment_job_condition:
            update_job_containers(event_object,
                                  status,
                                  settings.CONTAINER_NAME_EXPERIMENT_JOB)
            # Handle experiment job statuses
            task_name = K8SEventsCeleryTasks.K8S_EVENTS_HANDLE_EXPERIMENT_JOB_STATUSES
        elif job_condition:
            update_job_containers(event_object, status, settings.CONTAINER_NAME_JOB)
            # Handle generic job statuses
            task_name = K8SEventsCeleryTasks.K8S_EVENTS_HANDLE_JOB_STATUSES
        elif plugin_job_condition:
            # Handle plugin job statuses
            task_name = K8SEventsCeleryTasks.K8S_EVENTS_HANDLE_PLUGIN_JOB_STATUSES
        elif dockerizer_job_condition:
            # Handle dockerizer job statuses
            task_name = K8SEventsCeleryTasks.K8S_EVENTS_HANDLE_BUILD_JOB_STATUSES
        else:
            logger.info("Lost state %s, %s", status, job_state)
            continue

        logger.debug("Sending state to handler %s, %s", status, labels)
        celery_app.send_task(task_name, kwargs={'payload': job_state})