def sync(self):
    """Run one synchronization pass of the executor.

    The pass syncs the Kubernetes scheduler, drains ``self.result_queue``
    (applying every reported state through ``_change_state``; a result
    whose state change raises is logged and put back on the queue),
    checkpoints the last resource version seen, and finally hands queued
    tasks to the scheduler -- at most ``worker_pods_creation_batch_size``
    of them per pass.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # Stays None when the result queue was empty for the whole pass.
    newest_version = None
    while not self.result_queue.empty():
        event = self.result_queue.get()
        task_key, new_state, pod, newest_version = event
        self.log.info('Changing state of %s to %s', event, new_state)
        try:
            self._change_state(task_key, new_state, pod)
        except Exception as err:
            self.log.exception(
                'Exception: %s when attempting '
                'to change state of %s to %s, re-queueing.',
                err, event, new_state)
            self.result_queue.put(event)

    KubeResourceVersion.checkpoint_resource_version(newest_version)

    # Never launch more than the configured batch size, nor more than the
    # number of tasks the queue reports as waiting.
    batch_limit = self.kube_config.worker_pods_creation_batch_size
    launch_count = min(batch_limit, self.task_queue.qsize())
    for _ in range(launch_count):
        pending = self.task_queue.get()
        try:
            self.kube_scheduler.run_next(pending)
        except ApiException:
            self.log.exception(
                'ApiException when attempting '
                'to run task, re-queueing.')
            self.task_queue.put(pending)
def sync(self) -> None:
    """Run one synchronization pass of the executor.

    Raises:
        AirflowException: if any of ``worker_uuid``, ``kube_scheduler``,
            ``kube_config``, ``result_queue`` or ``task_queue`` is falsy.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    # Checked one attribute at a time, in this order, so the first falsy
    # one raises before the later ones are looked at.
    if (
        not self.worker_uuid
        or not self.kube_scheduler
        or not self.kube_config
        or not self.result_queue
        or not self.task_queue
    ):
        raise AirflowException(NOT_STARTED_MESSAGE)

    self.kube_scheduler.sync()

    # Stays None when the result queue was empty for the whole pass.
    newest_version = None
    while True:  # pylint: disable=too-many-nested-blocks
        try:
            event = self.result_queue.get_nowait()
            try:
                task_key, new_state, pod, pod_namespace, newest_version = event
                self.log.info('Changing state of %s to %s', event, new_state)
                try:
                    self._change_state(task_key, new_state, pod, pod_namespace)
                except Exception as err:  # pylint: disable=broad-except
                    self.log.exception(
                        "Exception: %s when attempting to change state of %s to %s, re-queueing.",
                        err,
                        event,
                        new_state,
                    )
                    self.result_queue.put(event)
            finally:
                # Every successful get is matched by a task_done.
                self.result_queue.task_done()
        except Empty:
            break

    KubeResourceVersion.checkpoint_resource_version(newest_version)

    # pylint: disable=too-many-nested-blocks
    for _ in range(self.kube_config.worker_pods_creation_batch_size):
        try:
            pending = self.task_queue.get_nowait()
            try:
                self.kube_scheduler.run_next(pending)
            except ApiException as api_err:
                if api_err.reason == "BadRequest":
                    # The task is marked failed rather than put back on the queue.
                    self.log.error("Request was invalid. Failing task")
                    failed_key, _, _ = pending
                    self.change_state(failed_key, State.FAILED, api_err)
                else:
                    self.log.warning(
                        'ApiException when attempting to run task, re-queueing. '
                        'Message: %s',
                        json.loads(api_err.body)['message'],
                    )
                    self.task_queue.put(pending)
            finally:
                self.task_queue.task_done()
        except Empty:
            break
def sync(self):
    """Perform a single executor sync cycle.

    Steps, in order: sync the Kubernetes scheduler; consume everything on
    ``self.result_queue`` and apply it with ``_change_state`` (failures
    are logged and the result is re-queued); checkpoint the last resource
    version that was read; submit waiting tasks to the scheduler, bounded
    by ``worker_pods_creation_batch_size``.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # None means no result was read during this cycle.
    latest_version = None
    while not self.result_queue.empty():
        outcome = self.result_queue.get()
        ti_key, ti_state, pod, latest_version = outcome
        self.log.info('Changing state of %s to %s', outcome, ti_state)
        try:
            self._change_state(ti_key, ti_state, pod)
        except Exception as exc:
            self.log.exception(
                'Exception: %s when attempting '
                'to change state of %s to %s, re-queueing.',
                exc, outcome, ti_state)
            self.result_queue.put(outcome)

    KubeResourceVersion.checkpoint_resource_version(latest_version)

    # Bounded by both the configured batch size and the reported queue size.
    max_batch = self.kube_config.worker_pods_creation_batch_size
    waiting = self.task_queue.qsize()
    for _ in range(min(max_batch, waiting)):
        next_task = self.task_queue.get()
        try:
            self.kube_scheduler.run_next(next_task)
        except ApiException:
            self.log.exception(
                'ApiException when attempting '
                'to run task, re-queueing.')
            self.task_queue.put(next_task)
def sync(self):
    """Perform a single executor sync cycle.

    Syncs the Kubernetes scheduler, applies every result waiting on
    ``self.result_queue`` through ``_change_state``, checkpoints the last
    resource version read, and then submits at most one queued task to
    the scheduler. A task whose submission raises ``ApiException`` is
    logged and put back on the task queue.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # None means no result was read during this cycle.
    latest_version = None
    while not self.result_queue.empty():
        outcome = self.result_queue.get()
        ti_key, ti_state, pod, latest_version = outcome
        self.log.info('Changing state of %s to %s', outcome, ti_state)
        self._change_state(ti_key, ti_state, pod)

    KubeResourceVersion.checkpoint_resource_version(latest_version)

    # Nothing queued: the cycle is finished.
    if self.task_queue.empty():
        return

    next_task = self.task_queue.get()
    try:
        self.kube_scheduler.run_next(next_task)
    except ApiException:
        self.log.exception(
            'ApiException when attempting '
            'to run task, re-queueing.')
        self.task_queue.put(next_task)
def sync(self):
    """Run one synchronization pass of the executor.

    Syncs the Kubernetes scheduler, drains ``self.result_queue`` without
    blocking (each result is applied through ``_change_state``; on failure
    it is logged and re-queued), checkpoints the last resource version
    seen, then submits up to ``worker_pods_creation_batch_size`` queued
    tasks. Tasks whose submission raises ``ApiException`` or ``HTTPError``
    are logged and put back on the task queue.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # Stays None when the result queue was empty for the whole pass.
    newest_version = None
    while True:
        try:
            event = self.result_queue.get_nowait()
            try:
                task_key, new_state, pod, pod_namespace, newest_version = event
                self.log.info('Changing state of %s to %s', event, new_state)
                try:
                    self._change_state(task_key, new_state, pod, pod_namespace)
                except Exception as err:
                    self.log.exception(
                        'Exception: %s when attempting '
                        'to change state of %s to %s, re-queueing.',
                        err, event, new_state)
                    self.result_queue.put(event)
            finally:
                # Every successful get is matched by a task_done.
                self.result_queue.task_done()
        except Empty:
            break

    KubeResourceVersion.checkpoint_resource_version(newest_version)

    for _ in range(self.kube_config.worker_pods_creation_batch_size):
        try:
            pending = self.task_queue.get_nowait()
            try:
                self.kube_scheduler.run_next(pending)
            except ApiException as api_err:
                self.log.warning(
                    'ApiException when attempting to run task, re-queueing. '
                    'Message: %s',
                    json.loads(api_err.body)['message'])
                self.task_queue.put(pending)
            except HTTPError as http_err:
                self.log.warning(
                    'HTTPError when attempting to run task, re-queueing. '
                    'Exception: %s',
                    str(http_err))
                self.task_queue.put(pending)
            finally:
                self.task_queue.task_done()
        except Empty:
            break
def sync(self) -> None:
    """Synchronize task state.

    Syncs the Kubernetes scheduler, drains ``self.result_queue`` without
    blocking (each result is applied through ``_change_state``; on failure
    it is logged and re-queued), checkpoints the last resource version
    seen, then submits up to ``worker_pods_creation_batch_size`` queued
    tasks. A task whose submission raises ``ApiException`` is logged and
    put back on the task queue.

    Raises:
        AssertionError: if ``kube_scheduler``, ``kube_config``,
            ``result_queue`` or ``task_queue`` is falsy.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)
    assert self.kube_scheduler, NOT_STARTED_MESSAGE
    assert self.kube_config, NOT_STARTED_MESSAGE
    assert self.result_queue, NOT_STARTED_MESSAGE
    assert self.task_queue, NOT_STARTED_MESSAGE
    self.kube_scheduler.sync()

    # Stays None when the result queue was empty for the whole pass.
    last_resource_version = None
    while True:  # pylint: disable=too-many-nested-blocks
        try:
            results = self.result_queue.get_nowait()
            try:
                key, state, pod_id, resource_version = results
                last_resource_version = resource_version
                self.log.info('Changing state of %s to %s', results, state)
                try:
                    self._change_state(key, state, pod_id)
                except Exception as e:  # pylint: disable=broad-except
                    self.log.exception(
                        'Exception: %s when attempting '
                        'to change state of %s to %s, re-queueing.',
                        e, results, state)
                    self.result_queue.put(results)
            finally:
                # Every successful get is matched by a task_done.
                self.result_queue.task_done()
        except Empty:
            break

    KubeResourceVersion.checkpoint_resource_version(last_resource_version)

    # pylint: disable=too-many-nested-blocks
    for _ in range(self.kube_config.worker_pods_creation_batch_size):
        try:
            task = self.task_queue.get_nowait()
            try:
                self.kube_scheduler.run_next(task)
            except ApiException as e:
                # Pass the message as a logger argument instead of
                # pre-formatting with ``%`` so interpolation is deferred
                # to the logging framework.
                self.log.warning(
                    'ApiException when attempting to run task, re-queueing. '
                    'Message: %s', json.loads(e.body)['message'])
                self.task_queue.put(task)
            finally:
                self.task_queue.task_done()
        except Empty:
            break
def _make_kube_watcher(self):
    """Create, start and return a ``KubernetesJobWatcher``.

    The watcher is given the resource version currently reported by
    ``KubeResourceVersion``.
    """
    current_version = KubeResourceVersion.get_current_resource_version()
    job_watcher = KubernetesJobWatcher(
        self.namespace,
        self.watcher_queue,
        current_version,
        self.worker_uuid,
        self.kube_config,
    )
    job_watcher.start()
    return job_watcher
def _make_kube_watcher(self) -> KubernetesJobWatcher:
    """Create, start and return a ``KubernetesJobWatcher``.

    The watcher is given the resource version currently reported by
    ``KubeResourceVersion``.
    """
    current_version = KubeResourceVersion.get_current_resource_version()
    job_watcher = KubernetesJobWatcher(
        watcher_queue=self.watcher_queue,
        resource_version=current_version,
        worker_uuid=self.worker_uuid,
        kube_config=self.kube_config,
    )
    job_watcher.start()
    return job_watcher
def start(self):
    """Initialise the executor's runtime state.

    Obtains the worker UUID, resets the stored resource version, creates
    the task and result queues from ``self._manager``, builds the
    Kubernetes client and scheduler, then injects secrets and clears
    queued tasks that were never launched.
    """
    self.log.info('Start Kubernetes executor')

    self.worker_uuid = (
        KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
    )
    self.log.debug('Start with worker_uuid: %s', self.worker_uuid)

    # always need to reset resource version since we don't know
    # when we last started, note for behavior below
    # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
    # /CoreV1Api.md#list_namespaced_pod
    KubeResourceVersion.reset_resource_version()

    self.task_queue = self._manager.Queue()
    self.result_queue = self._manager.Queue()
    self.kube_client = get_kube_client()

    self.kube_scheduler = AirflowKubernetesScheduler(
        self.kube_config,
        self.task_queue,
        self.result_queue,
        self.kube_client,
        self.worker_uuid,
    )

    self._inject_secrets()
    self.clear_not_launched_queued_tasks()
def start(self):
    """Initialise the executor's runtime state.

    Obtains the worker UUID, resets the stored resource version, creates
    fresh task and result queues, builds the Kubernetes client and
    scheduler, then injects secrets and clears queued tasks that were
    never launched.
    """
    self.log.info('Start Kubernetes executor')

    self.worker_uuid = (
        KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
    )
    self.log.debug('Start with worker_uuid: %s', self.worker_uuid)

    # always need to reset resource version since we don't know
    # when we last started, note for behavior below
    # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
    # /CoreV1Api.md#list_namespaced_pod
    KubeResourceVersion.reset_resource_version()

    self.task_queue = Queue()
    self.result_queue = Queue()
    self.kube_client = get_kube_client()

    self.kube_scheduler = AirflowKubernetesScheduler(
        self.kube_config,
        self.task_queue,
        self.result_queue,
        self.kube_client,
        self.worker_uuid,
    )

    self._inject_secrets()
    self.clear_not_launched_queued_tasks()
def sync(self):
    """Perform a single executor sync cycle.

    Logs the running and queued tasks, syncs the Kubernetes scheduler,
    applies every result waiting on ``self.result_queue`` through
    ``_change_state``, checkpoints the last resource version read using
    ``self._session``, and then submits at most one queued task to the
    scheduler.
    """
    # Logger arguments are used instead of ``str.format`` so the message
    # is only interpolated when the record is actually emitted.
    self.log.info("self.running: %s", self.running)
    self.log.info("self.queued: %s", self.queued_tasks)
    self.kube_scheduler.sync()

    # None means no result was read during this cycle.
    last_resource_version = None
    while not self.result_queue.empty():
        results = self.result_queue.get()
        key, state, pod_id, resource_version = results
        last_resource_version = resource_version
        self.log.info("Changing state of %s to %s", results, state)
        self._change_state(key, state, pod_id)

    KubeResourceVersion.checkpoint_resource_version(
        last_resource_version, session=self._session)

    if not self.task_queue.empty():
        # Unpacking requires the queued item to have exactly three
        # elements; they are handed to the scheduler as a tuple.
        key, command, kube_executor_config = self.task_queue.get()
        self.kube_scheduler.run_next((key, command, kube_executor_config))
def sync(self):
    """Perform a single executor sync cycle.

    Syncs the Kubernetes scheduler, applies every result waiting on
    ``self.result_queue`` through ``_change_state``, checkpoints the last
    resource version read, and then submits at most one queued task to
    the scheduler.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # None means no result was read during this cycle.
    latest_version = None
    while not self.result_queue.empty():
        outcome = self.result_queue.get()
        ti_key, ti_state, pod, latest_version = outcome
        self.log.info('Changing state of %s to %s', outcome, ti_state)
        self._change_state(ti_key, ti_state, pod)

    KubeResourceVersion.checkpoint_resource_version(latest_version)

    # Nothing queued: the cycle is finished.
    if self.task_queue.empty():
        return

    # Unpacking requires the queued item to have exactly three elements;
    # they are handed to the scheduler as a tuple.
    task_key, task_command, executor_config = self.task_queue.get()
    self.kube_scheduler.run_next((task_key, task_command, executor_config))
def _make_kube_watcher(self):
    """Create, start and return a ``KubernetesJobWatcher``.

    The watcher is given the resource version currently reported by
    ``KubeResourceVersion`` together with the namespace and
    multi-namespace settings read from ``self.kube_config``.
    """
    current_version = KubeResourceVersion.get_current_resource_version()
    job_watcher = KubernetesJobWatcher(
        watcher_queue=self.watcher_queue,
        namespace=self.kube_config.kube_namespace,
        multi_namespace_mode=self.kube_config.multi_namespace_mode,
        resource_version=current_version,
        worker_uuid=self.worker_uuid,
        kube_config=self.kube_config,
    )
    job_watcher.start()
    return job_watcher
def sync(self):
    """Perform a single executor sync cycle.

    Syncs the Kubernetes scheduler, applies every result waiting on
    ``self.result_queue`` through ``_change_state``, checkpoints the last
    resource version read using ``self._session``, and then submits at
    most one queued task to the scheduler.
    """
    if self.running:
        self.log.info('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.info('self.queued: %s', self.queued_tasks)

    self.kube_scheduler.sync()

    # None means no result was read during this cycle.
    latest_version = None
    while not self.result_queue.empty():
        outcome = self.result_queue.get()
        ti_key, ti_state, pod, latest_version = outcome
        self.log.info('Changing state of %s to %s', outcome, ti_state)
        self._change_state(ti_key, ti_state, pod)

    KubeResourceVersion.checkpoint_resource_version(
        latest_version, session=self._session)

    # Nothing queued: the cycle is finished.
    if self.task_queue.empty():
        return

    # Unpacking requires the queued item to have exactly three elements;
    # they are handed to the scheduler as a tuple.
    task_key, task_command, executor_config = self.task_queue.get()
    self.kube_scheduler.run_next((task_key, task_command, executor_config))
def test_reset_resource_version(self):
    """Resetting returns '0' and the stored version then reads back as '0'."""
    db_session = settings.Session()

    returned = KubeResourceVersion.reset_resource_version(db_session)
    self.assertEqual(returned, '0')

    stored = KubeResourceVersion.get_current_resource_version(db_session)
    self.assertEqual(stored, '0')
def test_checkpoint_resource_version(self):
    """Checkpointing '7' makes the current resource version read back as '7'."""
    db_session = settings.Session()

    KubeResourceVersion.checkpoint_resource_version('7', db_session)

    stored = KubeResourceVersion.get_current_resource_version(db_session)
    self.assertEqual(stored, '7')
def test_reset_resource_version(self):
    """Resetting returns '0' and the stored version then reads back as '0'."""
    db_session = settings.Session()

    returned = KubeResourceVersion.reset_resource_version(db_session)
    self.assertEqual(returned, '0')

    stored = KubeResourceVersion.get_current_resource_version(db_session)
    self.assertEqual(stored, '0')
def _make_kube_watcher(self):
    """Create, start and return a ``KubernetesJobWatcher``.

    The watcher is given the resource version that ``KubeResourceVersion``
    reports for ``self._session``.
    """
    current_version = KubeResourceVersion.get_current_resource_version(
        self._session)
    job_watcher = KubernetesJobWatcher(
        self.namespace,
        self.watcher_queue,
        current_version,
        self.worker_uuid,
    )
    job_watcher.start()
    return job_watcher
def test_checkpoint_resource_version(self):
    """Checkpointing '7' makes the current resource version read back as '7'."""
    db_session = settings.Session()

    KubeResourceVersion.checkpoint_resource_version('7', db_session)

    stored = KubeResourceVersion.get_current_resource_version(db_session)
    self.assertEqual(stored, '7')