def wait_for_change(list_func, col_lists, table_name):
    """Mirror Kubernetes watch events for a resource into a database table.

    Drops and recreates ``table_name``, then translates ADDED/MODIFIED/
    DELETED events from ``list_func`` into INSERT/UPDATE/DELETE statements
    built by the module-level SQL helpers.

    :param list_func: a kubernetes ``list_*`` API callable accepted by
        ``watch.Watch().stream``
    :param col_lists: column definitions forwarded to the SQL helpers
    :param table_name: destination table name.  NOTE(review): interpolated
        directly into SQL below — callers must pass a trusted identifier,
        never user-controlled input.
    """
    cursor = conn.cursor()
    cursor.execute("DROP TABLE IF EXISTS %s" % table_name)
    cursor.execute(create_sql(table_name, col_lists))
    w = watch.Watch()
    stream = w.stream(list_func)
    for event in stream:
        event_type = event['type']
        pod = event['object']
        if event_type == "ADDED":
            sql = insert_object_to_db(table_name, pod, col_lists)
        elif event_type == "MODIFIED":
            sql = update_object_to_db(table_name, pod, col_lists)
        elif event_type == "DELETED":
            sql = delete_object_in_db(table_name, pod, col_lists)
        else:
            # Previously an unknown event type either re-executed the stale
            # `sql` from the prior iteration or raised NameError on the
            # first iteration; skip such events explicitly instead.
            continue
        try:
            cursor.execute(*sql)
            conn.commit()
        except Exception as e:  # was Python-2 `except Exception, e`
            print(sql)
            print(e)
def watch_for_delete(self, num):
    """Block until matching pods are DELETED.

    Watches all-namespace pod events for up to 5 minutes, filtered to the
    "default" namespace and to either the pod named ``self.name`` or pods
    labelled ``deployment=self.deployment`` when ``self.name`` is empty.

    :param num: number of deletions to wait for in deployment mode
    :return: True once enough deletions were observed, False on timeout
    """
    v1 = client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_pod_for_all_namespaces,
                          timeout_seconds=5 * 60):
        resource = event['object']
        message_type = event['type']
        if resource.metadata.namespace != "default":
            continue
        # metadata.labels can be None; the original crashed with
        # AttributeError here (watch_for_success already guards this case).
        labels = resource.metadata.labels or {}
        if self.name == "" and labels.get("deployment") != self.deployment:
            continue
        if self.name != "" and resource.metadata.name != self.name:
            continue
        LOG.info("Event: %s %s" %
                 (event['type'], event['object'].metadata.name))
        if message_type == "DELETED":
            if self.name == "":
                self.pods.append(resource)
                if len(self.pods) == num:
                    self.pods = []
                    return True
            else:
                return True
    return False
def _k8s_flapp_watcher(self):
    """Long-running watch loop over the `flapps` custom resource.

    Repeatedly opens a watch stream and feeds every event into
    self._produce_event, tracking resourceVersion so each new stream
    resumes where the previous one ended.  Exits when self._running
    becomes False.
    """
    resource_version = '0'
    watcher = watch.Watch()
    while True:
        if not self._running:
            watcher.stop()
            break
        # resource_version '0' means getting a recent resource without
        # consistency guarantee, this is to reduce the load of etcd.
        # Ref: https://kubernetes.io/docs/reference/using-api
        # /api-concepts/ #the-resourceversion-parameter
        stream = watcher.stream(
            k8s_client.crds.list_namespaced_custom_object,
            group=FEDLEARNER_CUSTOM_GROUP,
            version=FEDLEARNER_CUSTOM_VERSION,
            namespace=Envs.K8S_NAMESPACE,
            plural='flapps',
            resource_version=resource_version,
            timeout_seconds=1800,  # Sometimes watch gets stuck
            _request_timeout=1800,  # Sometimes HTTP GET gets stuck
        )
        try:
            for event in stream:
                self._produce_event(event, ObjectType.FLAPP)
                metadata = event['object'].get('metadata')
                # Remember where to resume the next watch from.
                if metadata['resourceVersion'] is not None:
                    resource_version = metadata['resourceVersion']
                    logging.debug(
                        f'resource_version now: {resource_version}')
        except client.exceptions.ApiException as e:
            logging.error(f'watcher:{str(e)}')
            if e.status == HTTPStatus.GONE:
                # It has been too old, resources should be relisted
                resource_version = '0'
        except Exception as e:  # pylint: disable=broad-except
            logging.error(f'K8s watcher gets event error: {str(e)}',
                          exc_info=True)
def watch_prometheusclusters(api_version):
    """Watch PrometheusCluster custom objects and (un)provision clusters.

    :param api_version: CRD API version to watch; only 'v1alpha1' has a
        provisioner.  Events for other versions are logged and skipped
        (the original fell through to a NameError on `provisioner`).
    """
    log = logging.getLogger(__name__)
    log.debug("Loading CustomObjectsApi client")
    # https://github.com/kubernetes-client/python/blob/v11.0.0/kubernetes/client/api/custom_objects_api.py
    coa_client = client.CustomObjectsApi()
    api_group = 'relaxdiego.com'
    crd_name = 'prometheusclusters'
    log.info(f"Watching {crd_name}.{api_group}/{api_version} events")
    # References:
    # 1. Watchable methods:
    #    https://raw.githubusercontent.com/kubernetes-client/python/v11.0.0/kubernetes/client/api/core_v1_api.py
    # 2. The Watch#stream() method
    #    https://github.com/kubernetes-client/python-base/blob/d30f1e6fd4e2725aae04fa2f4982a4cfec7c682b/watch/watch.py#L107-L157
    for event in watch.Watch().stream(coa_client.list_cluster_custom_object,
                                      group=api_group,
                                      plural=crd_name,
                                      version=api_version):
        custom_obj = event['raw_object']
        log.debug(f"Received: {custom_obj}")
        event_type = event['type']
        if api_version != 'v1alpha1':
            log.warning(f"Unhandled event type '{event_type}'")
            continue
        provisioner = v1alpha1
        # NOTE(review): `PrometheuClusterObject` looks like a typo for
        # "Prometheus..." but matches the provisioner module's name.
        pco = provisioner.PrometheuClusterObject(**custom_obj)
        log.info(f"{event_type} {pco}")
        if event_type == "ADDED":
            provisioner.install(pco)
        elif event_type == "DELETED":
            provisioner.uninstall(pco)
        elif event_type == "MODIFIED":
            provisioner.upgrade(pco)
        else:
            log.info(f"Unhandled event type '{event_type}'")
def main(k8s_context=None):
    """Watch services in all namespaces and register labelled ones in Consul.

    A service is registered when it carries the label
    ``registerWithMesh: "true"``; LoadBalancer services additionally need an
    external IP before registration.

    :param k8s_context: unused here; kubeconfig comes from kube_Config_File
    """
    logging.info(f"Deregistering all services and starting clean")
    k8s_consul_deregister_svc.consul_deregister_all(consul_host=consul_url)
    # configure client
    config.load_kube_config(config_file=kube_Config_File)
    api = client.CoreV1Api()
    # Setup new watch
    w = watch.Watch()
    logging.info(f"Watching for Kubernetes services for all namespaces")
    for item in w.stream(api.list_service_for_all_namespaces,
                         timeout_seconds=600):
        svc = item['object']
        # get the metadata labels
        labels = svc.metadata.labels
        # register only services explicitly labelled registerWithMesh=true
        register_flag = bool(labels) and labels.get('registerWithMesh') == "true"
        # notify consul about the service
        logging.info(
            f"service {svc.metadata.name} k8s event type is {item.get('type')}, label registerWithMesh is set to: {register_flag} and service type is {svc.spec.type}"
        )
        try:
            ext_ip = svc.status.load_balancer.ingress[0].ip
        except TypeError:
            logging.info("External IP not set for LoadBalancer service")
            ext_ip = None
        if register_flag and svc.spec.type in ("NodePort", "ClusterIP"):
            notify_consul(svc, item['type'], labels)
        elif register_flag and svc.spec.type == "LoadBalancer":
            # Was `in ("LoadBalancer")` — a substring test against the
            # string "LoadBalancer", not a tuple membership test.
            if not (ext_ip is None):
                logging.info(f"external ip is not null: {ext_ip}")
                notify_consul(svc, item['type'], labels)
            else:
                logging.info(f"external ip is null: {ext_ip}")
        else:
            logging.info(f"watch stream for new events")
def watch_for_success(self):
    """Wait until enough matching pods report Ready.

    In deployment mode (self.name == "") returns True once self.replicas
    matching pods are ready; in single-pod mode returns True as soon as the
    named pod is ready.  Returns False if the 10-minute watch expires.
    """
    api = client.CoreV1Api()
    watcher = watch.Watch()
    for ev in watcher.stream(api.list_pod_for_all_namespaces,
                             timeout_seconds=10 * 60):
        pod = ev['object']
        meta = pod.metadata
        if meta.namespace != "default":
            continue
        by_deployment = self.name == ""
        if by_deployment:
            # Pods without labels are NOT skipped (matches original logic).
            if meta.labels is not None and \
                    meta.labels.get("deployment") != self.deployment:
                continue
        elif meta.name != self.name:
            continue
        LOG.info("Event: %s %s" % (ev['type'], ev['object'].metadata.name))
        if not self.__is_pod_ready(pod):
            continue
        if not by_deployment:
            return True
        self.pods.append(pod)
        if len(self.pods) == self.replicas:
            self.pods = []
            return True
    return False
def _run(self, kube_client, resource_version, worker_uuid, kube_config):
    """Stream worker-pod events and process each one.

    :param kube_client: CoreV1Api-like client used to watch pods
    :param resource_version: watch resume point; falsy starts fresh
    :param worker_uuid: value for the `airflow-worker` label selector
    :param kube_config: config whose kube_client_request_args are merged
        into the watch kwargs
    :return: resourceVersion of the last processed pod, the result of
        process_error on an ERROR event, or None if no event was seen
    """
    self.log.info(
        'Event: and now my watch begins starting at resource_version: %s',
        resource_version)
    watcher = watch.Watch()
    kwargs = {'label_selector': 'airflow-worker={}'.format(worker_uuid)}
    if resource_version:
        kwargs['resource_version'] = resource_version
    if kube_config.kube_client_request_args:
        for key, value in kube_config.kube_client_request_args.items():
            kwargs[key] = value
    last_resource_version = None
    # Multi-namespace mode watches pods cluster-wide; otherwise only this
    # executor's own namespace is watched.
    if self.multi_namespace_mode:
        list_worker_pods = functools.partial(
            watcher.stream, kube_client.list_pod_for_all_namespaces,
            **kwargs)
    else:
        list_worker_pods = functools.partial(
            watcher.stream, kube_client.list_namespaced_pod, self.namespace,
            **kwargs)
    for event in list_worker_pods():
        task = event['object']
        self.log.info('Event: %s had an event of type %s',
                      task.metadata.name, event['type'])
        if event['type'] == 'ERROR':
            # ERROR events abort the stream immediately.
            return self.process_error(event)
        self.process_status(
            pod_id=task.metadata.name,
            namespace=task.metadata.namespace,
            status=task.status.phase,
            labels=task.metadata.labels,
            resource_version=task.metadata.resource_version,
            event=event,
        )
        last_resource_version = task.metadata.resource_version
    return last_resource_version
def main():
    """Watch a cluster-scoped custom resource and invoke the mapped API.

    The CRD group/version/plural are derived from the API_NAME, API_VERSION
    and API_RESOURCE environment variables; each event is handed to
    ``call`` (currently a logging stub).
    """
    config.load_incluster_config()
    crds = client.CustomObjectsApi()
    # TODO(mattmoor): Share a library with the meta controller
    name = os.environ["API_NAME"]
    domain = "%s.googleapis.com" % name
    version = os.environ["API_VERSION"]
    resource = os.environ["API_RESOURCE"]
    plural = resource.lower() + "s"
    creds = AppAssertionCredentials()
    api = discovery_build(name, version, credentials=creds)

    def call(obj):
        # Accessing "spec" also validates the object shape (KeyError is
        # caught by the handler below).
        spec = obj["spec"]
        logging.error("TODO call %s/%s %s on %s", name, version, resource,
                      json.dumps(obj, indent=1))

    resource_version = ""
    while True:
        stream = watch.Watch().stream(crds.list_cluster_custom_object,
                                      domain, version, plural,
                                      resource_version=resource_version)
        for event in stream:
            # TODO(mattmoor): Execute in a threadpool.
            try:
                obj = event["object"]
                call(obj)
                # Configure where to resume streaming.
                metadata = obj.get("metadata")
                if metadata:
                    resource_version = metadata["resourceVersion"]
            except Exception:
                # Was a bare `except:`, which also swallowed SystemExit and
                # KeyboardInterrupt; narrow it to Exception.
                logging.exception("Error handling event")
def _start_id_giver(self, docker_image_name):
    """Ensure the lithops id-giver job is running and return its pod IP.

    Reuses an existing id-giver pod when one is present; otherwise deletes
    any stale job, creates a fresh one from the default job template, and
    waits (via a pod watch) until the new pod is Running.

    :param docker_image_name: container image for the id-giver job
    :return: the pod IP of the running id-giver pod
    """
    job_name = 'lithops-idgiver'
    idgiver_pods = self.core_api.list_namespaced_pod(
        namespace=self.namespace,
        label_selector="job-name={}".format(job_name))
    if len(idgiver_pods.items) > 0:
        return idgiver_pods.items[0].status.pod_ip
    # Best-effort cleanup of a stale job before recreating it.
    try:
        self.batch_api.delete_namespaced_job(
            name=job_name, namespace=self.namespace,
            propagation_policy='Background')
        time.sleep(2)
    except Exception:
        # The job usually does not exist; deletion failure is not fatal.
        pass
    job_res = yaml.safe_load(k8s_config.JOB_DEFAULT)
    job_res['metadata']['name'] = job_name
    job_res['metadata']['namespace'] = self.namespace
    container = job_res['spec']['template']['spec']['containers'][0]
    container['image'] = docker_image_name
    container['env'][0]['value'] = 'id_giver'
    # The original wrapped this in `try/except Exception as e: raise e`,
    # which only obscured the traceback; let errors propagate directly.
    self.batch_api.create_namespaced_job(namespace=self.namespace,
                                         body=job_res)
    w = watch.Watch()
    for event in w.stream(self.core_api.list_namespaced_pod,
                          namespace=self.namespace,
                          label_selector="job-name={}".format(job_name)):
        if event['object'].status.phase == "Running":
            return event['object'].status.pod_ip
def _run(self, kube_client: client.CoreV1Api,
         resource_version: Optional[str], worker_uuid: str,
         kube_config: Any) -> Optional[str]:
    """Stream this namespace's worker-pod events and process each one.

    :param kube_client: CoreV1Api used to watch pods
    :param resource_version: watch resume point; falsy starts fresh
    :param worker_uuid: value for the `airflow-worker` label selector
    :param kube_config: config whose kube_client_request_args are merged
        into the watch kwargs
    :return: resourceVersion of the last processed pod, the result of
        process_error on an ERROR event, or None if no event was seen
    """
    self.log.info(
        'Event: and now my watch begins starting at resource_version: %s',
        resource_version
    )
    watcher = watch.Watch()
    kwargs = {'label_selector': 'airflow-worker={}'.format(worker_uuid)}
    if resource_version:
        kwargs['resource_version'] = resource_version
    if kube_config.kube_client_request_args:
        for key, value in kube_config.kube_client_request_args.items():
            kwargs[key] = value
    last_resource_version: Optional[str] = None
    for event in watcher.stream(kube_client.list_namespaced_pod,
                                self.namespace, **kwargs):
        task = event['object']
        self.log.info(
            'Event: %s had an event of type %s',
            task.metadata.name, event['type']
        )
        if event['type'] == 'ERROR':
            # ERROR events abort the stream immediately.
            return self.process_error(event)
        self.process_status(
            pod_id=task.metadata.name,
            namespace=task.metadata.namespace,
            status=task.status.phase,
            labels=task.metadata.labels,
            resource_version=task.metadata.resource_version,
            event=event,
        )
        last_resource_version = task.metadata.resource_version
    return last_resource_version
def handle(self, *args, **options):
    """Django management-command entry: watch aves-job pods forever.

    Installs quit handlers for SIGABRT/SIGINT/SIGTERM, then streams pod
    events matching the aves job label and forwards ADDED/MODIFIED events
    to the celery task queue.  The watch loop restarts on any failure.

    :raises Exception: when the configured cluster is not "k8s"
    """
    signal.signal(signal.SIGABRT, quit_handler)
    signal.signal(signal.SIGINT, quit_handler)
    signal.signal(signal.SIGTERM, quit_handler)
    if settings.AVES2_CLUSTER != "k8s":
        raise Exception('k8s cluster is required')
    api = client.CoreV1Api()
    watcher = watch.Watch()
    label = f'app={settings.AVES_JOB_LABEL}'
    while True:
        try:
            for event in watcher.stream(api.list_pod_for_all_namespaces,
                                        label_selector=label,
                                        pretty=True,
                                        watch=True):
                event_type = event.get('type')
                pod = event['object']
                pod_name = pod.metadata.name
                phase = pod.status.phase
                logger.info(
                    f"receive pod event type:{event_type} pod_name:{pod_name} phase:{phase}"
                )
                # Order conditions chronologically before handing off.
                if event['object'].status.conditions:
                    event['object'].status.conditions = sorted(
                        event['object'].status.conditions,
                        key=lambda x: x.last_transition_time)
                if event_type not in ['MODIFIED', 'ADDED']:
                    continue
                try:
                    tasks.process_k8s_pod_event.apply_async(
                        (event, ), serializer='pickle')
                except Exception as e:
                    self.stdout.write(str(e))
        except Exception:
            # Was a silent `pass`: log before re-entering the watch loop so
            # repeated failures are visible.
            logger.exception("pod watch stream failed; restarting")
def watch(self, resource, namespace=None, name=None, label_selector=None,
          field_selector=None, resource_version=None, timeout=None):
    """
    Stream events for a resource from the Kubernetes API.

    :param resource: the API resource object used to query the API
    :param namespace: namespace to query
    :param name: name of the resource instance to query
    :param label_selector: label selector to filter results
    :param field_selector: field selector to filter results
    :param resource_version: only events with a resource_version greater
        than this value are returned
    :param timeout: seconds to wait before terminating the stream
    :return: yields event dicts with keys:
        'type': event type such as "ADDED", "DELETED", etc.
        'raw_object': dict representing the watched object
        'object': a ResourceInstance wrapping raw_object

    Example:
        client = DynamicClient(k8s_client)
        v1_pods = client.resources.get(api_version='v1', kind='Pod')

        for e in v1_pods.watch(resource_version=0, namespace=default,
                               timeout=5):
            print(e['type'])
            print(e['object'].metadata)
    """
    raw_stream = watch.Watch().stream(
        resource.get,
        namespace=namespace,
        name=name,
        field_selector=field_selector,
        label_selector=label_selector,
        resource_version=resource_version,
        serialize=False,
        timeout_seconds=timeout,
    )
    for raw_event in raw_stream:
        # Wrap the plain object in a ResourceInstance before yielding.
        raw_event['object'] = ResourceInstance(resource, raw_event['object'])
        yield raw_event
def check_pod_status(pod_dict):
    """Watch azure-arc pods and record per-pod readiness in pod_dict.

    pod_dict maps pod name -> 1 (all containers running) or 0 (some
    container not running).  Returns as soon as every tracked pod is 1;
    otherwise reports a timeout once the 360s watch window ends.
    """
    v1 = kube_client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_namespaced_pod, namespace='azure-arc',
                          timeout_seconds=360):
        pod_status = event['raw_object'].get('status')
        pod_name = event['object'].metadata.name
        if pod_status.get('containerStatuses'):
            for container in pod_status.get('containerStatuses'):
                if container.get('state').get('running') is None:
                    # One non-running container marks the whole pod as not
                    # ready; no point inspecting the rest.
                    pod_dict[pod_name] = 0
                    break
                else:
                    pod_dict[pod_name] = 1
                # Only reached for running containers — the break above
                # skips this check for non-running ones.
                if container.get('state').get('terminated') is not None:
                    logger.warning("%s%s%s", "The pod {} was terminated. ".format(container.get('name')), "Please ensure it is in running state once the operation completes. ", "Run 'kubectl get pods -n azure-arc' to check the pod status.")
            # All pods tracked so far are ready: success.
            if all(ele == 1 for ele in list(pod_dict.values())):
                return
    # Watch window expired without every pod becoming ready.
    telemetry.add_extension_event('connectedk8s', {'Context.Default.AzureCLI.ExitStatus': 'Timedout'})
    logger.warning("%s%s", 'The pods were unable to start before timeout. ', 'Please run "kubectl get pods -n azure-arc" to ensure if the pods are in running state.')
def _create_stream(self, namespace):
    """Build a kubernetes watch and its event stream for our model.

    :param namespace: namespace to watch; falsy watches without one
    :return: a (watch, stream) pair; either element may be None when the
        expected API exception class occurs during setup
    """
    watcher = None
    event_stream = None
    expected_error = self.get_exception_class()
    try:
        list_method = self.lookup_method('list', namespace)
        watcher = watch.Watch()
        # monkey patch for access to OpenShift models
        watcher._api_client = self.api_client
        call_args = (list_method, namespace) if namespace else (list_method,)
        event_stream = watcher.stream(*call_args,
                                      _request_timeout=self.timeout)
    except expected_error:
        # Expected API failure: return whatever was constructed so far.
        pass
    return watcher, event_stream
def main(k8s_context=None):
    """Watch services in all namespaces and register labelled ones in Consul.

    A service is registered when it carries the label
    ``registerWithMesh: "true"``.

    :param k8s_context: kubeconfig context to load
    """
    # setup the namespace
    logging.info(f"Running service for Kubernetes cluster {k8s_context}")
    ns = os.getenv("K8S_NAMESPACE")
    if ns is None:
        ns = ""
    # configure client
    config.load_kube_config(context=k8s_context)
    api = client.CoreV1Api()
    # Setup new watch
    w = watch.Watch()
    logging.info("Watching for Kubernetes services for all namespaces")
    for item in w.stream(api.list_service_for_all_namespaces,
                         timeout_seconds=0):
        svc = item['object']
        # get the metadata labels
        labels = svc.metadata.labels
        # look for a label named "registerWithMesh"; default to not
        # registering.  The original left register_flag unbound when a
        # service had no labels at all, raising NameError below.
        register_flag = False
        if not labels:
            logging.info('No label found')
        else:
            try:
                register_flag = labels['registerWithMesh'] == "true"
            except KeyError:
                logging.info(
                    f"label: registerWithMesh not found for service {svc.metadata.name}"
                )
        # notify consul about the service
        if register_flag:
            notify_consul(svc, item['type'])
def get_k8_instance(service_token_path, service_cert_path, namespace_path):
    """Build an in-cluster Kubernetes client configuration.

    Reads the service-account token, CA cert and namespace from the given
    paths, the API host/port and pod name from the environment, and makes
    the resulting configuration the client default.

    :param service_token_path: path to the service-account token file
    :param service_cert_path: path to the service-account CA cert file
    :param namespace_path: path to the namespace file
    :return: (CoreV1Api, Watch, namespace, pod_name)
    :raises ApiException: when a credential file is missing or empty
    :raises EnvironmentError: when POD_NAME is unset or empty
    """
    configuration = client.Configuration()
    # Get K8 service host and port from environment
    configuration.host = "https://%s:%s" % (
        os.getenv("KUBERNETES_SERVICE_HOST"),
        os.getenv("KUBERNETES_SERVICE_PORT"))
    # Pull service account token from secrets and add to configuration
    if not os.path.isfile(service_token_path):
        raise ApiException("Service token file does not exist.")
    with open(service_token_path) as f:
        token = f.read()
    if not token:
        raise ApiException("Token file exists but is empty.")
    configuration.api_key['authorization'] = "bearer " + token.strip('\n')
    # Pull service cert from secrets and add to configuration
    if not os.path.isfile(service_cert_path):
        raise ApiException("Service certification file does not exist.")
    with open(service_cert_path) as f:
        if not f.read():
            raise ApiException("Cert file exists but is empty.")
    configuration.ssl_ca_cert = service_cert_path
    client.Configuration.set_default(configuration)
    # Pull namespace from secrets
    with open(namespace_path, 'r') as fin:
        namespace = fin.read()
    # Pull pod name from environment.  The original used
    # os.environ['POD_NAME'], which raises KeyError before its own
    # `pod_name == None` check could ever run; use .get() so the intended
    # EnvironmentError is raised instead.
    pod_name = os.environ.get('POD_NAME')
    if not pod_name:
        raise EnvironmentError("Pod name not found in POD_NAME.")
    return client.CoreV1Api(), watch.Watch(), namespace, pod_name
def start_watch(self, on_success, on_error):
    """Watch secrets in self.namespace and fan updates out to listeners.

    For every secret event, each listener is notified; on_success is called
    with the event id when a listener reports a change, on_error when the
    listener raises.
    """
    config.load_kube_config(KUBE_CONFIG_PATH)
    cfg = Configuration()
    cfg.verify_ssl = True
    Configuration.set_default(cfg)
    core_api = client.CoreV1Api()
    watcher = watch.Watch()
    LOG.info('Monitor secrets in %s' % self.namespace)
    for item in watcher.stream(core_api.list_namespaced_secret,
                               namespace=self.namespace):
        event_data = CertUpdateEventData(item)
        for listener in self.listeners:
            update_event = CertUpdateEvent(listener, event_data)
            try:
                if listener.notify_changed(event_data):
                    on_success(update_event.get_id())
            except Exception as e:
                LOG.error('Monitoring action %s failed. %s' %
                          (event_data, e))
                # URLError is routine (endpoint unreachable); anything else
                # gets a full traceback.
                if not isinstance(e, URLError):
                    LOG.exception(e)
                on_error(update_event)
def watch_for_policies():
    """Watch NetworkPolicy events cluster-wide and create firewall rules.

    Runs as a tornado coroutine body (note the ``yield gen.Task`` calls):
    ADDED policies trigger create_new_policy_rules, UPDATED ones trigger
    create_updated_policy_rules.  Stops the IOLoop when the stream ends.
    """
    config.load_kube_config()
    v1 = client.ExtensionsV1beta1Api()
    network_policy = {}
    # The original declared `network_policy_update` but used
    # `network_policy_updated` below, raising NameError on the first
    # UPDATED event; the names now agree.
    network_policy_updated = {}
    w = watch.Watch()
    # Guard against printing `result` before any task has run.
    result = None
    for event in w.stream(v1.list_network_policy_for_all_namespaces):
        print(event)
        print("Event: %s %s %s" %
              (event['type'], event['object'].kind,
               event['object'].metadata.name))
        if event['type'] == 'ADDED':
            if event['object'].metadata.uid not in network_policy:
                network_policy[
                    event['object'].metadata.uid] = event['raw_object']
                result = yield gen.Task(create_new_policy_rules,
                                        network_policy)
        elif event['type'] == 'UPDATED':
            network_policy_updated[
                event['object'].metadata.uid] = event['raw_object']
            result = yield gen.Task(create_updated_policy_rules,
                                    network_policy_updated)
        print('result is', result)
    IOLoop.instance().stop()
def get_status(name, logging=True):
    """Poll pod events until the named job's pod reaches a terminal phase.

    :param name: job name used to match the pod's `job-name` label
    :param logging: when True, write the pod's logs to a log file
    :return: (final phase in lowercase, pod name)
    """
    api = client.CoreV1Api()
    watcher = watch.Watch()
    status = "running"
    pod = None
    terminal = ("succeeded", "failed")
    while status not in terminal:
        for event in watcher.stream(api.list_namespaced_pod,
                                    namespace="default",
                                    timeout_seconds=0):
            meta = event["object"].metadata
            if meta.labels.get("job-name", None) == str(name):
                pod = meta.name
                status = event["object"].status.phase.lower()
            if status in terminal:
                break
        watcher.stop()
    logs = api.read_namespaced_pod_log(name=pod, namespace="default")
    if logging:
        create_logfile(pod, logs)
    return status, pod
def crd_watch():
    """Watch KadaluVolume custom resources and dispatch event handlers.

    ADDED/MODIFIED/DELETED events are routed to handle_added /
    handle_modified / handle_deleted; events without a spec are ignored.
    """
    crds = client.CustomObjectsApi(k8s_client)
    watcher = watch.Watch()
    resource_version = ""
    dispatch = {
        "ADDED": handle_added,
        "MODIFIED": handle_modified,
        "DELETED": handle_deleted,
    }
    for event in watcher.stream(crds.list_cluster_custom_object,
                                "kadalu-operator.gluster", "v1alpha1",
                                "kadaluvolumes",
                                resource_version=resource_version):
        obj = event["object"]
        operation = event['type']
        if not obj.get("spec"):
            continue
        # Track the latest resourceVersion seen.
        resource_version = obj.get("metadata")['resourceVersion']
        info("Event", operation=operation, object=repr(obj))
        handler = dispatch.get(operation)
        if handler is not None:
            handler(crds, obj)
def get_pod_log_stream(
    self,
    pod_name: str,
    container: Optional[str] = "",
    namespace: Optional[str] = None,
) -> Tuple[watch.Watch, Generator[str, None, None]]:
    """
    Open a streaming log reader for a container in a kubernetes pod.

    :param pod_name: pod name
    :param container: container name ("" streams the pod's only container)
    :param namespace: kubernetes namespace; defaults to self.get_namespace()
    :return: the Watch (so callers can stop the stream) and the line
        generator itself
    """
    target_ns = namespace if namespace else self.get_namespace()
    log_watch = watch.Watch()
    line_stream = log_watch.stream(
        self.core_v1_client.read_namespaced_pod_log,
        name=pod_name,
        container=container,
        namespace=target_ns,
    )
    return log_watch, line_stream
def logs(self, task: KubernetesTask):
    """Stream a task pod's logs as a JSON stream.

    Waits for the pod to be ready, then opens a log watch and wraps each
    line (newline-terminated) in json_stream.  Retries on any failure.

    :param task: the task whose pod logs to stream
    :raises TypeError: if task is not a KubernetesTask
    """
    if not isinstance(task, KubernetesTask):
        raise TypeError('Expected a valid KubernetesTask')
    # The original retried by calling self.logs(task) recursively on *any*
    # exception, which could only end in RecursionError; retry in a loop
    # with the same semantics instead.
    while True:
        # wait for pod to become ready
        self.wait_until_ready(task.id)
        try:
            w = watch.Watch()
            logs = w.stream(
                self.core.read_namespaced_pod_log,
                name=task.id,
                namespace=self.namespace,
            )

            def add_newline_stream(task):
                for log in logs:
                    yield log + '\n'

            return json_stream(add_newline_stream(task))
        except Exception:
            continue
def run(self):
    """Thread body: watch Pod events in the namespace and enqueue them.

    Stops as soon as self.terminate is set.  The first enqueued event
    carries start_up=True, all later ones False.
    """
    resource_version = ''
    watcher = watch.Watch()
    stream = watcher.stream(self.v1.list_namespaced_event,
                            namespace=self.namespace,
                            field_selector='involvedObject.kind=Pod')
    for event in stream:
        if self.terminate:
            break
        obj = event['object']
        # Kept for parity with the original; not read after assignment.
        resource_version = int(obj.metadata.resource_version)
        payload = {
            'eType': EventType.Watch,
            'pod': obj.involved_object.name,
            'reason': obj.reason,
            'message': obj.message,
            'start_up': self.at_start_up,
        }
        self.at_start_up = False
        # logger.info('Received event: {}'.format(payload))
        self.queue.put(payload)
def getLogs(self):
    """Stream the build's master pod logs to self.logger.

    Finds the master pod for this build GUID, waits (polling) until it is
    Running or Succeeded, then tails its log stream.
    """
    import time  # local import: polling delay below

    label = "app=master" + "-" + self.buildGuid
    resp = self.coreV1ApiInstance.list_namespaced_pod(label_selector=label,
                                                      namespace='default')
    podName = resp.items[0].metadata.name
    status = ''
    while True:
        resp = self.coreV1ApiInstance.read_namespaced_pod(
            name=podName, namespace='default')
        status = resp.status.phase
        if status == 'Running' or status == 'Succeeded':
            break
        # The original polled in a tight loop with no delay, hammering the
        # API server; back off briefly between status reads.
        time.sleep(1)
    w = watch.Watch()
    try:
        for line in w.stream(self.coreV1ApiInstance.read_namespaced_pod_log,
                             name=podName,
                             namespace='default'):
            self.logger.info(line)
    except Exception as e:
        # The log stream ends with an exception when the pod goes away.
        self.logger.error(e)
    self.logger.info("pod terminated")
def watch_pod(self):
    """
    Watch pod create/update/delete events and keep the neo4j graph in sync.

    ADDED/MODIFIED events upsert the pod node with its current status and
    IP; DELETED events remove it.
    """
    watcher = watch.Watch()
    for event in watcher.stream(self.v1api.list_pod_for_all_namespaces):
        obj = event['object']
        # Objects that can relate to the pod node in the graph.
        pod_labels = obj.metadata.labels
        pod_namespace = obj.metadata.namespace
        pod_node_name = obj.spec.node_name
        kind = event['type']
        if kind in ('ADDED', 'MODIFIED'):
            # Extract properties from the event and enrich with status.
            props = self._build_base_props(event)
            props['status'] = obj.status.phase
            props['pod_ip'] = obj.status.pod_ip
            self.neoclient.upsert_pod(props,
                                      labels=pod_labels,
                                      namespace=pod_namespace,
                                      node_name=pod_node_name)
        elif kind == 'DELETED':
            self.neoclient.remove_pod(obj.metadata.name)
def main():
    """Demo: stream up to ten namespace events, then up to ten pod events."""
    # Configs can be set in Configuration class directly or using helper
    # utility. If no argument provided, the config will be loaded from
    # default location.
    config.load_kube_config()
    v1 = client.CoreV1Api()
    count = 10
    w = watch.Watch()
    for event in w.stream(v1.list_namespace, timeout_seconds=10):
        print("Event: %s %s" % (event['type'], event['object'].metadata.name))
        count -= 1
        if not count:
            w.stop()
    print("Finished namespace stream.")
    # Reset the counter: the original carried the (already zero, then
    # negative) count into the second loop, so `if not count` could never
    # fire again and the pod stream only ended via its timeout.
    count = 10
    for event in w.stream(v1.list_pod_for_all_namespaces, timeout_seconds=10):
        print("Event: %s %s %s" % (event['type'], event['object'].kind,
                                   event['object'].metadata.name))
        count -= 1
        if not count:
            w.stop()
    print("Finished pod stream.")
def wait_vnf(self):
    """Wait until every expected vIMS deployment reports a ready replica.

    Watches deployments in self.namespace for up to self.watch_timeout
    seconds.  Returns True (and sets self.result to 50%) when every name in
    self.deployment_list became ready, False otherwise.
    """
    assert self.namespace
    pending = self.deployment_list.copy()
    watcher = watch.Watch()
    stream = watcher.stream(func=self.appsv1.list_namespaced_deployment,
                            namespace=self.namespace,
                            timeout_seconds=self.watch_timeout)
    for event in stream:
        self.__logger.debug(event)
        deployment = event['object']
        dep_name = deployment.metadata.name
        if deployment.status.ready_replicas == 1 and dep_name in pending:
            pending.remove(dep_name)
            self.__logger.info("%s started in %0.2f sec", dep_name,
                               time.time() - self.start_time)
        if not pending:
            # Everything is up; end the watch early.
            watcher.stop()
    if pending:
        self.__logger.error("Cannot deploy vIMS")
        return False
    self.result = 1 / 2 * 100
    return True
def _watch_service(self, service_type, linger=10):
    """Generator yielding endpoint lists whenever pods of a service change.

    Watches pods matching the service's label selector; each pod event (or
    stream timeout that reveals a pod-set change) yields the refreshed
    endpoint list for the service type.

    :param service_type: service whose pods to watch
    :param linger: watch timeout in seconds, or a callable returning one
        (callable allows faster refresh while pods are not ready)
    """
    from urllib3.exceptions import ReadTimeoutError
    from kubernetes import watch
    cur_pods = set(self._get_endpoints_by_service_type(service_type, update=True))
    w = watch.Watch()
    pod_to_ep = self._service_pod_to_ep[service_type]
    while True:
        # when some pods are not ready, we refresh faster
        linger_seconds = linger() if callable(linger) else linger
        streamer = w.stream(
            self._client.list_namespaced_pod,
            namespace=self._k8s_namespace,
            label_selector=self._get_label_selector(service_type),
            timeout_seconds=linger_seconds
        )
        while True:
            try:
                # Pull the next event on the pool; StopIteration is used as
                # a sentinel default so stream exhaustion is detectable.
                event = self._pool.spawn(next, streamer, StopIteration).result()
                if event is StopIteration:
                    raise StopIteration
            except (ReadTimeoutError, StopIteration):
                # Stream ended or timed out: re-list and only yield when
                # the pod set actually changed.
                new_pods = set(self._get_endpoints_by_service_type(service_type, update=True))
                if new_pods != cur_pods:
                    cur_pods = new_pods
                    yield self._get_endpoints_by_service_type(service_type, update=False)
                break
            except:  # noqa: E722  # pragma: no cover  # pylint: disable=bare-except
                logger.exception('Unexpected error when watching on kubernetes')
                break
            # Record the pod's endpoint only while the pod is ready.
            obj_dict = event['object'].to_dict()
            pod_name, endpoint = self._extract_pod_name_ep(obj_dict)
            pod_to_ep[pod_name] = endpoint \
                if endpoint and self._extract_pod_ready(obj_dict) else None
            yield self._get_endpoints_by_service_type(service_type, update=False)
def __watch(self):
    """Mirror this node's opportunistic-resource objects into local state.

    Watches cluster-wide opportunistic-resource custom objects labelled for
    this node, applying ADDED/DELETED events to
    self.__opportunistic_resources under the lock.  Any failure (including
    an expired-resource event) closes the stream and exits the process via
    the exit handler.
    """
    label_selector = "{}={}".format(
        OPPORTUNISTIC_RESOURCE_NODE_NAME_LABEL_KEY, self.__node_name)
    log.info("Starting opportunistic resource watch...")
    stream = None
    try:
        stream = watch.Watch().stream(
            self.__custom_api.list_cluster_custom_object,
            group="titus.netflix.com",
            version="v1",
            plural="opportunistic-resources",
            label_selector=label_selector)
        for event in stream:
            log.info("Event: %s", event)
            if self.__is_expired_error(event):
                # Treated as fatal: handled by the except block below.
                raise Exception("Opportunistic resource expired")
            event_type = event['type']
            if event_type not in HANDLED_EVENTS:
                log.warning("Ignoring unhandled event: %s", event)
                continue
            event_metadata_name = event['object']['metadata']['name']
            # Serialize access with readers of the resource map.
            with self.__lock:
                if event_type == ADDED:
                    self.__opportunistic_resources[
                        event_metadata_name] = event
                elif event_type == DELETED:
                    self.__opportunistic_resources.pop(
                        event_metadata_name, None)
    except Exception:
        if stream is not None:
            stream.close()
        log.exception("Watch of opportunistic resources failed")
        self.__exit_handler.exit(OPPORTUNISTIC_WATCH_FAILURE)
def tail_logs(team_name, tasks) -> None:
    """Tail the pod logs of each task's kubernetes job.

    For every task, finds its job pods in the team namespace; aborts on a
    Failed/Unschedulable pod condition, tails the log stream of any pod
    whose container has started, and otherwise reports the task as not yet
    started.

    :param team_name: fallback namespace when AWS_ORBIT_USER_SPACE is unset
    :param tasks: iterable of dicts with an "Identifier" key (the job name)
    """
    for task in tasks:
        task_id = task["Identifier"]
        _logger.info("Watching task: '%s'", task_id)
        namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_name)
        current_pods: V1PodList = CoreV1Api().list_namespaced_pod(
            namespace=namespace, label_selector=f"job-name={task_id}")
        for pod in current_pods.items:
            pod_instance: V1Pod = cast(V1Pod, pod)
            _logger.debug("pod: %s", pod_instance.metadata.name)
            pod_status: V1PodStatus = cast(V1PodStatus, pod_instance.status)
            _logger.debug("pod s: %s", pod_status)
            if pod_status.conditions:
                for c in pod_status.conditions:
                    condition: V1PodCondition = cast(V1PodCondition, c)
                    # A failed or unschedulable pod aborts the whole tail.
                    if condition.type == "Failed" or condition.reason == "Unschedulable":
                        _logger.info("pod has error status %s , %s",
                                     condition.reason, condition.message)
                        return
            if pod_status.container_statuses:
                for s in pod_status.container_statuses:
                    container_status: V1ContainerStatus = cast(
                        V1ContainerStatus, s)
                    container_state: V1ContainerState = container_status.state
                    _logger.debug("task status: %s ", container_status)
                    # Tail logs once the container has started (running or
                    # already terminated both count).
                    if container_status.started or container_state.running or container_state.terminated:
                        _logger.info("task %s status: %s",
                                     pod_instance.metadata.name,
                                     container_state)
                        w = k8_watch.Watch()
                        for line in w.stream(
                                CoreV1Api().read_namespaced_pod_log,
                                name=pod_instance.metadata.name,
                                namespace=namespace):
                            _logger.info(line)
                    else:
                        _logger.info("task not started yet for %s", task_id)