Example No. 1
    def get_job_logs(self, job_pod):
        """Get job pod's containers' logs."""
        try:
            pod_logs = ""
            # job_pod = current_k8s_corev1_api_client.read_namespaced_pod(
            #     namespace='default',
            #     name=job_pod.metadata.name)
            # we probably don't need this call again... FIXME
            container_statuses = job_pod.status.container_statuses + (
                job_pod.status.init_container_statuses or [])

            logging.info("Grabbing pod {} logs ...".format(
                job_pod.metadata.name))
            for container in container_statuses:
                if container.state.terminated:
                    container_log = current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace="default",
                        name=job_pod.metadata.name,
                        container=container.name,
                    )
                    pod_logs += "{}: :\n {}\n".format(container.name,
                                                      container_log)
                elif container.state.waiting:
                    pod_logs += "Container {} failed, error: {}".format(
                        container.name, container.state.waiting.message)

            return pod_logs
        except client.rest.ApiException as e:
            logging.error(
                "Error while connecting to Kubernetes API: {}".format(e))
            return None
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error("Unexpected error: {}".format(e))
            return None
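All of these snippets rely on names imported elsewhere in the codebase. As a minimal sketch, the imports they typically assume look roughly like the following; the reana_commons module path is an assumption and may differ between REANA versions:

import logging
import traceback

from kubernetes import client, watch
from kubernetes.client.rest import ApiException

# Pre-configured Kubernetes API clients shared across REANA components
# (assumed module path; verify against the reana-commons version in use).
from reana_commons.k8s.api_client import (
    current_k8s_batchv1_api_client,
    current_k8s_corev1_api_client,
)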
Example No. 2
    def get_container_logs(self, job_id):
        """Get job pod's containers' logs."""
        try:
            pod_logs = ''
            pod = current_k8s_corev1_api_client.read_namespaced_pod(
                namespace='default', name=job_id)
            containers = pod.spec.init_containers + pod.spec.containers \
                if pod.spec.init_containers else pod.spec.containers
            for container in containers:
                container_log = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace='default',
                        name=job_id,
                        container=container.name)
                pod_logs += '{}: \n {} \n'.format(container.name,
                                                  container_log)
            return pod_logs
        except client.rest.ApiException as e:
            logging.error(
                "Error while connecting to Kubernetes API: {}".format(e))
            return None
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error("Unexpected error: {}".format(e))
            return None
Example No. 3
def _delete_workflow_engine_pod(workflow):
    """Delete workflow engine pod."""
    try:
        jobs = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
        )
        for job in jobs.items:
            if str(workflow.id_) in job.metadata.name:
                workflow_engine_logs = current_k8s_corev1_api_client.read_namespaced_pod_log(
                    namespace=job.metadata.namespace,
                    name=job.metadata.name,
                    container="workflow-engine",
                )
                workflow.logs = (workflow.logs or "") + workflow_engine_logs + "\n"
                current_k8s_batchv1_api_client.delete_namespaced_job(
                    namespace=job.metadata.namespace,
                    propagation_policy="Background",
                    name=job.metadata.labels["job-name"],
                )
                break
    except ApiException as e:
        raise REANAWorkflowControllerError(
            "Workflow engine pod cound not be deleted {}.".format(e)
        )
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error("Unexpected error: {}".format(e))
Example No. 4
def _delete_workflow_engine_pod(workflow):
    """Delete workflow engine pod."""
    try:
        jobs = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace='default',
        )
        for job in jobs.items:
            if str(workflow.id_) in job.metadata.name:
                workflow_engine_logs = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=job.metadata.namespace,
                        name=job.metadata.name,
                        container='workflow-engine')
                workflow.logs = \
                    (workflow.logs or '') + workflow_engine_logs + '\n'
                current_k8s_batchv1_api_client.delete_namespaced_job(
                    namespace='default',
                    propagation_policy="Background",
                    name=job.metadata.labels['job-name'])
                break
    except ApiException as e:
        raise REANAWorkflowControllerError(
            "Workflow engine pod cound not be deleted {}.".format(e))
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error("Unexpected error: {}".format(e))
Example No. 5
    def get_job_logs(self, job_pod):
        """Get job pod's containers' logs."""
        try:
            pod_logs = ""
            container_statuses = (job_pod.status.container_statuses or []) + (
                job_pod.status.init_container_statuses or [])

            logging.info("Grabbing pod {} logs ...".format(
                job_pod.metadata.name))
            for container in container_statuses:
                if container.state.terminated:
                    container_log = current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
                        name=job_pod.metadata.name,
                        container=container.name,
                    )
                    pod_logs += "{}: :\n {}\n".format(container.name,
                                                      container_log)
                    if hasattr(container.state.terminated, "reason"):
                        pod_logs += "\n{}\n".format(
                            container.state.terminated.reason)
                elif container.state.waiting:
                    pod_logs += "Container {} failed, error: {}".format(
                        container.name, container.state.waiting.message)

            return pod_logs
        except client.rest.ApiException as e:
            logging.error(
                "Error while connecting to Kubernetes API: {}".format(e))
            return None
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error("Unexpected error: {}".format(e))
            return None
Example No. 6
def _get_workflow_engine_pod_logs(workflow: Workflow) -> str:
    """Get workflow engine pod's logs."""
    try:
        pods = current_k8s_corev1_api_client.list_namespaced_pod(
            namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
            label_selector=f"reana-run-batch-workflow-uuid={str(workflow.id_)}",
        )
        for pod in pods.items:
            if str(workflow.id_) in pod.metadata.name:
                return current_k8s_corev1_api_client.read_namespaced_pod_log(
                    namespace=pod.metadata.namespace,
                    name=pod.metadata.name,
                    container="workflow-engine",
                )
    except ApiException as e:
        raise REANAWorkflowControllerError(
            f"Workflow engine pod logs could not be fetched. Error: {e}")
Example No. 7
def k8s_watch_jobs(job_db):
    """Open stream connection to k8s apiserver to watch all jobs status.

    :param job_db: Dictionary which contains all current jobs.
    """
    while True:
        logging.debug('Starting a new stream request to watch Jobs')
        try:
            w = watch.Watch()
            for event in w.stream(current_k8s_batchv1_api_client.
                                  list_job_for_all_namespaces):
                logging.info('New Job event received: {0}'.format(
                    event['type']))
                job = event['object']

                # Taking note of the remaining jobs since deletion might not
                # happen straight away.
                remaining_jobs = [
                    j for j in job_db.keys() if not job_db[j]['deleted']
                ]
                if (not job_db.get(job.metadata.name)
                        or job.metadata.name not in remaining_jobs):
                    # Ignore jobs not created by this specific instance
                    # or already deleted jobs.
                    continue
                elif job.status.succeeded:
                    logging.info('Job {} succeeded.'.format(job.metadata.name))
                    job_db[job.metadata.name]['status'] = 'succeeded'
                elif (job.status.failed
                      and job.status.failed >= config.MAX_JOB_RESTARTS):
                    logging.info('Job {} failed.'.format(job.metadata.name))
                    job_db[job.metadata.name]['status'] = 'failed'
                else:
                    continue
                # Grab logs when job either succeeds or fails.
                logging.info('Getting last spawned pod for job {}'.format(
                    job.metadata.name))
                last_spawned_pod = \
                    current_k8s_corev1_api_client.list_namespaced_pod(
                        job.metadata.namespace,
                        label_selector='job-name={job_name}'.format(
                            job_name=job.metadata.name)).items[-1]
                logging.info('Grabbing pod {} logs...'.format(
                    last_spawned_pod.metadata.name))
                job_db[job.metadata.name]['log'] = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=last_spawned_pod.metadata.namespace,
                        name=last_spawned_pod.metadata.name)
                # Store job logs
                try:
                    logging.info('Storing job logs: {}'.format(
                        job_db[job.metadata.name]['log']))
                    Session.query(Job).filter_by(id_=job.metadata.name). \
                        update(dict(logs=job_db[job.metadata.name]['log']))
                    Session.commit()

                except Exception as e:
                    logging.debug(
                        'Could not store'
                        ' logs for object: {}'.format(last_spawned_pod))
                    logging.debug('Exception: {}'.format(str(e)))

                logging.info('Cleaning job {} ...'.format(job.metadata.name))
                k8s_delete_job(job)
                job_db[job.metadata.name]['deleted'] = True
        except client.rest.ApiException as e:
            logging.debug(
                "Error while connecting to Kubernetes API: {}".format(e))
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.debug("Unexpected error: {}".format(e))
Example No. 8
def watch_jobs_kubernetes(job_db):
    """Open stream connection to k8s apiserver to watch all jobs status.

    :param job_db: Dictionary which contains all current jobs.
    """
    while True:
        logging.debug('Starting a new stream request to watch Jobs')
        try:
            w = watch.Watch()
            for event in w.stream(current_k8s_batchv1_api_client.
                                  list_job_for_all_namespaces):
                logging.info('New Job event received: {0}'.format(
                    event['type']))
                job = event['object']

                # Taking note of the remaining jobs since deletion might not
                # happen straight away.
                remaining_jobs = dict()
                for job_id, job_dict in job_db.items():
                    if not job_db[job_id]['deleted']:
                        remaining_jobs[job_dict['backend_job_id']] = job_id
                if (not job_db.get(remaining_jobs.get(job.metadata.name))
                        or job.metadata.name not in remaining_jobs):
                    # Ignore jobs not created by this specific instance
                    # or already deleted jobs.
                    continue
                job_id = remaining_jobs[job.metadata.name]
                kubernetes_job_id = job.metadata.name
                if job.status.succeeded:
                    logging.info('Job job_id: {}, kubernetes_job_id: {}'
                                 ' succeeded.'.format(job_id,
                                                      kubernetes_job_id))
                    job_db[job_id]['status'] = 'succeeded'
                elif (job.status.failed
                      and job.status.failed >= config.MAX_JOB_RESTARTS):
                    logging.info(
                        'Job job_id: {}, kubernetes_job_id: {} failed.'.format(
                            job_id, kubernetes_job_id))
                    job_db[job_id]['status'] = 'failed'
                else:
                    continue
                # Grab logs when job either succeeds or fails.
                logging.info('Getting last spawned pod for kubernetes'
                             ' job {}'.format(kubernetes_job_id))
                last_spawned_pod = \
                    current_k8s_corev1_api_client.list_namespaced_pod(
                        namespace=job.metadata.namespace,
                        label_selector='job-name={job_name}'.format(
                            job_name=kubernetes_job_id)).items[-1]
                logging.info('Grabbing pod {} logs...'.format(
                    last_spawned_pod.metadata.name))
                job_db[job_id]['log'] = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=last_spawned_pod.metadata.namespace,
                        name=last_spawned_pod.metadata.name)
                store_logs(job_id=job_id, logs=job_db[job_id]['log'])

                logging.info(
                    'Cleaning Kubernetes job {} ...'.format(kubernetes_job_id))
                KubernetesJobManager.stop(kubernetes_job_id)
                job_db[job_id]['deleted'] = True
        except client.rest.ApiException as e:
            logging.debug(
                "Error while connecting to Kubernetes API: {}".format(e))
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.debug("Unexpected error: {}".format(e))