def test_stop_kubernetes_job(app, session, sample_serial_workflow_in_db,
                             sample_workflow_workspace, empty_user_secrets,
                             default_user, corev1_api_client_with_user_secrets,
                             monkeypatch):
    """Test stop of Kubernetes job."""
    workflow_uuid = sample_serial_workflow_in_db.id_
    next(sample_workflow_workspace(str(workflow_uuid)))
    expected_env_var_name = "env_var"
    expected_env_var_value = "value"
    expected_image = "busybox"
    expected_command = ["ls"]
    monkeypatch.setenv('REANA_USER_ID', str(default_user.id_))
    job_manager = KubernetesJobManager(
        docker_img=expected_image,
        cmd=expected_command,
        env_vars={expected_env_var_name: expected_env_var_value},
        workflow_uuid=workflow_uuid)
    with mock.patch("reana_job_controller.kubernetes_job_manager."
                    "current_k8s_batchv1_api_client") as kubernetes_client:
        with mock.patch(
                "reana_commons.k8s.secrets."
                "current_k8s_corev1_api_client",
                corev1_api_client_with_user_secrets(empty_user_secrets)):
            kubernetes_job_id = job_manager.execute()
            kubernetes_client.create_namespaced_job.assert_called_once()
            job_manager.stop(kubernetes_job_id)
            kubernetes_client.delete_namespaced_job.assert_called_once()
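This snippet, like the other tests in this collection, assumes imports along the following lines; the module paths are assumptions inferred from the patch targets and identifiers used (mock may equally be unittest.mock or the standalone mock package):

import json
from unittest import mock

from reana_db.models import Job, JobStatus  # assumed model location
from reana_job_controller.kubernetes_job_manager import KubernetesJobManager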
Example #2
def test_execute_kubernetes_job(
    app,
    session,
    sample_serial_workflow_in_db,
    sample_workflow_workspace,
    default_user,
    empty_user_secrets,
    corev1_api_client_with_user_secrets,
    monkeypatch,
):
    """Test execution of Kubernetes job."""
    workflow_uuid = sample_serial_workflow_in_db.id_
    workflow_workspace = next(sample_workflow_workspace(str(workflow_uuid)))
    env_var_key = "key"
    env_var_value = "value"
    expected_env_var = {env_var_key: env_var_value}
    expected_image = "busybox"
    expected_command = "ls"
    monkeypatch.setenv("REANA_USER_ID", str(default_user.id_))
    job_manager = KubernetesJobManager(
        docker_img=expected_image,
        cmd=expected_command,
        env_vars=expected_env_var,
        workflow_uuid=workflow_uuid,
        workflow_workspace=workflow_workspace,
    )
    with mock.patch(
        "reana_job_controller.kubernetes_job_manager.current_k8s_batchv1_api_client"
    ) as kubernetes_client:
        with mock.patch(
            "reana_commons.k8s.secrets.current_k8s_corev1_api_client",
            corev1_api_client_with_user_secrets(empty_user_secrets),
        ):
            kubernetes_job_id = job_manager.execute()
            created_job = (
                session.query(Job)
                .filter_by(backend_job_id=kubernetes_job_id)
                .one_or_none()
            )
            assert created_job
            assert created_job.docker_img == expected_image
            assert created_job.cmd == json.dumps(expected_command)
            assert json.dumps(expected_env_var) in created_job.env_vars
            assert created_job.status == JobStatus.created
            kubernetes_client.create_namespaced_job.assert_called_once()
            body = kubernetes_client.create_namespaced_job.call_args[1]["body"]
            env_vars = body["spec"]["template"]["spec"]["containers"][0]["env"]
            image = body["spec"]["template"]["spec"]["containers"][0]["image"]
            command = body["spec"]["template"]["spec"]["containers"][0]["args"]
            assert len(env_vars) == 3
            assert {"name": env_var_key, "value": env_var_value} in env_vars
            assert image == expected_image
            assert command == [expected_command]
Example #3
def test_execute_kubernetes_job(app, session, sample_serial_workflow_in_db,
                                sample_workflow_workspace, default_user,
                                empty_user_secrets,
                                corev1_api_client_with_user_secrets,
                                monkeypatch):
    """Test execution of Kubernetes job."""
    workflow_uuid = sample_serial_workflow_in_db.id_
    next(sample_workflow_workspace(str(workflow_uuid)))
    expected_env_var_name = "env_var"
    expected_env_var_value = "value"
    expected_image = "busybox"
    expected_command = ["ls"]
    monkeypatch.setenv('REANA_USER_ID', str(default_user.id_))
    job_manager = KubernetesJobManager(
        docker_img=expected_image,
        cmd=expected_command,
        env_vars={expected_env_var_name: expected_env_var_value},
        workflow_uuid=workflow_uuid)
    with mock.patch("reana_job_controller.kubernetes_job_manager."
                    "current_k8s_batchv1_api_client") as kubernetes_client:
        with mock.patch(
                "reana_commons.k8s.secrets."
                "current_k8s_corev1_api_client",
                corev1_api_client_with_user_secrets(empty_user_secrets)):
            kubernetes_job_id = job_manager.execute()
            created_job = session.query(Job).filter_by(
                backend_job_id=kubernetes_job_id).one_or_none()
            assert created_job
            assert created_job.docker_img == expected_image
            assert created_job.cmd == json.dumps(expected_command)
            assert created_job.env_vars == json.dumps(
                {expected_env_var_name: expected_env_var_value})
            assert created_job.status == JobStatus.created
            kubernetes_client.create_namespaced_job.assert_called_once()
            body = kubernetes_client.create_namespaced_job.call_args[1]['body']
            env_vars = body['spec']['template']['spec']['containers'][0]['env']
            image = body['spec']['template']['spec']['containers'][0]['image']
            command = \
                body['spec']['template']['spec']['containers'][0]['command']
            assert len(env_vars) == 1
            assert env_vars[0]['name'] == expected_env_var_name
            assert env_vars[0]['value'] == expected_env_var_value
            assert image == expected_image
            assert command == expected_command
Example #4
def test_stop_kubernetes_job(app, session, sample_serial_workflow_in_db,
                             sample_workflow_workspace):
    """Test stop of Kubernetes job."""
    workflow_uuid = sample_serial_workflow_in_db.id_
    next(sample_workflow_workspace(str(workflow_uuid)))
    expected_env_var_name = "env_var"
    expected_env_var_value = "value"
    expected_image = "busybox"
    expected_command = ["ls"]
    job_manager = KubernetesJobManager(
        docker_img=expected_image, cmd=expected_command,
        env_vars={expected_env_var_name: expected_env_var_value},
        workflow_uuid=workflow_uuid)
    with mock.patch("reana_job_controller.kubernetes_job_manager."
                    "current_k8s_batchv1_api_client") as kubernetes_client:
        kubernetes_job_id = job_manager.execute()
        kubernetes_client.create_namespaced_job.assert_called_once()
        job_manager.stop(kubernetes_job_id)
        kubernetes_client.delete_namespaced_job.assert_called_once()
Example #5
    def watch_jobs(self, job_db, app=None):
        """Open stream connection to k8s apiserver to watch all jobs status.

        :param job_db: Dictionary which contains all current jobs.
        """
        while True:
            logging.debug('Starting a new stream request to watch Jobs')
            try:
                w = watch.Watch()
                for event in w.stream(
                        current_k8s_corev1_api_client.list_namespaced_pod,
                        namespace='default',
                        label_selector='job-name'):
                    logging.info('New Job event received: {0}'.format(
                        event['type']))
                    job = event['object']
                    # Taking note of the remaining jobs since deletion might
                    # not happen straight away.
                    remaining_jobs = dict()
                    for job_id, job_dict in job_db.items():
                        if not job_db[job_id]['deleted']:
                            remaining_jobs[job_dict['backend_job_id']] = job_id
                    if (not job_db.get(
                            remaining_jobs.get(
                                job.metadata.labels['job-name']))
                            or job.metadata.labels['job-name']
                            not in remaining_jobs):
                        # Ignore jobs not created by this specific instance
                        # or already deleted jobs.
                        continue
                    job_id = remaining_jobs[job.metadata.labels['job-name']]
                    kubernetes_job_id = job.metadata.labels['job-name']
                    kubernetes_pod_job_id = job.metadata.name
                    if job.status.phase == 'Succeeded':
                        logging.info('Job job_id: {}, kubernetes_job_id: {}'
                                     ' succeeded.'.format(
                                         job_id, kubernetes_job_id))
                        job_db[job_id]['status'] = 'succeeded'
                    elif job.status.phase == 'Failed':
                        logging.info(
                            'Job job_id: {}, kubernetes_job_id: {} failed.'.
                            format(job_id, kubernetes_job_id))
                        job_db[job_id]['status'] = 'failed'
                    elif job.status.phase == 'Pending':
                        try:
                            reason = \
                                job.status.container_statuses[0].state \
                                .waiting.reason
                            if 'ErrImagePull' in reason:
                                logging.info(
                                    'Job job_id: {}, kubernetes_job_id: {} '
                                    'failed to fetch image.'.format(
                                        job_id, kubernetes_job_id))
                                job_db[job_id]['status'] = 'failed'
                            elif 'InvalidImageName' in reason:
                                logging.info(
                                    'Job job_id: {}, kubernetes_job_id: {} '
                                    'invalid image name.'.format(
                                        job_id, kubernetes_job_id))
                                job_db[job_id]['status'] = 'failed'
                            else:
                                continue
                        except (AttributeError, TypeError):
                            # Container status not yet available; keep waiting.
                            continue
                    else:
                        continue
                    # Grab logs when job either succeeds or fails.
                    logging.info('Getting last spawned pod for kubernetes'
                                 ' job {}'.format(kubernetes_job_id))
                    logging.info('Grabbing pod {} logs...'.format(
                        kubernetes_pod_job_id))
                    job_db[job_id]['log'] = \
                        self.get_container_logs(kubernetes_pod_job_id) or \
                        job.status.container_statuses[0].state \
                        .waiting.message
                    store_logs(job_id=job_id, logs=job_db[job_id]['log'])

                    logging.info('Cleaning Kubernetes job {} ...'.format(
                        kubernetes_job_id))
                    KubernetesJobManager.stop(kubernetes_job_id)
                    job_db[job_id]['deleted'] = True
            except client.rest.ApiException as e:
                logging.error(
                    "Error while connecting to Kubernetes API: {}".format(e))
            except Exception as e:
                logging.error(traceback.format_exc())
                logging.error("Unexpected error: {}".format(e))
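For reference, watch_jobs relies only on a handful of keys in each job_db entry. A minimal sketch of one entry follows, with the field set inferred from the accesses above and from create_job further below; all values are hypothetical:

# Sketch of a job_db entry as read by the watcher (values hypothetical).
job_db = {
    'cdcf48b1-c2f3-4693-8230-b066e088c6ac': {
        'backend_job_id': 'reana-run-job-123',  # Kubernetes job name (assumed)
        'deleted': False,                       # set to True after cleanup
        'status': 'started',                    # becomes 'succeeded'/'failed'
        'log': None,                            # filled in when the job ends
    },
}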
Example #6
def watch_jobs_kubernetes(job_db):
    """Open stream connection to k8s apiserver to watch all jobs status.

    :param job_db: Dictionary which contains all current jobs.
    """
    while True:
        logging.debug('Starting a new stream request to watch Jobs')
        try:
            w = watch.Watch()
            for event in w.stream(
                    current_k8s_batchv1_api_client.list_job_for_all_namespaces):
                logging.info('New Job event received: {0}'.format(
                    event['type']))
                job = event['object']

                # Taking note of the remaining jobs since deletion might not
                # happen straight away.
                remaining_jobs = dict()
                for job_id, job_dict in job_db.items():
                    if not job_db[job_id]['deleted']:
                        remaining_jobs[job_dict['backend_job_id']] = job_id
                if (not job_db.get(remaining_jobs.get(job.metadata.name))
                        or job.metadata.name not in remaining_jobs):
                    # Ignore jobs not created by this specific instance
                    # or already deleted jobs.
                    continue
                job_id = remaining_jobs[job.metadata.name]
                kubernetes_job_id = job.metadata.name
                if job.status.succeeded:
                    logging.info('Job job_id: {}, kubernetes_job_id: {}'
                                 ' succeeded.'.format(job_id,
                                                      kubernetes_job_id))
                    job_db[job_id]['status'] = 'succeeded'
                elif (job.status.failed
                      and job.status.failed >= config.MAX_JOB_RESTARTS):
                    logging.info(
                        'Job job_id: {}, kubernetes_job_id: {} failed.'.format(
                            job_id, kubernetes_job_id))
                    job_db[job_id]['status'] = 'failed'
                else:
                    continue
                # Grab logs when job either succeeds or fails.
                logging.info('Getting last spawned pod for kubernetes'
                             ' job {}'.format(kubernetes_job_id))
                last_spawned_pod = \
                    current_k8s_corev1_api_client.list_namespaced_pod(
                        namespace=job.metadata.namespace,
                        label_selector='job-name={job_name}'.format(
                            job_name=kubernetes_job_id)).items[-1]
                logging.info('Grabbing pod {} logs...'.format(
                    last_spawned_pod.metadata.name))
                job_db[job_id]['log'] = \
                    current_k8s_corev1_api_client.read_namespaced_pod_log(
                        namespace=last_spawned_pod.metadata.namespace,
                        name=last_spawned_pod.metadata.name)
                store_logs(job_id=job_id, logs=job_db[job_id]['log'])

                logging.info(
                    'Cleaning Kubernetes job {} ...'.format(kubernetes_job_id))
                KubernetesJobManager.stop(kubernetes_job_id)
                job_db[job_id]['deleted'] = True
        except client.rest.ApiException as e:
            logging.debug(
                "Error while connecting to Kubernetes API: {}".format(e))
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.debug("Unexpected error: {}".format(e))
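A minimal sketch of how such a watcher could be wired up next to the web application, assuming the module-level JOB_DB dictionary used by create_job below; the actual service may start it differently:

import threading

# Run the watcher in a daemon thread so it does not block the web server.
watcher = threading.Thread(
    target=watch_jobs_kubernetes, args=(JOB_DB,), daemon=True)
watcher.start()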
Example #7
def create_job():  # noqa
    r"""Create a new job.

    ---
    post:
      summary: Creates a new job.
      description: >-
        This resource is expecting JSON data with all the necessary information
        of a new job.
      operationId: create_job
      consumes:
       - application/json
      produces:
       - application/json
      parameters:
       - name: job
         in: body
         description: Information needed to instantiate a Job
         required: true
         schema:
           $ref: '#/definitions/JobRequest'
      responses:
        201:
          description: Request succeeded. The job has been launched.
          schema:
            type: object
            properties:
              job_id:
                type: string
          examples:
            application/json:
              {
                "job_id": "cdcf48b1-c2f3-4693-8230-b066e088c6ac"
              }
        400:
          description: >-
            Request failed. The incoming data specification seems malformed.
        500:
          description: >-
            Request failed. Internal controller error. The job could probably
            not be allocated.
    """
    json_data = request.get_json()
    if not json_data:
        return jsonify({'message': 'Empty request'}), 400

    # Validate and deserialize input
    job_request, errors = job_request_schema.load(json_data)
    if errors:
        return jsonify(errors), 400
    backend = job_request.get('backend', 'HTCondor')
    if backend == 'Kubernetes':
        job_obj = KubernetesJobManager(
            docker_img=job_request['docker_img'],
            cmd=job_request['cmd'],
            env_vars=job_request['env_vars'],
            workflow_uuid=job_request['workflow_uuid'],
            workflow_workspace=str(job_request['workflow_workspace']),
            cvmfs_mounts=job_request['cvmfs_mounts'],
            shared_file_system=job_request['shared_file_system'])
    elif backend == 'HTCondor':
        job_obj = HTCondorJobManager(
            docker_img=job_request['docker_img'],
            cmd=job_request['cmd'],
            env_vars=job_request['env_vars'],
            workflow_uuid=job_request['workflow_uuid'],
            workflow_workspace=str(job_request['workflow_workspace']),
            cvmfs_mounts=job_request['cvmfs_mounts'],
            shared_file_system=job_request['shared_file_system'])
    else:
        return jsonify(
            {'job': 'Unsupported backend {0}'.format(backend)}), 400

    backend_job_id = job_obj.execute()
    if job_obj:
        job = copy.deepcopy(job_request)
        job['status'] = 'started'
        job['restart_count'] = 0
        job['max_restart_count'] = 3
        job['deleted'] = False
        job['obj'] = job_obj
        job['job_id'] = job_obj.job_id
        job['backend_job_id'] = backend_job_id
        JOB_DB[str(job['job_id'])] = job

        return jsonify({'job_id': job['job_id']}), 201
    else:
        return jsonify({'job': 'Could not be allocated'}), 500
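A hypothetical client call against this endpoint; the /jobs route, host, and port are assumptions, and the payload fields mirror the JobRequest keys read above:

import requests

response = requests.post(
    'http://localhost:5000/jobs',  # assumed route and port
    json={
        'docker_img': 'busybox',
        'cmd': 'ls',
        'env_vars': {'env_var': 'value'},
        'workflow_uuid': 'cdcf48b1-c2f3-4693-8230-b066e088c6ac',
        'workflow_workspace': '/tmp/workspace',  # hypothetical path
        'cvmfs_mounts': 'false',  # serialization per JobRequest schema (assumed)
        'shared_file_system': True,
        'backend': 'Kubernetes',
    })
assert response.status_code == 201
print(response.json()['job_id'])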