Example #1
def getBody(namespace='couture-console',
            jobname='nifi-test',
            containername='nifi-test',
            containerimage='sidharthc/nifi-test:alpha',
            env_vars=ENV_LIST,
            containerargs=['SFTP_TO_HDFS.py']):
    body = client.V1Job(api_version="batch/v1", kind="Job")
    # Body needs Metadata
    # Attention: Each JOB must have a different name!
    body.metadata = client.V1ObjectMeta(namespace=namespace, name=jobname)
    # And a Status
    body.status = client.V1JobStatus()
    # Now we start with the Template...
    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()
    env_list = []
    for env_name, env_value in env_vars.items():
        env_list.append(client.V1EnvVar(name=env_name, value=env_value))
    container = client.V1Container(name=containername,
                                   image=containerimage,
                                   args=containerargs,
                                   env=env_list)
    template.template.spec = client.V1PodSpec(containers=[container],
                                              restart_policy='Never')
    # And finally we can create our V1JobSpec!
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=100,
                                 template=template.template)
    return body
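A minimal usage sketch for the function above (hypothetical names; assumes the kubernetes package is installed and ENV_LIST is defined):

from kubernetes import client, config

config.load_kube_config()                  # or config.load_incluster_config() inside a cluster
batch_v1 = client.BatchV1Api()
job = getBody(jobname='nifi-test-run-1')   # each Job must have a unique name
batch_v1.create_namespaced_job(namespace='couture-console', body=job)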
Example #2
 def _create_job_object(self,
                        name: str,
                        container_image: str,
                        namespace: str = None,
                        container_name: str = "servicecontainer",
                        env_vars: dict = {},
                        command: list = [],
                        active_deadline_seconds: int = 3600):
     namespace = self._get_namespace(namespace)
     body = client.V1Job(api_version="batch/v1", kind="Job")
     body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
     body.status = client.V1JobStatus()
     template = client.V1PodTemplate()
     template.template = client.V1PodTemplateSpec()
     env_list = []
     for env_name, env_value in env_vars.items():
         env_list.append(client.V1EnvVar(name=env_name, value=env_value))
     container = client.V1Container(name=container_name,
                                    image=container_image,
                                    env=env_list,
                                    command=command)
     template.template.spec = client.V1PodSpec(containers=[container],
                                               restart_policy='Never')
     # Set active_deadline_seconds
     body.spec = client.V1JobSpec(
         ttl_seconds_after_finished=600,
         template=template.template,
         active_deadline_seconds=active_deadline_seconds)
     return body
Example #3
    def create(self, job_spec=None, pod_spec=None):
        """Creates and runs the job on the cluster.

        Args:
            job_spec: A dictionary of keyword arguments that will be passed to V1JobSpec()
            pod_spec: A dictionary of keyword arguments that will be passed to V1PodSpec()

        Returns: A dictionary containing the results of creating the job on the cluster.

        """
        if job_spec is None:
            job_spec = self.job_spec
        if pod_spec is None:
            pod_spec = self.pod_spec
        if not self._containers:
            raise ValueError(
                "Containers not found. "
                "Use add_containers() to specify containers before creating the job."
            )
        # TODO: Set the backoff limit to 1. There will be no retry if the job fails.
        # Convert job name to lower case
        job_name = str(self.job_name).lower()
        job_body = client.V1Job(api_version="batch/v1", kind="Job")
        job_body.metadata = client.V1ObjectMeta(namespace=self.namespace, name=job_name)
        job_body.status = client.V1JobStatus()
        template = pod_template(self._containers, self._volumes, **pod_spec)
        job_body.spec = client.V1JobSpec(template=template.template, **job_spec)
        self.creation_response = api_request(api.create_namespaced_job, self.namespace, job_body)
        return self.creation_response
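A hedged usage sketch, assuming runner is an instance of the surrounding class and container a V1Container built elsewhere; the dict entries are forwarded verbatim to V1JobSpec() and V1PodSpec():

runner.add_containers([container])         # required, see the check above
runner.create(job_spec={'backoff_limit': 1, 'ttl_seconds_after_finished': 600},
              pod_spec={'restart_policy': 'Never'})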
Example #4
def kube_create_job_object(name,
                           container_image,
                           namespace="default",
                           container_name="jobcontainer",
                           env_vars={}):
    # Body is the object Body
    body = client.V1Job(api_version="batch/v1", kind="Job")
    # Body needs Metadata
    # Attention: Each JOB must have a different name!
    body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
    # And a Status
    body.status = client.V1JobStatus()
    # Now we start with the Template...
    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()
    # Passing Arguments in Env:
    env_list = []
    for env_name, env_value in env_vars.items():
        env_list.append(client.V1EnvVar(name=env_name, value=env_value))
    container = client.V1Container(name=container_name,
                                   image=container_image,
                                   env=env_list)
    template.template.spec = client.V1PodSpec(containers=[container],
                                              restart_policy='Never')
    # And finally we can create our V1JobSpec!
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=30,
                                 template=template.template)
    return body
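A sketch of submitting the returned body and polling the result (job name and image are illustrative; assumes a configured BatchV1Api):

batch_v1 = client.BatchV1Api()
body = kube_create_job_object('demo-job', 'busybox:latest', env_vars={'GREETING': 'hi'})
batch_v1.create_namespaced_job('default', body)
status = batch_v1.read_namespaced_job_status('demo-job', 'default').status
print(status.succeeded, status.failed)     # both None until the pod finishes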
Example #5
def fake_v1_job_error():
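    # Test fixture: deliberately inconsistent (plain strings as containers,
    # status.succeeded=2 vs. completions=1) so callers can exercise error handling.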
    return client.V1Job(api_version='batch/v1',
                        kind='Job',
                        metadata=client.V1ObjectMeta(name='curry-test001',
                                                     namespace='curryns'),
                        spec=client.V1JobSpec(
                            completions=1,
                            template=client.V1PodTemplateSpec(
                                metadata=client.V1ObjectMeta(
                                    name='curry-test001', namespace='curryns'),
                                spec=client.V1PodSpec(hostname='job',
                                                      containers=['image']))),
                        status=client.V1JobStatus(succeeded=2, ))
Example #6
def scheduleJobs():
    jobNames = []
    for jobParameters in request.get_json(force=True):
        if not validateJobParameters(jobParameters):
            return abort(422, 'Invalid arguments')

        body = kubeClient.V1Job(api_version="batch/v1", kind="Job")
        # Body needs Metadata
        # Attention: Each JOB must have a different name!
        jobName = "r-job-" + str(uuid.uuid4())
        body.metadata = kubeClient.V1ObjectMeta(namespace="default",
                                                name=jobName)
        # And a Status

        body.status = kubeClient.V1JobStatus()
        # Now we start with the Template...
        template = kubeClient.V1PodTemplate()
        template.template = kubeClient.V1PodTemplateSpec()
        # Passing Arguments in Env:

        env_list = createJobEnv(jobParameters, jobName)

        volume_mounts = kubeClient.V1VolumeMount(mount_path="/mydata",
                                                 name="dose-volume")
        container = kubeClient.V1Container(
            name="r-container",
            image="monikeu/r-script-1:r-image-env",
            env=env_list,
            volume_mounts=[volume_mounts],
            image_pull_policy="Always")
        per_vol_claim = kubeClient.V1PersistentVolumeClaimVolumeSource(
            claim_name="dose-volume-claim")
        volume = kubeClient.V1Volume(name="dose-volume",
                                     persistent_volume_claim=per_vol_claim)
        template.template.spec = kubeClient.V1PodSpec(containers=[container],
                                                      restart_policy='Never',
                                                      volumes=[volume])
        # And finally we can create our V1JobSpec!
        body.spec = kubeClient.V1JobSpec(ttl_seconds_after_finished=600,
                                         template=template.template)

        try:
            response = api_instance.create_namespaced_job("default",
                                                          body,
                                                          pretty=True)
            pprint(response)
            jobNames.append(jobName)
        except ApiException as e:
            return "Error occurred during an attempt to create a job", e.status

    return 'Created one or more jobs: {}'.format(",".join(jobNames)), 201
Example #7
def make_job_object(
    name: str,
    container_image: str,
    command: List[str],
    namespace: str = 'default',
    container_name: str = 'jobcontainer',
    pod_labels: Dict[str, str] = None,
    service_account_name: str = 'default',
) -> k8s_client.V1Job:
  """Make a Kubernetes Job object with a single pod.

  See
  https://kubernetes.io/docs/concepts/workloads/controllers/job/#writing-a-job-spec

  Args:
    name: Name of job.
    container_image: Name of container image.
    command: Command to run.
    namespace: Kubernetes namespace to contain this Job.
    container_name: Name of the container.
    pod_labels: Dictionary of metadata labels for the pod.
    service_account_name: Name of the service account for this Job.

  Returns:
    `kubernetes.client.V1Job` object.
  """
  pod_labels = pod_labels or {}
  return k8s_client.V1Job(
      api_version='batch/v1',
      kind='Job',
      metadata=k8s_client.V1ObjectMeta(
          namespace=namespace,
          name=sanitize_pod_name(name),
      ),
      status=k8s_client.V1JobStatus(),
      spec=k8s_client.V1JobSpec(
          template=k8s_client.V1PodTemplateSpec(
              metadata=k8s_client.V1ObjectMeta(labels=pod_labels),
              spec=k8s_client.V1PodSpec(
                  containers=[
                      k8s_client.V1Container(
                          name=container_name,
                          image=container_image,
                          command=command,
                      ),
                  ],
                  service_account_name=service_account_name,
                  restart_policy=RestartPolicy.NEVER.value,
              ),
          )),
  )
Example #8
    def kube_create_job_object(
        self,
        container_image: str,
        command: t.List[str] = None,
        namespace: str = "default",
        env_vars: dict = None,
        afs_volume_name: str = None,
        azure_mount_path: str = None,
        volume_sub_path: str = None,
    ):
        """
        Create a k8 Job Object
        Minimum definition of a job object:
        {'api_version': None, - Str
        'kind': None,     - Str
        'metadata': None, - Metada Object
        'spec': None,     -V1JobSpec
        'status': None}   - V1Job Status
        Docs: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Job.md
        Docs2: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#writing-a-job-spec

        Also docs are pretty pretty bad. Best way is to ´pip install kubernetes´ and go via the autogenerated code
        And figure out the chain of objects that you need to hold a final valid object So for a job object you need:
        V1Job -> V1ObjectMeta
              -> V1JobStatus
              -> V1JobSpec -> V1PodTemplate -> V1PodTemplateSpec -> V1Container

        Now the tricky part, is that V1Job.spec needs a .template, but not a PodTemplateSpec, as such
        you need to build a PodTemplate, add a template field (template.template) and make sure
        template.template.spec is now the PodSpec.
        Then, the V1Job.spec needs to be a JobSpec which has a template the template.template field of the PodTemplate.
        Failure to do so will trigger an API error.

        Also Containers must be a list!

        Docs3: https://github.com/kubernetes-client/python/issues/589
        """
        # Body is the object Body
        body = client.V1Job(api_version="batch/v1", kind="Job")
        # Body needs Metadata
        # Attention: Each JOB must have a different name!
        name = id_generator()
        body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
        # And a Status
        body.status = client.V1JobStatus()
        # Now we start with the Template...
        template = client.V1PodTemplate()
        template.template = client.V1PodTemplateSpec()
        # Passing Arguments in Env:
        env_list = []
        if env_vars:
            for env_name, env_value in env_vars.items():
                env_list.append(client.V1EnvVar(name=env_name,
                                                value=env_value))

        volumes, volume_mounts = None, None  # keep these defined when no Azure volume is requested
        if volume_sub_path is not None:
            volumes, volume_mounts = self.prepare_azure_volumes(
                volume_sub_path=volume_sub_path,
                afs_volume_name=afs_volume_name,
                azure_mount_path=azure_mount_path)
        container = client.V1Container(name=name,
                                       image=container_image,
                                       env=env_list,
                                       command=command,
                                       volume_mounts=volume_mounts)
        template.template.spec = client.V1PodSpec(containers=[container],
                                                  restart_policy='Never',
                                                  volumes=volumes)
        # And finally we can create our V1JobSpec!
        body.spec = client.V1JobSpec(ttl_seconds_after_finished=600,
                                     template=template.template)
        return body
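Since only template.template (a V1PodTemplateSpec) ends up in the Job, the V1PodTemplate detour can also be skipped entirely; a minimal equivalent sketch, with container built as above:

pod_spec = client.V1PodSpec(containers=[container], restart_policy='Never')
job_spec = client.V1JobSpec(template=client.V1PodTemplateSpec(spec=pod_spec),
                            ttl_seconds_after_finished=600)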
Example #9
def create_job(name,
               configmap_name,
               container_name,
               container_image,
               container_command,
               namespace="default",
               env_vars={}):
    """
    Create a k8 Job Object
    Args:
        name:
        configmap_name:
        container_name:
        container_image:
        container_command:list类型,执行程序的命令,例如:['python','/home/test.py']
        namespace:
        env_vars: 环境变量

    Returns:

    """
    try:
        # Body is the object body
        body = client.V1Job(api_version="batch/v1", kind="Job")

        # The object needs metadata; each Job must have a unique name!
        body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
        # Add a Status
        body.status = client.V1JobStatus()

        # Start the Template...
        template = client.V1PodTemplateSpec()

        # Pass arguments via env vars:
        env_list = []
        for env_name, env_value in env_vars.items():
            env_list.append(client.V1EnvVar(name=env_name, value=env_value))

        container = client.V1Container(command=container_command,
                                       env=env_list,
                                       image=container_image,
                                       image_pull_policy="IfNotPresent",
                                       name=container_name)

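        # NB: mount_path is assumed to come from module scope; it is not a parameter here.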
        volume_mount = client.V1VolumeMount(name="config-volume",
                                            mount_path=mount_path)
        container.volume_mounts = [volume_mount]

        config_map = client.V1ConfigMapVolumeSource(name=configmap_name)

        volumes = client.V1Volume(name="config-volume", config_map=config_map)

        template.spec = client.V1PodSpec(containers=[container],
                                         restart_policy='OnFailure',
                                         volumes=[volumes],
                                         node_selector={'gpu': 'true'})

        # Finally, create the V1JobSpec
        body.spec = client.V1JobSpec(ttl_seconds_after_finished=600,
                                     template=template)

        response = batch_v1_api.create_namespaced_job(namespace,
                                                      body,
                                                      pretty=True)

        return True, response
    except Exception as ex:
        print(ex)
        return False, "k8s Job Object creates Failed!"
Example #10
def create_site_ingress(site_name):
    k8s_settings = frappe.get_single("K8s Bench Settings")

    if (
        not k8s_settings.namespace
        or not k8s_settings.wildcard_domain
        or not k8s_settings.wildcard_tls_secret_name
        or not k8s_settings.cert_manager_cluster_issuer
    ):
        not_set = "NOT_SET"
        out = {
            "namespace": k8s_settings.namespace or not_set,
            "wildcard_domain": k8s_settings.wildcard_domain or not_set,
            "wildcard_tls_secret_name": k8s_settings.wildcard_tls_secret_name
            or not_set,
            "cert_manager_cluster_issuer": k8s_settings.cert_manager_cluster_issuer
            or not_set,
        }
        frappe.local.response["http_status_code"] = 501
        return out

    load_config()
    networking_v1_api = client.NetworkingV1beta1Api()

    body = client.NetworkingV1beta1Ingress()

    body.metadata = client.V1ObjectMeta(
        namespace=k8s_settings.namespace,
        name=site_name,
        annotations={
            "cert-manager.io/cluster-issuer": k8s_settings.cert_manager_cluster_issuer
        },
    )

    body.spec = client.NetworkingV1beta1IngressSpec(
        rules=[
            client.NetworkingV1beta1IngressRule(
                host=site_name,
                http=client.NetworkingV1beta1HTTPIngressRuleValue(
                    paths=[
                        client.NetworkingV1beta1HTTPIngressPath(
                            backend=client.NetworkingV1beta1IngressBackend(
                                service_name=k8s_settings.service_name, service_port=80
                            )
                        )
                    ]
                ),
            ),
        ],
        tls=[
            client.NetworkingV1beta1IngressTLS(
                hosts=[f"*.{k8s_settings.wildcard_domain}"],
                secret_name=k8s_settings.wildcard_tls_secret_name,
            ),
        ],
    )

    try:
        ingress = networking_v1_api.create_namespaced_ingress(
            k8s_settings.namespace, body
        )
        return to_dict(ingress)
    except (ApiException, Exception) as e:
        status_code = getattr(e, "status", 500)
        out = {"error": e, "params": {"site_name": site_name}}
        reason = getattr(e, "reason")
        if reason:
            out["reason"] = reason
        frappe.log_error(
            out, "Exception: NetworkingV1beta1Api->create_namespaced_ingress"
        )
        frappe.local.response["http_status_code"] = status_code
        return out
Example #11
    def start_job(self, nuts_id):
        input_path = self.input_path
        output_path = self.pers_path + '/' + self.experiment_id
        pers_path = self.pers_path
        resolution = self.resolution
        experiment_id = self.experiment_id
        nuts_id = nuts_id.lower()

        container_name = 'br-process-raster'
        container_image = 'harbor.tilyes.eu/eugis/br-process-raster'
        body = client.V1Job(api_version="batch/v1", kind="Job")
        body.metadata = client.V1ObjectMeta(
            namespace=self.namespace,
            name=f'{container_name}-{experiment_id}-{nuts_id}')
        body.status = client.V1JobStatus()
        template = client.V1PodTemplate()
        template.template = client.V1PodTemplateSpec()
        container = client.V1Container(
            name=container_name,
            image=container_image,
            resources=client.V1ResourceRequirements(requests={
                'cpu': '1',
                'memory': '4Gi',
            },
                                                    limits={
                                                        'cpu': '2',
                                                        'memory': '6Gi',
                                                    }),
            volume_mounts=[
                client.V1VolumeMount(mount_path=self.input_path,
                                     name="source"),
                client.V1VolumeMount(mount_path=self.pers_path,
                                     name="persistence"),
                client.V1VolumeMount(mount_path="/app/config/",
                                     name="hu-raster-config")
            ],
            command=[
                'sh', '-c',
                f'python3 worker.py --input-path {input_path} --resolution {resolution} --output-path {output_path} --pers-path {pers_path} --nuts-id {nuts_id} && curl -X POST http://localhost:15020/quitquitquit'
            ])
        template.template.spec = client.V1PodSpec(
            containers=[container],
            volumes=[
                client.V1Volume(name='source',
                                persistent_volume_claim=client.
                                V1PersistentVolumeClaimVolumeSource(
                                    claim_name=self.source_pvc)),
                client.V1Volume(name='persistence',
                                persistent_volume_claim=client.
                                V1PersistentVolumeClaimVolumeSource(
                                    claim_name=self.persistence_pvc)),
                client.V1Volume(
                    name="hu-raster-config",
                    config_map=client.V1ConfigMapVolumeSource(
                        name=f"kf-pipeline-hu-raster-config-{experiment_id}"))
            ],
            restart_policy='Never')
        body.spec = client.V1JobSpec(template=template.template,
                                     ttl_seconds_after_finished=10)
        self.delete_job(f'{container_name}-{experiment_id}-{nuts_id}')
        self.v1.create_namespaced_job(self.namespace, body, pretty='true')

        return f'{container_name}-{experiment_id}-{nuts_id}'
Example #12
def create_job_object(name,
                      container_image,
                      command,
                      args=None,
                      namespace="default",
                      container_name="jobcontainer",
                      env_vars=None,
                      restart_policy='Never',
                      ttl_finished=180,
                      secret_names=None,
                      backoff_limit=0,
                      volume_mappings=None):

    if settings.TASK_DELETE_SUCCESSFUL_PODS or settings.TASK_DELETE_FAILED_PODS:
        cleanup_pods(delete_succeeded=settings.TASK_DELETE_SUCCESSFUL_PODS,
                     delete_failed=settings.TASK_DELETE_FAILED_PODS,
                     namespace=namespace)

    if env_vars is None:
        env_vars = {}
    if secret_names is None:
        secret_names = []
    if args is None:
        args = []
    if volume_mappings is None:
        volume_mappings = []

    body = client.V1Job(api_version="batch/v1", kind="Job")
    # metadata and status are required
    body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
    body.status = client.V1JobStatus()

    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()

    # Set env variables
    env_list = []
    for env_name, env_value in env_vars.items():
        env_list.append(client.V1EnvVar(name=env_name, value=env_value))

    env_from = []
    for secret_name in secret_names:
        env_from.append(
            client.V1EnvFromSource(secret_ref=client.V1SecretEnvSource(
                name=secret_name)))

    volumes = []
    volume_mounts = []
    for i, volume_mapping in enumerate(volume_mappings):
        volume = client.V1Volume(name=f'volume-{i}',
                                 host_path=client.V1HostPathVolumeSource(
                                     path=volume_mapping['host_path']))
        volumes.append(volume)
        volume_mounts.append(
            client.V1VolumeMount(name=f'volume-{i}',
                                 mount_path=volume_mapping['mount_path']))

    # set container options
    container = client.V1Container(
        name=container_name,
        image=container_image,
        env=env_list,
        command=command,
        args=args,
        env_from=env_from,
        volume_mounts=volume_mounts,
        image_pull_policy=settings.TASK_IMAGE_PULL_POLICY)

    # set pod options
    template.template.spec = client.V1PodSpec(
        containers=[container],
        restart_policy=restart_policy,
        volumes=volumes,
        service_account_name='collabovid-sa')

    body.spec = client.V1JobSpec(ttl_seconds_after_finished=ttl_finished,
                                 template=template.template,
                                 backoff_limit=backoff_limit)

    return body
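Each volume_mappings entry needs host_path and mount_path keys, as the enumeration above shows. A hypothetical call (image and paths are illustrative; the module-level settings object must be configured):

body = create_job_object(
    name='scrape-task',
    container_image='registry.example.com/worker:latest',
    command=['python', 'run.py'],
    volume_mappings=[{'host_path': '/srv/data', 'mount_path': '/data'}])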
Example #13
def submit_job(args, command=None):
    container_image = args.container
    container_name = args.name

    body = client.V1Job(api_version="batch/v1", kind="Job", metadata=client.V1ObjectMeta(name=container_name))
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()

    labels = {
        'hugin-job': "1",
        'hugin-job-name': f'{container_name}'
    }
    template.template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels)
    )

    tolerations = []
    env = []
    if args.environment:
        for env_spec in args.environment:
            env_name,env_value = env_spec.split("=", 1)
            env.append(client.V1EnvVar(name=env_name, value=env_value))

    container_args = dict(
        name=f"container-{container_name}",
        image=container_image,
        env=env,
    )

    if args.gpu:
        tolerations.append(client.V1Toleration(
            key='nvidia.com/gpu', operator='Exists', effect='NoSchedule'))
        container_args['resources'] = client.V1ResourceRequirements(limits={"nvidia.com/gpu": 1})
    if command or args.command:
        container_args['command'] = command if command else args.command

    container = client.V1Container(**container_args)
    pull_secrets = []
    if args.pull_secret is not None:
        pull_secrets.append(client.V1LocalObjectReference(name=args.pull_secret))
    pod_args = dict(containers=[container],
                    restart_policy='Never',
                    image_pull_secrets=pull_secrets)


    if tolerations:
        pod_args['tolerations'] = tolerations

    if args.node_selector is not None:
        parts = args.node_selector.split("=", 1)
        if len(parts) == 2:
            affinity = client.V1Affinity(
                node_affinity=client.V1NodeAffinity(
                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                        node_selector_terms=[client.V1NodeSelectorTerm(
                            match_expressions=[client.V1NodeSelectorRequirement(
                                key=parts[0], operator='In', values=[parts[1]])]
                        )]
                    )
                )
            )
            pod_args['affinity'] = affinity

    template.template.spec = client.V1PodSpec(**pod_args)
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=1800, template=template.template)
    try:
        api_response = batch_v1.create_namespaced_job("default", body, pretty=True)
        #print (api_response)
    except client.exceptions.ApiException as e:
        logging.critical(f"Failed to start job: {e.reason}")
Example #14
def create_upgrade_job(site_name, base_pvc_name):
    not_set = "NOT_SET"

    if not site_name or not base_pvc_name:
        frappe.local.response["http_status_code"] = 400
        return {
            "site_name": site_name or not_set,
            "base_pvc_name": base_pvc_name or not_set,
        }

    k8s_settings = frappe.get_single("K8s Bench Settings")

    if (
        not k8s_settings.namespace
        or not k8s_settings.nginx_image
        or not k8s_settings.python_image
        or not k8s_settings.pvc_name
    ):
        out = {
            "namespace": k8s_settings.namespace or not_set,
            "nginx_image": k8s_settings.nginx_image or not_set,
            "python_image": k8s_settings.python_image or not_set,
            "pvc_name": k8s_settings.pvc_name or not_set,
        }
        frappe.local.response["http_status_code"] = 501
        return out

    job_name = f"{UPGRADE_SITE}-{site_name}"
    load_config()

    batch_v1_api = client.BatchV1Api()

    body = client.V1Job(api_version="batch/v1", kind="Job")
    body.metadata = client.V1ObjectMeta(namespace=k8s_settings.namespace, name=job_name)
    body.status = client.V1JobStatus()
    body.spec = client.V1JobSpec(
        template=client.V1PodTemplateSpec(
            spec=client.V1PodSpec(
                init_containers=[
                    client.V1Container(
                        name="populate-assets",
                        image=k8s_settings.nginx_image,
                        command=["/bin/bash", "-c"],
                        args=["rsync -a --delete /var/www/html/assets/frappe /assets"],
                        volume_mounts=[
                            client.V1VolumeMount(
                                name="assets-cache", mount_path="/assets"
                            ),
                        ],
                    )
                ],
                security_context=client.V1PodSecurityContext(
                    supplemental_groups=[1000]
                ),
                containers=[
                    client.V1Container(
                        name="upgrade-site",
                        image=k8s_settings.python_image,
                        command=["/home/frappe/frappe-bench/env/bin/python"],
                        args=["/home/frappe/frappe-bench/commands/upgrade_site.py"],
                        volume_mounts=[
                            client.V1VolumeMount(
                                name=SITES_DIR,
                                mount_path="/home/frappe/frappe-bench/sites",
                            ),
                            client.V1VolumeMount(
                                name=BASE_SITES_DIR, mount_path="/opt/base-sites"
                            ),
                            client.V1VolumeMount(
                                name=UPGRADE_SITE,
                                mount_path="/home/frappe/frappe-bench/commands",
                            ),
                            client.V1VolumeMount(
                                name=ASSETS_CACHE, mount_path="/assets"
                            ),
                        ],
                        env=[
                            client.V1EnvVar(name="SITE_NAME", value=site_name),
                            client.V1EnvVar(
                                name="FROM_BENCH_PATH", value="/opt/base-sites"
                            ),
                        ],
                    )
                ],
                restart_policy="Never",
                volumes=[
                    client.V1Volume(
                        name=SITES_DIR,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=k8s_settings.pvc_name, read_only=False
                        ),
                    ),
                    client.V1Volume(
                        name=BASE_SITES_DIR,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=base_pvc_name,
                            read_only=False,
                        ),
                    ),
                    client.V1Volume(
                        name=UPGRADE_SITE,
                        config_map=client.V1ConfigMapVolumeSource(name=UPGRADE_SITE),
                    ),
                    client.V1Volume(
                        name=ASSETS_CACHE, empty_dir=client.V1EmptyDirVolumeSource()
                    ),
                ],
            )
        )
    )

    try:
        api_response = batch_v1_api.create_namespaced_job(
            k8s_settings.namespace, body, pretty=True
        )
        return job_name + " created"
    except (ApiException, Exception) as e:
        status_code = getattr(e, "status", 500)
        out = {
            "error": e,
            "params": {"site_name": site_name, "base_pvc_name": base_pvc_name},
        }
        reason = getattr(e, "reason")
        if reason:
            out["reason"] = reason

        frappe.log_error(out, "Exception: BatchV1Api->create_namespaced_job")
        frappe.local.response["http_status_code"] = status_code
        return out
Example #15
def create_kb8s_job(workflow_id, minion_cmd, cluster):
    configuration = client.Configuration()
    configuration.host = cluster['address']
    configuration.verify_ssl = False
    configuration.debug = False
    if 'general_parameters' not in cluster:
        raise ValueError('Incorrect cluster config.')

    cluster_params = {}
    for parameter in cluster['general_parameters'].split(','):
        key, value = parameter.split('=')
        if key.startswith('kubernetes'):
            cluster_params[key] = value
    env_vars = {
        'HADOOP_CONF_DIR': '/usr/local/juicer/conf',
    }

    token = cluster['auth_token']
    configuration.api_key = {"authorization": "Bearer " + token}
    # noinspection PyUnresolvedReferences
    client.Configuration.set_default(configuration)

    job = client.V1Job(api_version="batch/v1", kind="Job")
    name = 'job-{}'.format(workflow_id)
    container_name = 'juicer-job'
    container_image = cluster_params['kubernetes.container']
    namespace = cluster_params['kubernetes.namespace']
    pull_policy = cluster_params.get('kubernetes.pull_policy', 'Always')

    gpus = int(cluster_params.get('kubernetes.resources.gpus', 0))

    print('-' * 30)
    print('GPU KB8s specification: ' + str(gpus))
    print('-' * 30)
    log.info('GPU specification: %s', gpus)

    job.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
    job.status = client.V1JobStatus()

    # Now we start with the Template...
    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()

    # Passing Arguments in Env:
    env_list = []
    for env_name, env_value in env_vars.items():
        env_list.append(client.V1EnvVar(name=env_name, value=env_value))

    # Subpath implies that the file is stored as a config map in k8s
    volume_mounts = [
        client.V1VolumeMount(
            name='juicer-config',
            sub_path='juicer-config.yaml',
            mount_path='/usr/local/juicer/conf/juicer-config.yaml'),
        client.V1VolumeMount(
            name='hdfs-site',
            sub_path='hdfs-site.xml',
            mount_path='/usr/local/juicer/conf/hdfs-site.xml'),
        client.V1VolumeMount(name='hdfs-pvc', mount_path='/srv/storage/'),
    ]
    pvc_claim = client.V1PersistentVolumeClaimVolumeSource(
        claim_name='hdfs-pvc')

    if gpus:
        resources = {'limits': {'nvidia.com/gpu': gpus}}
    else:
        resources = {}

    container = client.V1Container(name=container_name,
                                   image=container_image,
                                   env=env_list,
                                   command=minion_cmd,
                                   image_pull_policy=pull_policy,
                                   volume_mounts=volume_mounts,
                                   resources=resources)

    volumes = [
        client.V1Volume(
            name='juicer-config',
            config_map=client.V1ConfigMapVolumeSource(name='juicer-config')),
        client.V1Volume(
            name='hdfs-site',
            config_map=client.V1ConfigMapVolumeSource(name='hdfs-site')),
        client.V1Volume(name='hdfs-pvc', persistent_volume_claim=pvc_claim),
    ]
    template.template.spec = client.V1PodSpec(containers=[container],
                                              restart_policy='Never',
                                              volumes=volumes)

    # And finally we can create our V1JobSpec!
    job.spec = client.V1JobSpec(ttl_seconds_after_finished=10,
                                template=template.template)
    api = client.ApiClient(configuration)
    batch_api = client.BatchV1Api(api)

    try:
        batch_api.create_namespaced_job(namespace, job, pretty=True)
    except ApiException as e:
        body = json.loads(e.body)
        if body['reason'] == 'AlreadyExists':
            pass
        else:
            print("Exception when calling BatchV1Api->: {}\n".format(e))
Example #16
def create_job_object(name: str,
                      container_image: str,
                      env_list: dict,
                      command: List[str],
                      command_args: List[str],
                      volumes: List[Dict],
                      init_containers: List[Dict],
                      output: Output,
                      namespace: str = "stackl",
                      container_name: str = "jobcontainer",
                      api_version: str = "batch/v1",
                      image_pull_policy: str = "Always",
                      ttl_seconds_after_finished: int = 3600,
                      restart_policy: str = "Never",
                      backoff_limit: int = 0,
                      active_deadline_seconds: int = 3600,
                      service_account: str = "stackl-agent-stackl-agent",
                      image_pull_secrets: List[str] = [],
                      labels=None) -> client.V1Job:
    # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
    """Creates a Job object using the Kubernetes client

    :param name: Job name affix
    :type name: str
    :param container_image: automation container image
    :type container_image: str
    :param env_list: Dict with key/values for the environment inside the automation container
    :type env_list: dict
    :param command: entrypoint command
    :type command: List[str]
    :param command_args: command arguments
    :type command_args: List[str]
    :param volumes: volumes and volumemounts
    :type volumes: List[Dict]
    :param image_pull_secrets: secrets to pull images
    :type image_pull_secrets: List[str]
    :param init_containers: list with init_containers
    :type init_containers: List[Dict]
    :param output: output Object
    :type output: Output
    :param namespace: Kubernetes namespace, defaults to "stackl"
    :type namespace: str, optional
    :param container_name: name of automation container, defaults to "jobcontainer"
    :type container_name: str, optional
    :param api_version: Job api version, defaults to "batch/v1"
    :type api_version: str, optional
    :param image_pull_policy: always pull latest images, defaults to "Always"
    :type image_pull_policy: str, optional
    :param ttl_seconds_after_finished: Remove jobs after execution with ttl, defaults to 3600
    :type ttl_seconds_after_finished: int, optional
    :param restart_policy: Restart the pod on the same node after failure, defaults to "Never"
    :type restart_policy: str, optional
    :param backoff_limit: Retries after failure, defaults to 0
    :type backoff_limit: int, optional
    :param active_deadline_seconds: Timeout on a job, defaults to 3600 seconds
    :type active_deadline_seconds: int, optional
    :param service_account: Kubernetes service account, defaults to "stackl-agent-stackl-agent"
    :type service_account: str, optional
    :param labels: metadata labels, defaults to None
    :type labels: dict, optional
    :return: automation Job object plus the list of ConfigMaps created for config_map volumes
    :rtype: Tuple[client.V1Job, list]
    """
    id_job = id_generator()
    name = name + "-" + id_job
    body = client.V1Job(api_version=api_version, kind="Job")
    body.metadata = client.V1ObjectMeta(namespace=namespace, name=name)
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()
    k8s_volumes = []

    cms = []

    logging.debug(f"volumes: {volumes}")
    # create a k8s volume for each element in volumes
    for vol in volumes:
        vol_name = name + "-" + vol["name"]
        k8s_volume = client.V1Volume(name=vol_name)
        if vol["type"] == "config_map":
            config_map = client.V1ConfigMapVolumeSource()
            config_map.name = vol_name
            k8s_volume.config_map = config_map
            cms.append(create_cm(vol_name, namespace, vol['data']))
            vol['name'] = vol_name
        if vol["type"] == "empty_dir":
            k8s_volume.empty_dir = client.V1EmptyDirVolumeSource(
                medium="Memory")
            vol['name'] = vol_name
        k8s_volumes.append(k8s_volume)

    logging.debug(f"Volumes created for job {name}: {k8s_volumes}")

    # create a volume mount for each element in volumes
    k8s_volume_mounts = []
    for vol in volumes:
        if vol["mount_path"]:
            volume_mount = client.V1VolumeMount(name=vol["name"],
                                                mount_path=vol["mount_path"])
            if "sub_path" in vol:
                volume_mount.sub_path = vol["sub_path"]
            k8s_volume_mounts.append(volume_mount)

    logging.debug(f"Volume mounts created for job {name}: {k8s_volume_mounts}")

    # create an environment list
    k8s_env_list = []

    if env_list:
        for key, value in env_list.items():
            if isinstance(value, dict):
                if 'config_map_key_ref' in value:
                    k8s_env_from = client.V1EnvVar(
                        name=key,
                        value_from=client.V1EnvVarSource(
                            config_map_key_ref=client.V1ConfigMapKeySelector(
                                name=value['config_map_key_ref']["name"],
                                key=value['config_map_key_ref']["key"])))
                    k8s_env_list.append(k8s_env_from)
                elif 'field_ref' in value:
                    k8s_env_from = client.V1EnvVar(
                        name=key,
                        value_from=client.V1EnvVarSource(
                            field_ref=client.V1ObjectFieldSelector(
                                field_path=value['field_ref'])))
                    k8s_env_list.append(k8s_env_from)
            else:
                k8s_env = client.V1EnvVar(name=key, value=value)
                k8s_env_list.append(k8s_env)

    k8s_env_from_list = []

    # if env_from:
    #     for env in env_from:
    #         if 'config_map_ref' in env:
    #             k8s_env_from = client.V1EnvFromSource(
    #                 config_map_ref=env['config_map_ref'])
    #             k8s_env_from_list.append(k8s_env_from)
    #         elif 'secret_ref' in env:
    #             k8s_env_from = client.V1EnvFromSource(
    #                 secret_ref=env['secret_ref'])
    #             k8s_env_from_list.append(k8s_env_from)

    logging.debug(f"Environment list created for job {name}: {k8s_env_list}")
    print(f"Environment list created for job {name}: {k8s_env_list}")

    container = client.V1Container(name=container_name,
                                   image=container_image,
                                   env=k8s_env_list,
                                   volume_mounts=k8s_volume_mounts,
                                   image_pull_policy=image_pull_policy,
                                   command=command,
                                   args=command_args,
                                   env_from=k8s_env_from_list)

    k8s_init_containers = []

    logging.debug(f"Init containers for job {name}: {init_containers}")
    for c in init_containers:
        k8s_c = client.V1Container(name=c['name'],
                                   image=c['image'],
                                   volume_mounts=k8s_volume_mounts,
                                   env=k8s_env_list)

        if 'args' in c:
            k8s_c.args = c['args']

        k8s_init_containers.append(k8s_c)

    k8s_secrets = []
    for secret in image_pull_secrets:
        k8s_secrets.append(client.V1LocalObjectReference(name=secret))

    logging.debug(f"Secret list created for job {name}: {k8s_secrets}")

    containers = [container]
    if output:
        output.volume_mounts = k8s_volume_mounts
        output.env = k8s_env_list
        output_containers = output.containers
        containers = containers + output_containers

    template.template.metadata = client.V1ObjectMeta(labels=labels)
    template.template.spec = client.V1PodSpec(
        containers=containers,
        restart_policy=restart_policy,
        image_pull_secrets=k8s_secrets,
        volumes=k8s_volumes,
        init_containers=k8s_init_containers,
        service_account_name=service_account)
    template.template = client.V1PodTemplateSpec(
        metadata=template.template.metadata, spec=template.template.spec)
    body.spec = client.V1JobSpec(
        ttl_seconds_after_finished=ttl_seconds_after_finished,
        template=template.template,
        backoff_limit=backoff_limit,
        active_deadline_seconds=active_deadline_seconds)

    return body, cms
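The expected shape of the volumes argument, inferred from the loops above (keys name, type and mount_path, optional sub_path; config_map entries also carry the data passed to create_cm); values here are illustrative:

volumes = [
    {'name': 'inventory', 'type': 'config_map', 'mount_path': '/opt/inventory',
     'data': {'hosts': '...'}},
    {'name': 'scratch', 'type': 'empty_dir', 'mount_path': '/tmp/scratch'},
]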
Example #17
def create_job(MODEL):

    assert MODEL is not None, "model name is None, cannot spawn a new worker"

    api = client.BatchV1Api()

    body = client.V1Job(api_version="batch/v1", kind="Job")
    name = 'speechlab-worker-job-{}-{}'.format(MODEL.lower().replace("_", "-"),
                                               id_generator())
    body.metadata = client.V1ObjectMeta(namespace=NAMESPACE, name=name)
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()
    template.template = client.V1PodTemplateSpec()
    template.template.metadata = client.V1ObjectMeta(
        annotations={
            "prometheus.io/scrape": "true",
            "prometheus.io/port": "8081"
        })
    azure_file_volume = client.V1AzureFileVolumeSource(
        read_only=True,
        secret_name=MODELS_FILESHARE_SECRET,
        share_name=MODELS_SHARE_NAME)
    volume = client.V1Volume(name="models-azurefiles",
                             azure_file=azure_file_volume)
    env_vars = {
        "AZURE_STORAGE_ACCOUNT": AZURE_STORAGE_ACCOUNT,
        "AZURE_STORAGE_ACCESS_KEY": AZURE_STORAGE_ACCESS_KEY,
        "AZURE_CONTAINER": AZURE_CONTAINER,
        "MASTER": MASTER,
        "NAMESPACE": NAMESPACE,
        "RUN_FREQ": "ONCE",
        "MODEL_DIR": MODEL,  # important
        "MODELS_FILESHARE_SECRET": MODELS_FILESHARE_SECRET,
        "MODELS_SHARE_NAME": MODELS_SHARE_NAME
    }

    env_list = []
    if env_vars:
        for env_name, env_value in env_vars.items():
            env_list.append(client.V1EnvVar(name=env_name, value=env_value))

    container = client.V1Container(
        name='{}-c'.format(name),
        image=IMAGE,
        image_pull_policy="IfNotPresent",
        command=[
            "/home/appuser/opt/tini", "--", "/home/appuser/opt/start_worker.sh"
        ],
        env=env_list,
        ports=[client.V1ContainerPort(container_port=8081, name="prometheus")],
        security_context=client.V1SecurityContext(
            privileged=True,
            capabilities=client.V1Capabilities(add=["SYS_ADMIN"])),
        resources=client.V1ResourceRequirements(limits={
            "memory": "5G",
            "cpu": "1"
        },
                                                requests={
                                                    "memory": "5G",
                                                    "cpu": "1"
                                                }),
        volume_mounts=[
            client.V1VolumeMount(mount_path="/home/appuser/opt/models",
                                 name="models-azurefiles",
                                 read_only=True)
        ])
    template.template.spec = client.V1PodSpec(
        containers=[container],
        image_pull_secrets=[{
            "name": "azure-cr-secret"
        }],
        # reason to use OnFailure https://github.com/kubernetes/kubernetes/issues/20255
        restart_policy="OnFailure",
        volumes=[volume])

    # And finally we can create our V1JobSpec!
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=100,
                                 template=template.template)

    try:
        api_response = api.create_namespaced_job(NAMESPACE, body)
        print("api_response=" + str(api_response))
        return True
    except ApiException as e:
        logging.exception('error spawning new job')
        print("Exception when creating a job: %s\n" % e)