Example #1
    def test_pod_spec(self):
        cluster_spec = ClusterSpec(cluster_spec_json=test_spec)
        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, "other")

        self.assertEqual(
            pod.metadata.labels["elasticdl.org/app-name"], "elasticdl"
        )
        self.assertEqual(pod.metadata.labels["elasticdl.org/site"], "hangzhou")
        self.assertEqual(
            pod.metadata.annotations["tag.elasticdl.org/optimization"],
            "enabled",
        )
        expected_tolerations = [
            client.V1Toleration(
                effect="NoSchedule",
                key="elasticdl.org/logic-pool",
                operator="Equal",
                value="ElasticDL",
            )
        ]
        self.assertEqual(pod.spec.tolerations, expected_tolerations)
        match_expressions = [
            client.V1NodeSelectorRequirement(
                key="elasticdl.org/logic-pool",
                operator="In",
                values=["ElasticDL"],
            )
        ]

        expected_affinity = client.V1Affinity(
            node_affinity=client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=(
                    client.V1NodeSelector(
                        node_selector_terms=[
                            client.V1NodeSelectorTerm(
                                match_expressions=match_expressions
                            )
                        ]
                    )
                )
            )
        )
        self.assertEqual(pod.spec.affinity, expected_affinity)

        expected_env = []
        expected_env.append(client.V1EnvVar(name="LOG_ENABLED", value="true"))
        self.assertEqual(pod.spec.containers[0].env, expected_env)

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.MASTER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Sun")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.WORKER)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Earth")

        pod = create_test_pod("test_spec")
        pod = cluster_spec.patch_pod(pod, PodType.PS)
        self.assertEqual(pod.metadata.labels["elasticdl.org/xyz"], "Moon")
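A minimal sketch of the cluster-spec JSON that ClusterSpec presumably consumes, inferred from the assertions above; the key names ("labels", "annotations", "env", and the per-pod-type sections) are assumptions, not ElasticDL's actual schema:

# Hypothetical cluster spec consistent with the assertions in test_pod_spec.
test_spec = """
{
    "labels": {
        "elasticdl.org/app-name": "elasticdl",
        "elasticdl.org/site": "hangzhou"
    },
    "annotations": {"tag.elasticdl.org/optimization": "enabled"},
    "env": [{"name": "LOG_ENABLED", "value": "true"}],
    "master": {"labels": {"elasticdl.org/xyz": "Sun"}},
    "worker": {"labels": {"elasticdl.org/xyz": "Earth"}},
    "ps": {"labels": {"elasticdl.org/xyz": "Moon"}}
}
"""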
Example #2
def pv_create(core_v1_api, pv_name):
    body = client.V1PersistentVolume(
        api_version="v1",
        kind="PersistentVolume",
        metadata=client.V1ObjectMeta(name=pv_name, labels={"key": "localpvs"}),
        spec=client.V1PersistentVolumeSpec(
            capacity={"storage": "0.5Gi"},
            volume_mode="Filesystem",
            access_modes=["ReadWriteOnce"],
            persistent_volume_reclaim_policy="Recycle",
            local={
                "path":
                "/home/damu/Documents/kubernet/project/CDN_project/volumes/{_name}"
                .format(_name=pv_name)
            },
            node_affinity=client.V1VolumeNodeAffinity(
                required=client.V1NodeSelector([
                    client.V1NodeSelectorTerm(match_expressions=[
                        client.V1NodeSelectorRequirement(
                            key="kubernetes.io/hostname",
                            operator="In",
                            values=["minikube"])
                    ])
                ]))))
    core_v1_api.create_persistent_volume(body=body)
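A usage sketch, assuming a standard kubeconfig; the PV name is arbitrary:

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a cluster
pv_create(client.CoreV1Api(), "localpv-0")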
Example #3
    def get_affinity(self):
        """Determine the affinity term for the build pod.

        There are two affinity strategies; which one is used depends on how
        the BinderHub is configured.

        In the default setup, each build pod carries an "anti-affinity" that
        makes the pods prefer to schedule on separate nodes.

        In a setup with docker-in-docker enabled, pods for a particular
        repository prefer to schedule on the same node in order to reuse the
        docker layer cache of previous builds.
        """
        resp = self.api.list_namespaced_pod(
            self.namespace,
            label_selector="component=dind,app=binder",
            _request_timeout=KUBE_REQUEST_TIMEOUT,
            _preload_content=False,
        )
        dind_pods = json.loads(resp.read())

        if self.sticky_builds and dind_pods:
            node_names = [
                pod["spec"]["nodeName"] for pod in dind_pods["items"]
            ]
            ranked_nodes = rendezvous_rank(node_names, self.repo_url)
            best_node_name = ranked_nodes[0]

            affinity = client.V1Affinity(node_affinity=client.V1NodeAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    client.V1PreferredSchedulingTerm(
                        weight=100,
                        preference=client.V1NodeSelectorTerm(
                            match_expressions=[
                                client.V1NodeSelectorRequirement(
                                    key="kubernetes.io/hostname",
                                    operator="In",
                                    values=[best_node_name],
                                )
                            ]),
                    )
                ]))

        else:
            affinity = client.V1Affinity(
                pod_anti_affinity=client.V1PodAntiAffinity(
                    preferred_during_scheduling_ignored_during_execution=[
                        client.V1WeightedPodAffinityTerm(
                            weight=100,
                            pod_affinity_term=client.V1PodAffinityTerm(
                                topology_key="kubernetes.io/hostname",
                                label_selector=client.V1LabelSelector(
                                    match_labels=dict(
                                        component=self._component_label)),
                            ),
                        )
                    ]))

        return affinity
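rendezvous_rank is not shown in this snippet; a minimal sketch of rendezvous (highest-random-weight) hashing that yields a stable per-repository node ranking looks like the following — the exact hash and tie-breaking BinderHub uses may differ:

import hashlib

def rendezvous_rank(node_names, key):
    """Rank nodes so the same key keeps mapping to the same node as long
    as that node stays in the set (a sketch, not BinderHub's code)."""
    def score(node):
        return int(hashlib.sha256(f"{node}-{key}".encode()).hexdigest(), 16)
    return sorted(node_names, key=score, reverse=True)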
Example #4
File: base.py  Project: AlonMaor14/mlrun
 def _generate_affinity(self):
     return k8s_client.V1Affinity(
         node_affinity=k8s_client.V1NodeAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PreferredSchedulingTerm(
                     weight=1,
                     preference=k8s_client.V1NodeSelectorTerm(
                         match_expressions=[
                             k8s_client.V1NodeSelectorRequirement(
                                 key="some_node_label",
                                 operator="In",
                                 values=[
                                     "possible-label-value-1",
                                     "possible-label-value-2",
                                 ],
                             )
                         ]),
                 )
             ],
             required_during_scheduling_ignored_during_execution=k8s_client.
             V1NodeSelector(node_selector_terms=[
                 k8s_client.V1NodeSelectorTerm(match_expressions=[
                     k8s_client.V1NodeSelectorRequirement(
                         key="some_node_label",
                         operator="In",
                         values=[
                             "required-label-value-1",
                             "required-label-value-2",
                         ],
                     )
                 ]),
             ]),
         ),
         pod_affinity=k8s_client.V1PodAffinity(
             required_during_scheduling_ignored_during_execution=[
                 k8s_client.V1PodAffinityTerm(
                     label_selector=k8s_client.V1LabelSelector(
                         match_labels={
                             "some-pod-label-key": "some-pod-label-value"
                         }),
                     namespaces=["namespace-a", "namespace-b"],
                     topology_key="key-1",
                 )
             ]),
         pod_anti_affinity=k8s_client.V1PodAntiAffinity(
             preferred_during_scheduling_ignored_during_execution=[
                 k8s_client.V1WeightedPodAffinityTerm(
                     weight=1,
                     pod_affinity_term=k8s_client.V1PodAffinityTerm(
                         label_selector=k8s_client.V1LabelSelector(
                             match_expressions=[
                                 k8s_client.V1LabelSelectorRequirement(
                                     key="some_pod_label",
                                     operator="NotIn",
                                     values=[
                                         "forbidden-label-value-1",
                                         "forbidden-label-value-2",
                                     ],
                                 )
                             ]),
                         namespaces=["namespace-c"],
                         topology_key="key-2",
                     ),
                 )
             ]),
     )
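To inspect the manifest such an object produces, the official client's serializer can be used; sanitize_for_serialization converts the snake_case model fields into the camelCase keys Kubernetes expects:

import json
from kubernetes import client as k8s_client

affinity = k8s_client.V1Affinity(
    node_affinity=k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(
            node_selector_terms=[
                k8s_client.V1NodeSelectorTerm(match_expressions=[
                    k8s_client.V1NodeSelectorRequirement(
                        key="some_node_label", operator="In", values=["v1"])
                ])
            ])))

# Prints requiredDuringSchedulingIgnoredDuringExecution etc., ready for a manifest.
print(json.dumps(k8s_client.ApiClient().sanitize_for_serialization(affinity), indent=2))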
Example #5
 def export_deployment(self):
      # Configure the Pod template container
     volume_mounts = []
     containers = []
     volumes = []
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/docker/logs', name='logs'))
     volumes.append(
         client.V1Volume(name='logs',
                         host_path=client.V1HostPathVolumeSource(
                             path='/opt/logs', type='DirectoryOrCreate')))
     if self.mounts:
         for path in self.mounts:
             volume_mounts.append(
                 client.V1VolumeMount(mount_path=path,
                                      name=self.mounts[path]))
             volumes.append(
                 client.V1Volume(name=self.mounts[path],
                                 host_path=client.V1HostPathVolumeSource(
                                     path=path, type='DirectoryOrCreate')))
     liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                     tcp_socket=client.V1TCPSocketAction(
                                         port=int(self.container_port[0])))
     readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                      tcp_socket=client.V1TCPSocketAction(
                                          port=int(self.container_port[0])))
     if self.healthcheck:
         liveness_probe = client.V1Probe(initial_delay_seconds=15,
                                         http_get=client.V1HTTPGetAction(
                                             path=self.healthcheck,
                                             port=int(
                                                 self.container_port[0])))
         readiness_probe = client.V1Probe(initial_delay_seconds=15,
                                          http_get=client.V1HTTPGetAction(
                                              path=self.healthcheck,
                                              port=int(
                                                  self.container_port[0])))
     Env = [
         client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
         client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8'),
         client.V1EnvVar(name='POD_NAME',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='metadata.name'))),
         client.V1EnvVar(name='POD_IP',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='status.podIP'))),
     ]
     container = client.V1Container(
         name=self.dm_name,
         image=self.image,
         ports=[
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ],
         image_pull_policy='Always',
         env=Env,
         resources=client.V1ResourceRequirements(limits=self.re_limits,
                                                 requests=self.re_requests),
         volume_mounts=volume_mounts,
         liveness_probe=liveness_probe,
         readiness_probe=readiness_probe)
     containers.append(container)
     if self.sidecar:
         sidecar_container = client.V1Container(
             name='sidecar-%s' % self.dm_name,
             image=self.sidecar,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts)
         containers.append(sidecar_container)
      # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=containers,
             image_pull_secrets=[secrets],
             volumes=volumes,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='project',
                                     operator='In',
                                     values=['moji'])
                             ]),
                         weight=30),
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='deploy',
                                     operator='In',
                                     values=[self.dm_name])
                             ]),
                         weight=70)
                 ]))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
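Note that extensions/v1beta1 Deployments were removed in Kubernetes 1.16, and the ExtensionsV1beta1* models are gone from recent client releases; a sketch of the equivalent apps/v1 wrapping, reusing template, selector, and self from the method above, would be:

spec = client.V1DeploymentSpec(replicas=int(self.replicas),
                               template=template,
                               selector=selector,
                               min_ready_seconds=3)
deployment = client.V1Deployment(api_version="apps/v1",
                                 kind="Deployment",
                                 metadata=client.V1ObjectMeta(name=self.dm_name),
                                 spec=spec)
# created with client.AppsV1Api().create_namespaced_deployment(namespace, deployment)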
def create_deployment_old(config_file):
    """
    Create the IBM Spectrum Scale CSI Operator deployment object in the operator
    namespace, using the deployment_operator_image_for_crd and
    deployment_driver_image_for_crd parameters from the config.json file

    Args:
        param1: config_file - configuration json file

    Returns:
       None

    Raises:
        Raises an exception on kubernetes client api failure and asserts

    """

    deployment_apps_api_instance = client.AppsV1Api()

    deployment_labels = {
        "app.kubernetes.io/instance": "ibm-spectrum-scale-csi-operator",
        "app.kubernetes.io/managed-by": "ibm-spectrum-scale-csi-operator",
        "app.kubernetes.io/name": "ibm-spectrum-scale-csi-operator",
        "product": "ibm-spectrum-scale-csi",
        "release": "ibm-spectrum-scale-csi-operator"
    }

    deployment_annotations = {
        "productID": "ibm-spectrum-scale-csi-operator",
        "productName": "IBM Spectrum Scale CSI Operator",
        "productVersion": "2.0.0"
    }

    deployment_metadata = client.V1ObjectMeta(
        name="ibm-spectrum-scale-csi-operator",
        labels=deployment_labels,
        namespace=namespace_value)

    deployment_selector = client.V1LabelSelector(
        match_labels={
            "app.kubernetes.io/name": "ibm-spectrum-scale-csi-operator"
        })

    podtemplate_metadata = client.V1ObjectMeta(
        labels=deployment_labels, annotations=deployment_annotations)

    pod_affinity = client.V1Affinity(node_affinity=client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=client.
        V1NodeSelector(node_selector_terms=[
            client.V1NodeSelectorTerm(match_expressions=[
                client.V1NodeSelectorRequirement(key="beta.kubernetes.io/arch",
                                                 operator="Exists")
            ])
        ])))
    ansible_pod_container = client.V1Container(
        image=config_file["deployment_operator_image_for_crd"],
        command=[
            "/usr/local/bin/ao-logs", "/tmp/ansible-operator/runner", "stdout"
        ],
        liveness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=10,
            period_seconds=30),
        readiness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=3,
            period_seconds=1),
        name="ansible",
        image_pull_policy="IfNotPresent",
        security_context=client.V1SecurityContext(
            capabilities=client.V1Capabilities(drop=["ALL"])),
        volume_mounts=[
            client.V1VolumeMount(mount_path="/tmp/ansible-operator/runner",
                                 name="runner",
                                 read_only=True)
        ],
        env=[
            client.V1EnvVar(
                name="CSI_DRIVER_IMAGE",
                value=config_file["deployment_driver_image_for_crd"])
        ])

    operator_pod_container = client.V1Container(
        image=config_file["deployment_operator_image_for_crd"],
        name="operator",
        image_pull_policy="IfNotPresent",
        liveness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=10,
            period_seconds=30),
        readiness_probe=client.V1Probe(
            _exec=client.V1ExecAction(command=["/health_check.sh"]),
            initial_delay_seconds=3,
            period_seconds=1),
        security_context=client.V1SecurityContext(
            capabilities=client.V1Capabilities(drop=["ALL"])),
        env=[
            client.V1EnvVar(name="WATCH_NAMESPACE",
                            value_from=client.V1EnvVarSource(
                                field_ref=client.V1ObjectFieldSelector(
                                    field_path="metadata.namespace"))),
            client.V1EnvVar(name="POD_NAME",
                            value_from=client.V1EnvVarSource(
                                field_ref=client.V1ObjectFieldSelector(
                                    field_path="metadata.name"))),
            client.V1EnvVar(name="OPERATOR_NAME",
                            value="ibm-spectrum-scale-csi-operator"),
            client.V1EnvVar(
                name="CSI_DRIVER_IMAGE",
                value=config_file["deployment_driver_image_for_crd"])
        ],
        volume_mounts=[
            client.V1VolumeMount(mount_path="/tmp/ansible-operator/runner",
                                 name="runner")
        ])
    pod_spec = client.V1PodSpec(
        affinity=pod_affinity,
        containers=[ansible_pod_container, operator_pod_container],
        service_account_name="ibm-spectrum-scale-csi-operator",
        volumes=[
            client.V1Volume(
                empty_dir=client.V1EmptyDirVolumeSource(medium="Memory"),
                name="runner")
        ])

    podtemplate_spec = client.V1PodTemplateSpec(metadata=podtemplate_metadata,
                                                spec=pod_spec)

    deployment_spec = client.V1DeploymentSpec(replicas=1,
                                              selector=deployment_selector,
                                              template=podtemplate_spec)

    body_dep = client.V1Deployment(kind='Deployment',
                                   api_version='apps/v1',
                                   metadata=deployment_metadata,
                                   spec=deployment_spec)

    try:
        LOGGER.info("creating deployment for operator")
        deployment_apps_api_response = deployment_apps_api_instance.create_namespaced_deployment(
            namespace=namespace_value, body=body_dep)
        LOGGER.debug(str(deployment_apps_api_response))
    except ApiException as e:
        LOGGER.error(
            f"Exception when calling RbacAuthorizationV1Api->create_namespaced_deployment: {e}"
        )
        assert False
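A matching teardown, assuming the same namespace_value global and LOGGER, might look like this sketch:

def delete_deployment_old():
    """Delete the operator deployment created above (test-cleanup sketch)."""
    deployment_apps_api_instance = client.AppsV1Api()
    try:
        LOGGER.info("deleting deployment for operator")
        deployment_apps_api_instance.delete_namespaced_deployment(
            name="ibm-spectrum-scale-csi-operator",
            namespace=namespace_value,
            body=client.V1DeleteOptions(propagation_policy="Foreground"))
    except ApiException as e:
        LOGGER.error(f"Exception when deleting deployment: {e}")
        assert False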
Example #7
def clean_pod_template(pod_template, match_node_purpose="prefer", pod_type="worker"):
    """ Normalize pod template and check for type errors """
    if isinstance(pod_template, str):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s"
            "If trying to pass a yaml filename then use "
            "KubeCluster.from_yaml"
        )
        raise TypeError(msg % pod_template)

    if isinstance(pod_template, dict):
        msg = (
            "Expected a kubernetes.client.V1Pod object, got %s"
            "If trying to pass a dictionary specification then use "
            "KubeCluster.from_dict"
        )
        raise TypeError(msg % str(pod_template))

    pod_template = copy.deepcopy(pod_template)

    # Make sure metadata / labels / env objects exist, so they can be modified
    # later without a lot of `is None` checks
    if pod_template.metadata is None:
        pod_template.metadata = client.V1ObjectMeta()
    if pod_template.metadata.labels is None:
        pod_template.metadata.labels = {}

    if pod_template.spec.containers[0].env is None:
        pod_template.spec.containers[0].env = []

    # add default tolerations
    tolerations = [
        client.V1Toleration(
            key="k8s.dask.org/dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
        # GKE currently does not permit creating taints on a node pool
        # with a `/` in the key field
        client.V1Toleration(
            key="k8s.dask.org_dedicated",
            operator="Equal",
            value=pod_type,
            effect="NoSchedule",
        ),
    ]

    if pod_template.spec.tolerations is None:
        pod_template.spec.tolerations = tolerations
    else:
        pod_template.spec.tolerations.extend(tolerations)

    # add default node affinity to k8s.dask.org/node-purpose=worker
    if match_node_purpose != "ignore":
        # for readability
        affinity = pod_template.spec.affinity

        if affinity is None:
            affinity = client.V1Affinity()
        if affinity.node_affinity is None:
            affinity.node_affinity = client.V1NodeAffinity()

        # a common object for both a preferred and a required node affinity
        node_selector_term = client.V1NodeSelectorTerm(
            match_expressions=[
                client.V1NodeSelectorRequirement(
                    key="k8s.dask.org/node-purpose", operator="In", values=[pod_type]
                )
            ]
        )

        if match_node_purpose == "require":
            if (
                affinity.node_affinity.required_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.required_during_scheduling_ignored_during_execution = client.V1NodeSelector(
                    node_selector_terms=[]
                )
            affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.append(
                node_selector_term
            )
        elif match_node_purpose == "prefer":
            if (
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution
                is None
            ):
                affinity.node_affinity.preferred_during_scheduling_ignored_during_execution = (
                    []
                )
            preferred_scheduling_terms = [
                client.V1PreferredSchedulingTerm(
                    preference=node_selector_term, weight=100
                )
            ]
            affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.extend(
                preferred_scheduling_terms
            )
        else:
            raise ValueError(
                'Attribute must be one of "ignore", "prefer", or "require".'
            )
        pod_template.spec.affinity = affinity

    return pod_template
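A usage sketch: starting from a bare worker pod, match_node_purpose="require" turns the node-purpose affinity into a hard constraint:

from kubernetes import client

pod = client.V1Pod(spec=client.V1PodSpec(containers=[
    client.V1Container(name="dask-worker", image="daskdev/dask:latest")
]))
pod = clean_pod_template(pod, match_node_purpose="require", pod_type="worker")
assert pod.spec.tolerations[0].key == "k8s.dask.org/dedicated"
assert (pod.spec.affinity.node_affinity
        .required_during_scheduling_ignored_during_execution
        .node_selector_terms[0].match_expressions[0].values == ["worker"])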
Example #8
def update_deploy_v2():
    data = json.loads(request.get_data().decode('UTF-8'))
    current_app.logger.debug("Received data: {}".format(data))
    namespace = handle_input(data.get('namespace'))
    deploy_name = handle_input(data.get('deploy_name'))
    action = handle_input(data.get('action'))

    image = None
    replicas = None
    toleration = None
    pod_anti_affinity = None
    pod_affinity = None
    node_affinity = None
    labels = None
    if action == "add_pod_anti_affinity":
        print("正在运行{}操作".format(action))
        affinity = handle_input(data.get('pod_anti_affinity'))
        affinity_type = handle_input(affinity.get('type'))

        labelSelector = handle_input(affinity.get('labelSelector'))
        key = handle_input(affinity.get('key'))
        value = handle_input(affinity.get('value'))

        topologyKey = handle_input(affinity.get('topologyKey'))
        if affinity_type == "required":
            if labelSelector == "matchExpressions":
                if not isinstance(value, list):
                    value = [value]
                operator = handle_input(affinity.get('operator'))
                if operator != 'In' and operator != 'NotIn':
                    value = None
                print(value)
                label_selector = client.V1LabelSelector(match_expressions=[
                    client.V1LabelSelectorRequirement(
                        key=key, operator=operator, values=value)
                ])
            elif labelSelector == "matchLabels":
                if isinstance(value, list):
                    return jsonify(
                        {"error": "{}模式下不支持values设置为数组".format(labelSelector)})
                label_selector = client.V1LabelSelector(
                    match_labels={key: value})
            else:
                return jsonify(
                    {"error": "unsupported labelSelector {}".format(labelSelector)})
            pod_anti_affinity = client.V1PodAntiAffinity(
                required_during_scheduling_ignored_during_execution=[
                    client.V1PodAffinityTerm(label_selector=label_selector,
                                             topology_key=topologyKey)
                ])
            print("添加的互斥调度为:{}".format(pod_anti_affinity))
        elif affinity_type == "preferred":
            weight = string_to_int(handle_input(affinity.get('weight')))
            if weight is None:
                return jsonify(
                    {"error": "weight is required for the {} type".format(affinity_type)})

            if labelSelector == "matchExpressions":
                if not isinstance(value, list):
                    value = [value]

                operator = handle_input(affinity.get('operator'))
                if operator != 'In' and operator != 'NotIn':
                    value = None
                label_selector = client.V1LabelSelector(match_expressions=[
                    client.V1LabelSelectorRequirement(
                        key=key, operator=operator, values=value)
                ])
            elif labelSelector == "matchLabels":
                if isinstance(value, list):
                    return jsonify(
                        {"error": "{}模式下不支持values设置为数组".format(labelSelector)})
                label_selector = client.V1LabelSelector(
                    match_labels={key: value})
            else:
                return jsonify(
                    {"error": "不支持{} labelSelector".format(labelSelector)})
            pod_anti_affinity = client.V1PodAntiAffinity(
                preferred_during_scheduling_ignored_during_execution=[
                    client.V1WeightedPodAffinityTerm(
                        pod_affinity_term=client.V1PodAffinityTerm(
                            label_selector=label_selector,
                            topology_key=topologyKey),
                        weight=weight)
                ])
            print("添加的互斥调度为:{}".format(pod_anti_affinity))
        else:
            return jsonify({"error": "不支持{}这种调度".format(affinity_type)})
    elif action == "delete_pod_anti_affinity":
        print("正在运行{}操作".format(action))
        pass
    elif action == "add_node_affinity":
        current_app.logger.debug("正在运行{}操作".format(action))
        affinity = handle_input(data.get('node_affinity'))
        node_affinity_type = handle_input(affinity.get('type'))

        nodeSelector = handle_input(affinity.get('nodeSelector'))
        key = handle_input(affinity.get('key'))
        value = handle_input(affinity.get('value'))
        operator = handle_input(affinity.get('operator'))
        values = []
        if operator in ('Exists', 'DoesNotExist'):
            values = None  # these operators must not carry values
        else:
            if not isinstance(value, list):
                values.append(value)
            else:
                values = value

        if node_affinity_type == "preferred":
            weight = string_to_int(handle_input(affinity.get('weight')))
            if weight is None:
                return simple_error_handle(
                    "weight is required for the {} type".format(node_affinity_type))
            preferred_term = []
            if nodeSelector == "matchExpressions":
                match_expressions = []
                expression = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_expressions.append(expression)
                preference = client.V1NodeSelectorTerm(
                    match_expressions=match_expressions)
            # nodeSelector == "matchFields"
            else:
                match_fields = []
                field = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_fields.append(field)
                preference = client.V1NodeSelectorTerm(
                    match_fields=match_fields)
            term = client.V1PreferredSchedulingTerm(
                weight=weight,
                preference=preference,
            )
            preferred_term.append(term)
            node_affinity = client.V1NodeAffinity(
                # append the preferred terms directly
                preferred_during_scheduling_ignored_during_execution=
                preferred_term)
        elif node_affinity_type == "required":
            current_app.logger.debug(
                "node_affinity_type:{}".format(node_affinity_type))
            node_selector_terms = []
            if nodeSelector == "matchExpressions":
                match_expressions = []
                expression = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_expressions.append(expression)
                term = client.V1NodeSelectorTerm(
                    match_expressions=match_expressions)
            else:
                match_fields = []
                field = client.V1NodeSelectorRequirement(
                    key=key,
                    operator=operator,
                    values=values,
                )
                match_fields.append(field)

                term = client.V1NodeSelectorTerm(match_fields=match_fields)
            node_selector_terms.append(term)
            node_affinity = client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=client.
                V1NodeSelector(node_selector_terms=node_selector_terms))
        else:
            return simple_error_handle("不支持{}这种调度".format(node_affinity_type))
    elif action == "delete_node_affinity":
        print("正在运行{}操作".format(action))
        pass
    elif action == "add_toleration":
        print("正在运行{}操作".format(action))
        t = handle_input(data.get("toleration"))
        print(type(toleration), toleration)

        effect = t.get('effect')
        key = t.get('key')
        operator = t.get('operator')
        value = t.get('value')
        toleration_seconds = handle_toleraion_seconds(
            t.get('toleration_seconds'))
        print("toleration_seconds:{}".format(toleration_seconds))
        toleration = client.V1Toleration(effect=effect,
                                         key=key,
                                         operator=operator,
                                         toleration_seconds=toleration_seconds,
                                         value=value)
        print(toleration)
        if not toleration:
            msg = "{}需要提供toleration(effect,key,operator,value,)".format(action)
            return jsonify({"error": msg})
    elif action == "delete_toleration":
        print("正在运行{}操作".format(action))
        t = handle_input(data.get("toleration"))
        effect = handle_toleration_item(t.get('effect'))
        key = handle_toleration_item(t.get('key'))
        operator = handle_toleration_item(t.get('operator'))
        value = handle_toleration_item(t.get('value'))
        toleration_seconds = handle_toleraion_seconds(
            t.get('toleration_seconds'))
        print("toleration_seconds:{}".format(toleration_seconds))

        # if (effect != None and key != None and operator != None):
        toleration = client.V1Toleration(effect=effect,
                                         key=key,
                                         operator=operator,
                                         toleration_seconds=toleration_seconds,
                                         value=value)
        if not toleration:
            msg = "{}需要提供toleration(effect,key,operator,value,)".format(action)
            return jsonify({"error": msg})
    elif action == "add_pod_affinity":
        pass
    elif action == "delete_pod_affinity":
        pass
    elif action == "update_replicas":
        replicas = handle_input(data.get('replicas'))
        if not replicas:
            msg = "{}需要提供replicas".format(action)
            return jsonify({"error": msg})
    elif action == "update_image":
        project = handle_input(data.get('project'))
        env = handle_input(data.get('env'))
        imageRepo = handle_input(data.get('imageRepo'))
        imageName = handle_input(data.get('imageName'))
        imageTag = handle_input(data.get('imageTag'))
        if (imageRepo is not None and project is not None and env is not None
                and imageName is not None and imageTag is not None):
            image = "{}/{}-{}/{}:{}".format(imageRepo, project, env, imageName,
                                            imageTag)
        print("image值{}".format(image))
        if not image:
            msg = "{}需要提供image".format(action)
            return jsonify({"error": msg})
    elif action == "add_labels":
        pass
    elif action == "delete_labels":
        pass
    else:
        msg = "暂时不支持{}操作".format(action)
        print(msg)
        return jsonify({"error": msg})
    return update_deployment_v2(deploy_name=deploy_name, namespace=namespace, action=action, image=image, replicas=replicas,toleration=toleration,node_affinity=node_affinity,\
                pod_anti_affinity=pod_anti_affinity,pod_affinity=pod_affinity,labels=labels)
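update_deployment_v2 itself is not shown; its core is presumably a read-modify-write against AppsV1Api, roughly like this sketch (argument validation elided, names assumed):

def update_deployment_v2(deploy_name, namespace, action, image=None, replicas=None,
                         toleration=None, node_affinity=None, pod_anti_affinity=None,
                         pod_affinity=None, labels=None):
    """Sketch: apply whichever field the action produced, then patch."""
    apps_v1 = client.AppsV1Api()
    deploy = apps_v1.read_namespaced_deployment(deploy_name, namespace)
    pod_spec = deploy.spec.template.spec
    if image:
        pod_spec.containers[0].image = image
    if replicas:
        deploy.spec.replicas = int(replicas)
    if toleration:
        pod_spec.tolerations = (pod_spec.tolerations or []) + [toleration]
    if node_affinity or pod_affinity or pod_anti_affinity:
        pod_spec.affinity = client.V1Affinity(node_affinity=node_affinity,
                                              pod_affinity=pod_affinity,
                                              pod_anti_affinity=pod_anti_affinity)
    apps_v1.patch_namespaced_deployment(deploy_name, namespace, deploy)
    return jsonify({"ok": True, "action": action})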
Example #9
 def export_deployment(self):
      # Configure the Pod template container
     volume_mounts = []
     containers = []
     volumes = []
     ports = []
     liveness_probe = None
     readiness_probe = None
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/docker/logs', name='logs'))
     volumes.append(
         client.V1Volume(name='logs',
                         host_path=client.V1HostPathVolumeSource(
                             path='/opt/logs', type='DirectoryOrCreate')))
     if self.mounts:
         for path in self.mounts:
             volume_mounts.append(
                 client.V1VolumeMount(mount_path=path,
                                      name=self.mounts[path]))
             volumes.append(
                 client.V1Volume(name=self.mounts[path],
                                 host_path=client.V1HostPathVolumeSource(
                                     path=path, type='DirectoryOrCreate')))
     if self.container_port:
         ports = [
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ]
         liveness_probe = client.V1Probe(
             initial_delay_seconds=15,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0])))
         readiness_probe = client.V1Probe(
             initial_delay_seconds=15,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0])))
         if self.healthcheck:
             liveness_probe = client.V1Probe(
                 initial_delay_seconds=15,
                 http_get=client.V1HTTPGetAction(
                     path=self.healthcheck,
                     port=int(self.container_port[0])))
             readiness_probe = client.V1Probe(
                 initial_delay_seconds=15,
                 http_get=client.V1HTTPGetAction(
                     path=self.healthcheck,
                     port=int(self.container_port[0])))
     Env = [
         client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
         client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8'),
         client.V1EnvVar(name='POD_NAME',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='metadata.name'))),
         client.V1EnvVar(name='POD_IP',
                         value_from=client.V1EnvVarSource(
                             field_ref=client.V1ObjectFieldSelector(
                                 field_path='status.podIP'))),
     ]
     container = client.V1Container(name=self.dm_name,
                                    image=self.image,
                                    ports=ports,
                                    image_pull_policy='Always',
                                    env=Env,
                                    resources=client.V1ResourceRequirements(
                                        limits=self.re_limits,
                                        requests=self.re_requests),
                                    volume_mounts=volume_mounts)
     if liveness_probe and readiness_probe:
         container = client.V1Container(
             name=self.dm_name,
             image=self.image,
             ports=ports,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts,
             liveness_probe=liveness_probe,
             readiness_probe=readiness_probe)
     containers.append(container)
     if self.sidecar:
         sidecar_container = client.V1Container(
             name='sidecar-%s' % self.dm_name,
             image=self.sidecar,
             image_pull_policy='Always',
             env=Env,
             resources=client.V1ResourceRequirements(
                 limits=self.re_limits, requests=self.re_requests),
             volume_mounts=volume_mounts)
         containers.append(sidecar_container)
      # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     preference_key = self.dm_name
     project_values = ['xxxx']
     host_aliases = []
     db_docker_hosts = db_op.docker_hosts
     values = db_docker_hosts.query.with_entities(
         db_docker_hosts.ip, db_docker_hosts.hostname).filter(
             and_(db_docker_hosts.deployment == self.dm_name,
                  db_docker_hosts.context == self.context)).all()
     db_op.DB.session.remove()
     if values:
         ips = []
         for value in values:
             try:
                 ip, hostname = value
                 key = "op_docker_hosts_%s" % ip
                 Redis.lpush(key, hostname)
                 ips.append(ip)
             except Exception as e:
                 logging.error(e)
         for ip in set(ips):
             try:
                 key = "op_docker_hosts_%s" % ip
                 if Redis.exists(key):
                     hostnames = Redis.lrange(key, 0, -1)
                     if hostnames:
                         host_aliases.append(
                             client.V1HostAlias(hostnames=hostnames, ip=ip))
                 Redis.delete(key)
             except Exception as e:
                 logging.error(e)
     if self.labels:
         if 'deploy' in self.labels:
             preference_key = self.labels['deploy']
         if 'project' in self.labels:
             project_values = [self.labels['project']]
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=containers,
             image_pull_secrets=[secrets],
             volumes=volumes,
             host_aliases=host_aliases,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key=preference_key,
                                     operator='In',
                                     values=['mark'])
                             ]),
                         weight=100)
                 ],
                 required_during_scheduling_ignored_during_execution=client.
                 V1NodeSelector(node_selector_terms=[
                     client.V1NodeSelectorTerm(match_expressions=[
                         client.V1NodeSelectorRequirement(
                             key='project',
                             operator='In',
                             values=project_values)
                     ])
                 ])))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
Example #10
def add_servers():

    if count_servers():
        return "You can't have more than 2 servers", 403
    game_id = request.json['game_id']
    params = request.json['parms']
    u_ip = request.remote_addr

    game = get_game_by_id(game_id)
    try:
        game.validate_params(params, game)
    except Exception as e:
        return str(e), 404

    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": u_ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [
        client.V1EnvVar(name="IP_ALLOC", value=ip_ext),
        client.V1EnvVar(name="IP_CREATOR", value=u_ip)
    ]
    containers = game.make_deployment(params)
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(limits={
                "cpu": "2",
                "memory": "1G"
            },
                                                                requests={
                                                                    "cpu": "1",
                                                                    "memory":
                                                                    "1G"
                                                                })
    deployment = client.V1Deployment(spec=client.V1DeploymentSpec(
        replicas=1,
        strategy=client.V1DeploymentStrategy(
            rolling_update=client.V1RollingUpdateDeployment(
                max_surge=0, max_unavailable=1)),
        selector=client.V1LabelSelector(match_labels=labels, ),
        template=client.V1PodTemplateSpec(spec=client.V1PodSpec(
            containers=containers,
            termination_grace_period_seconds=0,
            affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=client.
                V1NodeSelector(node_selector_terms=[
                    client.V1NodeSelectorTerm(match_expressions=[
                        client.V1NodeSelectorRequirement(
                            key="kubernetes.io/role",
                            operator="NotIn",
                            values=["shared"])
                    ])
                ])))))))
    service = client.V1Service(spec=client.V1ServiceSpec(
        type="ClusterIP",
        selector=labels,
        ports=game.make_service(params),
    ))
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata

    client.AppsV1Api().create_namespaced_deployment(
        namespace="gaas",
        body=deployment,
    )

    service_resp = client.CoreV1Api().create_namespaced_service(
        namespace="gaas",
        body=service,
    )
    return {"uid": uid, "ip": u_ip}
Example #11
 def export_deployment(self):
      # Configure the Pod template container
     volume_mounts = []
     volume_mounts.append(
         client.V1VolumeMount(mount_path='/opt/logs', name='logs'))
     if self.dm_name == 'launch':
         volume_mounts.append(
             client.V1VolumeMount(mount_path='/opt/%s/conf' % self.dm_name,
                                  name=self.dm_name))
     container = client.V1Container(
         name=self.dm_name,
         image=self.image,
         ports=[
             client.V1ContainerPort(container_port=int(port))
             for port in self.container_port
         ],
         image_pull_policy='Always',
         env=[
             client.V1EnvVar(name='LANG', value='en_US.UTF-8'),
             client.V1EnvVar(name='LC_ALL', value='en_US.UTF-8')
         ],
         resources=client.V1ResourceRequirements(limits=self.re_limits,
                                                 requests=self.re_requests),
         volume_mounts=volume_mounts,
         liveness_probe=client.V1Probe(
             initial_delay_seconds=30,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0]))),
         readiness_probe=client.V1Probe(
             initial_delay_seconds=30,
             tcp_socket=client.V1TCPSocketAction(
                 port=int(self.container_port[0]))))
      # Create and configure a spec section
     secrets = client.V1LocalObjectReference('registrysecret')
     volumes = []
     volume = client.V1Volume(
         name='logs',
         host_path=client.V1HostPathVolumeSource(path='/opt/logs'))
     volumes.append(volume)
     template = client.V1PodTemplateSpec(
         metadata=client.V1ObjectMeta(labels={"project": self.dm_name}),
         spec=client.V1PodSpec(
             containers=[container],
             image_pull_secrets=[secrets],
             volumes=volumes,
             affinity=client.V1Affinity(node_affinity=client.V1NodeAffinity(
                 preferred_during_scheduling_ignored_during_execution=[
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='project',
                                     operator='In',
                                     values=['moji'])
                             ]),
                         weight=30),
                     client.V1PreferredSchedulingTerm(
                         preference=client.V1NodeSelectorTerm(
                             match_expressions=[
                                 client.V1NodeSelectorRequirement(
                                     key='deploy',
                                     operator='In',
                                     values=[self.dm_name])
                             ]),
                         weight=70)
                 ]))))
     selector = client.V1LabelSelector(
         match_labels={"project": self.dm_name})
     # Create the specification of deployment
     spec = client.ExtensionsV1beta1DeploymentSpec(replicas=int(
         self.replicas),
                                                   template=template,
                                                   selector=selector,
                                                   min_ready_seconds=3)
     # Instantiate the deployment object
     deployment = client.ExtensionsV1beta1Deployment(
         api_version="extensions/v1beta1",
         kind="Deployment",
         metadata=client.V1ObjectMeta(name=self.dm_name),
         spec=spec)
     return deployment
Example #12
def submit_job(args, command=None):
    container_image = args.container
    container_name = args.name

    body = client.V1Job(api_version="batch/v1", kind="Job", metadata=client.V1ObjectMeta(name=container_name))
    body.status = client.V1JobStatus()
    template = client.V1PodTemplate()

    labels = {
        'hugin-job': "1",
        'hugin-job-name': f'{container_name}'
    }
    template.template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels)
    )

    tolerations = []
    env = []
    if args.environment:
        for env_spec in args.environment:
            env_name, env_value = env_spec.split("=", 1)
            env.append(client.V1EnvVar(name=env_name, value=env_value))

    container_args = dict(
        name=f"container-{container_name}",
        image=container_image,
        env=env,
    )

    if args.gpu:
        tolerations.append(client.V1Toleration(
            key='nvidia.com/gpu', operator='Exists', effect='NoSchedule'))
        container_args['resources'] = client.V1ResourceRequirements(
            limits={"nvidia.com/gpu": 1})
    if command or args.command:
        container_args['command'] = command if command else args.command

    container = client.V1Container(**container_args)
    pull_secrets = []
    if args.pull_secret is not None:
        pull_secrets.append(client.V1LocalObjectReference(name=args.pull_secret))
    pod_args = dict(containers=[container],
                    restart_policy='Never',
                    image_pull_secrets=pull_secrets)


    if tolerations:
        pod_args['tolerations'] = tolerations

    if args.node_selector is not None:
        parts = args.node_selector.split("=", 1)
        if len(parts) == 2:
            affinity = client.V1Affinity(
                node_affinity=client.V1NodeAffinity(
                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                        node_selector_terms=[client.V1NodeSelectorTerm(
                            match_expressions=[client.V1NodeSelectorRequirement(
                                key=parts[0], operator='In', values=[parts[1]])]
                        )]
                    )
                )
            )
            pod_args['affinity'] = affinity

    template.template.spec = client.V1PodSpec(**pod_args)
    body.spec = client.V1JobSpec(ttl_seconds_after_finished=1800, template=template.template)
    try:
        api_response = batch_v1.create_namespaced_job("default", body, pretty=True)
        #print (api_response)
    except client.exceptions.ApiException as e:
        logging.critical(f"Failed to start job: {e.reason}")
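batch_v1 is assumed to be a module-level API handle; typical setup plus an invocation (the argparse.Namespace here just stands in for the CLI parser's output):

import argparse
from kubernetes import client, config

config.load_kube_config()
batch_v1 = client.BatchV1Api()

args = argparse.Namespace(container="python:3.11-slim", name="demo-job",
                          environment=["MODE=test"], gpu=False, command=None,
                          pull_secret=None, node_selector="disktype=ssd")
submit_job(args, command=["python", "-c", "print('hello')"])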
Example #13
def create_run_pod(k8s_settings, run_context):
    run_id = run_context.id
    run_name = run_context.run.to_json()["name"]

    labels = {
        "run-name": run_name,
        "run": run_id,
    }
    env = get_run_pod_env_vars(run_context)
    node_topology_key = "kubernetes.io/hostname"
    # NOTE(taylor): preference to run on nodes with other runs
    pod_affinities = [
        k8s_client.V1WeightedPodAffinityTerm(
            weight=50,
            pod_affinity_term=k8s_client.V1PodAffinityTerm(
                label_selector=k8s_client.V1LabelSelector(match_labels={
                    "type": "run",
                }, ),
                topology_key=node_topology_key,
            ),
        ),
    ]
    volumes = []
    volume_mounts = []
    experiment_id = run_context.experiment
    if experiment_id:
        labels.update({"experiment": experiment_id})
        # NOTE(taylor): highest preference to run on nodes with runs in the same experiment
        pod_affinities.append(
            k8s_client.V1WeightedPodAffinityTerm(
                weight=100,
                pod_affinity_term=k8s_client.V1PodAffinityTerm(
                    label_selector=k8s_client.V1LabelSelector(match_labels={
                        "type":
                        "run",
                        "experiment":
                        experiment_id,
                    }, ),
                    topology_key=node_topology_key,
                ),
            ))

    unacceptable_node_group_types = ["system"]
    requests = k8s_settings.resources.get("requests") or {}
    limits = k8s_settings.resources.get("limits") or {}
    # NOTE(taylor): Preventing GPU-less jobs from running on GPU nodes forces the cluster autoscaler to scale up
    # CPU nodes. This prevents a situation where the GPU nodes are not scaled down because they are occupied by
    # CPU workloads. The cluster autoscaler does not know that it should create CPU nodes when the GPUs are unused.
    # TODO(taylor): This could cause unexpected behavior if the cluster has no CPU nodes. Running CPU jobs on GPU
    # nodes could also be an opportunity for more efficient resource utilization, but is avoided for now because the
    # workloads cannot be migrated onto CPU nodes by the cluster autoscaler as mentioned above.
    # NOTE(taylor): Applying a NoSchedule taint to GPU nodes is another way to achieve this behavior, but does not work as
    # well out of the box with clusters that orchestrate doesn't provision. Applying a PreferNoSchedule
    # taint to GPU nodes does not resolve the workload migration issue when there are no CPU nodes.
    if all(
            float(group.get("nvidia.com/gpu", 0)) == 0
            for group in (requests, limits)):
        unacceptable_node_group_types.append("gpu")

    node_affinity = k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=k8s_client.
        V1NodeSelector(node_selector_terms=[
            k8s_client.V1NodeSelectorTerm(match_expressions=[
                k8s_client.V1NodeSelectorRequirement(
                    key="orchestrate.sigopt.com/node-group-type",
                    operator="NotIn",
                    values=unacceptable_node_group_types,
                )
            ], )
        ], ), )
    pod_affinity = k8s_client.V1PodAffinity(
        preferred_during_scheduling_ignored_during_execution=pod_affinities, )

    pod = k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            owner_references=k8s_settings.owner_references,
            labels={
                "type": "run",
                **labels,
            },
            name=run_name,
        ),
        spec=k8s_client.V1PodSpec(
            affinity=k8s_client.V1Affinity(
                node_affinity=node_affinity,
                pod_affinity=pod_affinity,
            ),
            containers=[
                k8s_client.V1Container(
                    name="model-runner",
                    image=k8s_settings.image,
                    resources=k8s_client.V1ResourceRequirements(
                        **k8s_settings.resources),
                    image_pull_policy="Always",
                    command=[],
                    args=k8s_settings.args,
                    env=env,
                    volume_mounts=volume_mounts,
                    tty=True,
                ),
            ],
            volumes=volumes,
            restart_policy="Never",
        ),
    )
    k8s_settings.api.create_namespaced_pod(k8s_settings.namespace, pod)
    return pod
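Taken together, the required node anti-affinity above hard-excludes the unacceptable node group types, while the weighted pod affinities (weight 50 for any run, 100 for runs in the same experiment) only bias the scheduler toward co-location. As a minimal sketch, assuming only the kubernetes Python client (the key and values below are illustrative), the typed models can be serialized offline to inspect the manifest the API server would actually receive:

import json

from kubernetes import client as k8s_client

affinity = k8s_client.V1Affinity(
    node_affinity=k8s_client.V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=k8s_client.V1NodeSelector(
            node_selector_terms=[
                k8s_client.V1NodeSelectorTerm(
                    match_expressions=[
                        k8s_client.V1NodeSelectorRequirement(
                            key="orchestrate.sigopt.com/node-group-type",
                            operator="NotIn",
                            values=["system", "gpu"],
                        )
                    ]
                )
            ]
        )
    )
)
# sanitize_for_serialization turns the typed model into the plain camelCase
# dict that would be sent to the API server.
print(json.dumps(
    k8s_client.ApiClient().sanitize_for_serialization(affinity), indent=2))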
Example #14
import uuid

from kubernetes import client

# get_game_by_id, alloc_ip, and NAMESPACE come from the surrounding module.
def add(ip, game_id, params):
    game = get_game_by_id(game_id)
    game.validate_params(params)
    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [
        client.V1EnvVar(name="IP_ALLOC", value=ip_ext),
        client.V1EnvVar(name="IP_CREATOR", value=ip),
    ]
    containers = game.make_deployment(params)
    generic_ports = []
    # TODO(bluecmd): Hack to work around that not all
    # ports are routed to the VIP by default. This allows
    # outgoing connections from inside the pod on the VIP.
    for p in range(50000, 50016):
        generic_ports.append(client.V1ServicePort(
            name="internal-tcp-" + str(p), port=p, target_port=p, protocol="TCP"))
        generic_ports.append(client.V1ServicePort(
            name="internal-udp-" + str(p), port=p, target_port=p, protocol="UDP"))
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(
                limits={
                    "cpu": "4",
                    "memory": "32G"
                },
                requests={
                    "cpu": "2",
                    "memory": "16G"
                }
            )
    deployment = client.V1Deployment(
            spec=client.V1DeploymentSpec(
                replicas=1,
                # NOTE: V1DeploymentStrategy is the apps/v1 type; the original
                # AppsV1beta1* strategy classes never matched V1Deployment and
                # were removed from recent kubernetes clients.
                strategy=client.V1DeploymentStrategy(
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=0,
                        max_unavailable=1,
                    ),
                ),
                selector=client.V1LabelSelector(
                    match_labels=labels,
                ),
                template=client.V1PodTemplateSpec(
                    spec=client.V1PodSpec(
                        containers=containers,
                        termination_grace_period_seconds=0,
                        # TODO(bluecmd): Hack to work around that not all
                        # ports are routed to the VIP by default. This allows
                        # outgoing connections from inside the pod on the VIP.
                        security_context=client.V1PodSecurityContext(
                            sysctls=[client.V1Sysctl(
                                name='net.ipv4.ip_local_port_range',
                                value='50000 50015')]),
                        affinity=client.V1Affinity(
                            node_affinity=client.V1NodeAffinity(
                                required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                                    node_selector_terms=[
                                        client.V1NodeSelectorTerm(
                                            match_expressions=[
                                                client.V1NodeSelectorRequirement(
                                                    key="kubernetes.io/role",
                                                    operator="NotIn",
                                                    values=["shared"]
                                                )
                                            ]
                                        )
                                    ]
                                )
                            )
                        )
                    )
                )
            )
    )
    service = client.V1Service(
        spec=client.V1ServiceSpec(
            type="ClusterIP",
            selector=labels,
            ports=game.make_service(params) + generic_ports,
            external_i_ps=[ip_ext],
        )
    )
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata
    service.metadata.annotations = {"kube-router.io/service.dsr": "tunnel"}

    client.AppsV1Api().create_namespaced_deployment(
        namespace=NAMESPACE,
        body=deployment,
    )

    client.CoreV1Api().create_namespaced_service(
        namespace=NAMESPACE,
        body=service,
    )

    return {"uid": uid, "ip": ip}
Example #15
    @classmethod
    def from_runs(cls, id: str, runs: List[Run]):
        k8s_name = 'tensorboard-' + id
        run_names_hash = K8STensorboardInstance.generate_run_names_hash(runs)

        volume_mounts = []

        for run in runs:
            mount = k8s.V1VolumeMount(
                name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                mount_path=os.path.join(
                    cls.TENSORBOARD_CONTAINER_MOUNT_PATH_PREFIX, run.owner,
                    run.name),
                sub_path=os.path.join(run.owner, run.name))
            volume_mounts.append(mount)

        deployment_labels = {
            'name': k8s_name,
            'type': 'nauta-tensorboard',
            'nauta_app_name': 'tensorboard',
            'id': id,
            'runs-hash': run_names_hash
        }

        tensorboard_command = [
            "tensorboard", "--logdir",
            cls.TENSORBOARD_CONTAINER_MOUNT_PATH_PREFIX, "--port", "6006",
            "--host", "127.0.0.1"
        ]

        nauta_config = NautaPlatformConfig.incluster_init()

        tensorboard_image = nauta_config.get_tensorboard_image()
        tensorboard_proxy_image = nauta_config.get_activity_proxy_image()

        deployment = k8s.V1Deployment(
            api_version='apps/v1',
            kind='Deployment',
            metadata=k8s.V1ObjectMeta(name=k8s_name, labels=deployment_labels),
            spec=k8s.V1DeploymentSpec(
                replicas=1,
                selector=k8s.V1LabelSelector(match_labels=deployment_labels),
                template=k8s.V1PodTemplateSpec(
                    metadata=k8s.V1ObjectMeta(labels=deployment_labels),
                    spec=k8s.V1PodSpec(
                        tolerations=[
                            k8s.V1Toleration(key='master',
                                             operator='Exists',
                                             effect='NoSchedule')
                        ],
                        affinity=k8s.V1Affinity(
                            node_affinity=k8s.V1NodeAffinity(
                                required_during_scheduling_ignored_during_execution=k8s.V1NodeSelector(
                                    node_selector_terms=[
                                        k8s.V1NodeSelectorTerm(match_expressions=[
                                            k8s.V1NodeSelectorRequirement(
                                                key="master",
                                                operator="In",
                                                values=["True"])
                                        ])
                                    ]))),
                        containers=[
                            k8s.V1Container(name='app',
                                            image=tensorboard_image,
                                            command=tensorboard_command,
                                            volume_mounts=volume_mounts),
                            k8s.V1Container(
                                name='proxy',
                                image=tensorboard_proxy_image,
                                ports=[k8s.V1ContainerPort(container_port=80)],
                                readiness_probe=k8s.V1Probe(
                                    period_seconds=5,
                                    http_get=k8s.V1HTTPGetAction(
                                        path='/healthz', port=80)))
                        ],
                        volumes=[
                            k8s.V1Volume(
                                name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                                persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
                                    claim_name=cls.EXPERIMENTS_OUTPUT_VOLUME_NAME,
                                    read_only=True))
                        ]))))

        service = k8s.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8s.V1ObjectMeta(name=k8s_name,
                                      labels={
                                          'name': k8s_name,
                                          'type': 'nauta-tensorboard',
                                          'nauta_app_name': 'tensorboard',
                                          'id': id
                                      }),
            spec=k8s.V1ServiceSpec(
                type='ClusterIP',
                ports=[k8s.V1ServicePort(name='web', port=80, target_port=80)],
                selector={
                    'name': k8s_name,
                    'type': 'nauta-tensorboard',
                    'nauta_app_name': 'tensorboard',
                    'id': id
                }))

        ingress = k8s.V1beta1Ingress(
            api_version='extensions/v1beta1',
            kind='Ingress',
            metadata=k8s.V1ObjectMeta(
                name=k8s_name,
                labels={
                    'name': k8s_name,
                    'type': 'nauta-tensorboard',
                    'nauta_app_name': 'tensorboard',
                    'id': id
                },
                annotations={
                    'nauta.ingress.kubernetes.io/rewrite-target': '/',
                    'kubernetes.io/ingress.class': 'nauta-ingress'
                }),
            spec=k8s.V1beta1IngressSpec(rules=[
                k8s.V1beta1IngressRule(
                    host='localhost',
                    http=k8s.V1beta1HTTPIngressRuleValue(paths=[
                        k8s.V1beta1HTTPIngressPath(
                            path='/tb/' + id + "/",
                            backend=k8s.V1beta1IngressBackend(
                                service_name=k8s_name, service_port=80))
                    ]))
            ]))

        return cls(deployment=deployment, service=service, ingress=ingress)
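from_runs only constructs the three objects; nothing exists in the cluster until they are submitted. A minimal sketch of how the result might be applied, assuming the constructor stores the objects as same-named attributes, an in-cluster service account, and a kubernetes client old enough to still ship ExtensionsV1beta1Api (to match the extensions/v1beta1 Ingress above); the namespace value is illustrative:

from kubernetes import client as k8s, config

config.load_incluster_config()

# my_runs: List[Run] gathered elsewhere; 'abc123' is an illustrative id.
instance = K8STensorboardInstance.from_runs(id='abc123', runs=my_runs)

namespace = 'nauta'
k8s.AppsV1Api().create_namespaced_deployment(namespace, instance.deployment)
k8s.CoreV1Api().create_namespaced_service(namespace, instance.service)
k8s.ExtensionsV1beta1Api().create_namespaced_ingress(namespace, instance.ingress)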