def _create_deployment_resources(self): res = AXResources() for route in self.spec.template.internal_routes: # ignore empty port spec if len(route.ports) == 0: logger.debug( "Skipping internal route {} as port spec is empty".format( route.name)) continue ir = InternalRoute(route.name, self.application) ir.create(route.to_dict()["ports"], selector={"deployment": self.name}, owner=self.name) res.insert(ir) logger.debug("Created route {}".format(ir)) for route in self.spec.template.external_routes: dns_name = route.dns_name() if dns_name.endswith("."): dns_name = dns_name[:-1] r = ExternalRoute(dns_name, self.application, {"deployment": self.name}, route.target_port, route.ip_white_list, route.visibility) try: elb_addr = visibility_to_elb_addr(route.visibility) elb_name = visibility_to_elb_name(route.visibility) except AXNotFoundException: if route.visibility == ExternalRouteVisibility.VISIBILITY_WORLD: raise AXNotFoundException( "Could not find the public ELB. Please report this error to Applatix Support at [email protected]" ) else: assert route.visibility == ExternalRouteVisibility.VISIBILITY_ORGANIZATION, "Only world and organization are currently supported as visibility attributes" raise AXNotFoundException( "Please create a private ELB using the template named 'ax_private_elb_creator_workflow' before using 'visibility=organization'" ) name = r.create(elb_addr, elb_name=elb_name) res.insert(r) logger.debug("Created external route {} for {}/{}/{}".format( name, self.application, self.name, dns_name)) main_container = self.spec.template.get_main_container() for key_name, vol in iteritems(main_container.inputs.volumes): assert "resource_id" in vol.details, "Volume resource_id absent in volume details" name = vol.details.get("axrn", None) resource_id = vol.details.get("resource_id", None) assert name is not None and resource_id is not None, "axrn and resource_id are required details for volume {}".format( key_name) nv_res = AXNamedVolumeResource(name, resource_id) nv_res.create() res.insert(nv_res) logger.debug( "Using named volume resource {} in application {}".format( name, self.application)) return res
class Task(object): """ The job of this object is to track the creation and deletion of a step in a workflow. Callers can create this object with the specification from service template, followed by call to create, wait_for_start etc """ def __init__(self, name, namespace="axuser"): self.name = name self.namespace = namespace self.client = KubernetesApiClient(use_proxy=True) self.service = None # this is the argo.services.service.Service object self._host_vols = [] self._name_id = AXClusterId().get_cluster_name_id() self._s3_bucket_ax_is_external = AXLogPath(self._name_id).is_external() self._s3_bucket_ax = AXLogPath(self._name_id).bucket() self._s3_key_prefix_ax = AXLogPath(self._name_id).artifact() self._s3_bucket = AXClusterDataPath(self._name_id).bucket() self._s3_key_prefix = AXClusterDataPath(self._name_id).artifact() self.software_info = SoftwareInfo() self._resources = AXResources() def create(self, conf): """ Create a Kubernetes Job object :param conf: conf data from DevOps :return: """ logger.debug("Task create for {}".format(json.dumps(conf))) self.service = Service() self.service.parse(conf) labels = { "app": self.name, "service_instance_id": self.service.service_context.service_instance_id, "root_workflow_id": self.service.service_context.root_workflow_id, "leaf_full_path": string_to_k8s_label(self.service.service_context.leaf_full_path or "no_path"), "tier": "devops", "role": "user", } template_spec = self._container_to_pod(labels) # convert V1PodTemplateSpec to V1Pod pod_spec = swagger_client.V1Pod() pod_spec.metadata = template_spec.metadata pod_spec.spec = template_spec.spec self._resources.finalize(pod_spec) logger.debug("pod_spec {}".format(json.dumps(pod_spec.to_dict()))) return pod_spec def start(self, spec): """ Start a task :param spec: The swagger specification for Pod :type spec: swagger_client.V1Pod :return: """ assert isinstance( spec, swagger_client.V1Pod), "Unexpected object {} in Task.start".format( type(spec)) @retry_unless(status_code=[409, 422]) def create_in_provider(): return self.client.api.create_namespaced_pod(spec, self.namespace) with TaskOperation(self): return create_in_provider() def status(self, status_obj=None): """ Return the status of the job with the pass name Args: status_obj: if passed the job status will be used instead of queried from provider Returns: a json dict with task status """ if status_obj: assert isinstance( status_obj, swagger_client.V1Pod ), "Unexpected status object {} in Task.status".format( type(status_obj)) status = status_obj else: status = Pod(self.name, self.namespace)._get_status_obj() # pkg/api/v1/types.go in kubernetes source code describes the following PodPhase # const ( # // PodPending means the pod has been accepted by the system, but one or more of the containers # // has not been started. This includes time before being bound to a node, as well as time spent # // pulling images onto the host. # PodPending PodPhase = "Pending" # // PodRunning means the pod has been bound to a node and all of the containers have been started. # // At least one container is still running or is in the process of being restarted. # PodRunning PodPhase = "Running" # // PodSucceeded means that all containers in the pod have voluntarily terminated # // with a container exit code of 0, and the system is not going to restart any of these containers. # PodSucceeded PodPhase = "Succeeded" # // PodFailed means that all containers in the pod have terminated, and at least one container has # // terminated in a failure (exited with a non-zero exit code or was stopped by the system). # PodFailed PodPhase = "Failed" # // PodUnknown means that for some reason the state of the pod could not be obtained, typically due # // to an error in communicating with the host of the pod. # PodUnknown PodPhase = "Unknown" # ) pending = True if status.status.phase == "Pending" else False active = True if status.status.phase == "Running" or pending else False failed = True if status.status.phase == "Failed" else False completed = True if status.status.phase == "Succeeded" or failed else False if status.status.phase == "Unknown": raise AXPlatformException( "Status of task {} could not be found due to some temporary problem. Please retry" .format(self.name)) ret_status = { "active": active, "succeeded": completed, # TODO: Should this be removed "message": "", "reason": "", "failed": failed } if pending: # If the pod is pending it is likely stuck on container image pull failure try: for init_status in status.status.init_container_statuses or []: if init_status.state.waiting is not None: # find the first init container that is stuck on waiting and stuff the reason and message ret_status[ "reason"] = init_status.state.waiting.reason or "" ret_status[ "message"] = init_status.state.waiting.message or "" break except Exception: pass return ret_status def delete(self, force=False): """ Delete the task from kubernetes and returns the final status Returns: Last status of the job """ logger.debug("Task delete for {}".format(self.name)) with TaskOperation(self): status_obj = Pod(self.name, self.namespace)._get_status_obj() status = self.status(status_obj=status_obj) p = Pod(self.name, self.namespace) if not force: p.stop() # delete dependents resources = AXResources(existing=status_obj) resources.delete_all() # finally delete pod p.delete() return status def get_log_endpoint(self): url_run, _ = Pod(self.name, self.namespace).get_log_urls() return url_run def stop(self): Pod(self.name, self.namespace).stop() def _add_optional_envs(self, container): s3_endpoint = os.getenv("ARGO_S3_ENDPOINT", None) if s3_endpoint: container.add_env("ARGO_S3_ENDPOINT", value=s3_endpoint) access_key = os.getenv("ARGO_S3_ACCESS_KEY_ID", None) if access_key: container.add_env("ARGO_S3_ACCESS_KEY_ID", value_from_secret=("argo-access-key", "argo-access-key")) container.add_env("ARGO_S3_ACCESS_KEY_SECRET", value_from_secret=("argo-secret-key", "argo-secret-key")) aws_region = os.getenv("AX_AWS_REGION", None) if aws_region: container.add_env("AX_AWS_REGION", aws_region) def _container_to_pod(self, labels): # generate the service environment self._gen_service_env() pod_spec = PodSpec(self.name) pod_spec.restart_policy = "Never" main_container = self._container_spec() for vol_tag, vol in iteritems(self.service.template.inputs.volumes): # sanitize name for kubernetes vol_tag = string_to_dns_label(vol_tag) cvol = ContainerVolume(vol_tag, vol.mount_path) assert "resource_id" in vol.details and "filesystem" in vol.details, "resource_id and filesystem are required fields in volume details" cvol.set_type("AWS_EBS", vol_tag, vol.details["resource_id"], vol.details["filesystem"]) main_container.add_volume(cvol) logger.info("Mounting volume {} {} in {}".format( vol_tag, vol.details, vol.mount_path)) pod_spec.add_main_container(main_container) wait_container = self._generate_wait_container_spec() target_cloud = os.environ.get("AX_TARGET_CLOUD", Cloud().own_cloud()) wait_container.add_env("AX_TARGET_CLOUD", target_cloud) pod_spec.add_wait_container(wait_container) (cpu, mem, d_cpu, d_mem) = self._container_resources() main_container.add_resource_constraints("cpu_cores", cpu, limit=None) main_container.add_resource_constraints("mem_mib", mem, limit=mem) # handle artifacts self_sid = None if self.service.service_context: self_sid = self.service.service_context.service_instance_id # TODO: This function calls ax_artifact and needs to be rewritten. Ugly code. artifacts_container = pod_spec.enable_artifacts( self.software_info.image_namespace, self.software_info.image_version, self_sid, self.service.template.to_dict()) artifacts_container.add_env("AX_JOB_NAME", value=self.name) artifacts_container.add_env("AX_TARGET_CLOUD", target_cloud) artifacts_container.add_env("ARGO_LOG_BUCKET_NAME", os.environ.get("ARGO_LOG_BUCKET_NAME")) artifacts_container.add_env("ARGO_DATA_BUCKET_NAME", self._s3_bucket) self._add_optional_envs(artifacts_container) secret_resources = artifacts_container.add_configs_as_vols( self.service.template.get_all_configs(), self.name, self.namespace) self._resources.insert_all(secret_resources) if self.service.template.docker_spec: dind_c = pod_spec.enable_docker( self.service.template.docker_spec.graph_storage_size_mib) dind_c.add_volumes(pod_spec.get_artifact_vols()) dind_c.add_resource_constraints("cpu_cores", d_cpu, limit=None) dind_c.add_resource_constraints("mem_mib", d_mem, limit=d_mem) service_id = None if self.service.service_context: service_id = self.service.service_context.service_instance_id pod_spec.add_annotation("ax_serviceid", service_id) pod_spec.add_annotation("ax_costid", json.dumps(self.service.costid)) pod_spec.add_annotation("AX_SERVICE_ENV", self._gen_service_env()) for k in labels or []: pod_spec.add_label(k, labels[k]) return pod_spec.get_spec() def _container_spec(self): """ Converts service template to V1Container """ container = self.service.template c = Container(container.name, container.image, pull_policy=container.image_pull_policy) c.add_env("AX_CONTAINER_NAME", value=self.name) c.add_env("AX_ROOT_SERVICE_INSTANCE_ID", value=self.service.service_context.root_workflow_id) c.add_env("AX_SERVICE_INSTANCE_ID", value=self.service.service_context.service_instance_id) # Envs introduced to user c.add_env("AX_POD_NAME", value_from="metadata.name") c.add_env("AX_POD_IP", value_from="status.podIP") c.add_env("AX_POD_NAMESPACE", value_from="metadata.namespace") c.add_env("AX_NODE_NAME", value_from="spec.nodeName") c.add_env("AX_CLUSTER_META_URL_V1", value=CLUSTER_META_URL_V1) for env in container.env: (cfg_ns, cfg_name, cfg_key) = env.get_config() if cfg_ns is not None: # checking one of them is enough res = SecretResource(cfg_ns, cfg_name, self.name, self.namespace) res.create() self._resources.insert(res) c.add_env(env.name, value_from_secret=(res.get_resource_name(), cfg_key)) else: c.add_env(env.name, value=env.value) return c def _container_resources(self): container = self.service.template cpu = float(container.resources.cpu_cores) mem = float(container.resources.mem_mib) main_cpu = cpu main_mem = mem dind_cpu = 0.0 dind_mem = 0.0 if container.docker_spec: dind_cpu = float(container.docker_spec.cpu_cores) dind_mem = float(container.docker_spec.mem_mib) main_cpu = cpu main_mem = mem if dind_mem < MEM_MIB_MIN_DOCKER_ENABLE: raise ValueError( "mem_mib must have a minimum value of {} for docker support" .format(MEM_MIB_MIN_DOCKER_ENABLE)) if main_mem < MEM_MIB_MIN: raise ValueError( "mem_mib must have a minimum value of {}MB".format( MEM_MIB_MIN)) return main_cpu, main_mem, dind_cpu, dind_mem def _generate_wait_container_spec(self): main_container_name = self.service.template.name c = SidecarTask(main_container_name, self.software_info.image_namespace, self.software_info.image_version) c.add_env("AX_MAIN_CONTAINER_NAME", value=main_container_name) c.add_env("AX_JOB_NAME", value=self.name) c.add_env("AX_CUSTOMER_ID", AXCustomerId().get_customer_id()) c.add_env("AX_REGION", AXClusterConfig().get_region()) c.add_env("AX_CLUSTER_NAME_ID", self._name_id) c.add_env("ARGO_LOG_BUCKET_NAME", os.environ.get("ARGO_LOG_BUCKET_NAME")) c.add_env("ARGO_DATA_BUCKET_NAME", self._s3_bucket) self._add_optional_envs(c) return c def _gen_service_env(self): service_env = { "container": { "docker": {} }, "s3_bucket": self._s3_bucket, "s3_key_prefix": self._s3_key_prefix, "s3_bucket_ax_is_external": self._s3_bucket_ax_is_external, "s3_bucket_ax": self._s3_bucket_ax, "s3_key_prefix_ax": self._s3_key_prefix_ax, "docker_enable": self.service.template.docker_spec is not None } container = self.service.template # if container is not 'once', i.e. need to be restarted if failed, # set keep_return_code to be True so the inner_executor will pass-through the return code service_env["keep_return_code"] = not container.once service_env["container"]["docker"]["commands"] = container.command service_env["container"]["docker"]["args"] = container.args if container.inputs.count() > 0: service_env["container"]["inputs"] = container.inputs.to_dict() if container.outputs.count() > 0: service_env["container"]["outputs"] = container.outputs.to_dict() if self.service.service_context: service_env["container"][ "service_context"] = self.service.service_context.to_dict() service_env["container"]["service_context"][ "name"] = self.service.name # use base64 encode then decode to accommodate all chars in json # xxx todo: which unicode encode to use? return base64.b64encode(json.dumps(service_env)) @staticmethod def generate_name(conf): """ This function generates a kubernetes job name from a service template and also ensures that the generated name has some relationship to human readable job names while also being unique. :param conf: service template :return: job name string """ if not conf["template"].get("once", True): # name is fully specified by caller. This is currently only used by # workflow executor. No user jobs are expected to use this code path # Workflow executor generates a unique name for the workflow so we # do not have to worry about generating one for it. name = conf.get("name", None) if name is None: raise ValueError( "name is a required field in service object for once=false." ) return string_to_dns_label(name) else: return string_to_dns_label(conf["id"]) @staticmethod def insert_defaults(conf): """ This function inserts default that are required for Task processing :param conf: input conf :return: output conf """ if conf["template"].get("name", None) is None: conf["template"]["name"] = "main" else: conf["template"]["name"] = string_to_dns_label( conf["template"]["name"]) return conf
class Deployment(object): """ This class creates and manages a single deployment object A deployment consists of the following specifications in kubernetes 1. A kubernetes deployment spec 2. Zero or more kubernetes service specs 3. Zero or more ingress rules All functions in the object need to be idempotent. """ def __init__(self, name, application): """ Each deployment has a name and needs to be part of an application Application maps to a kubernetes namespace and the deployment will be created in this namespace. Args: name: deployment name application: the application that this deployment runs under """ self.name = name self.application = application self.client = KubernetesApiClient(use_proxy=True) self._nameid = AXClusterId().get_cluster_name_id() self._software_info = SoftwareInfo() self._app_obj = Application(application) self._resources = AXResources() self.spec = None def create(self, spec): """ Create a deployment from the template specified Idempotency: This function is idempotent. A create of identical spec will have no impact if the deployment already exists. If the spec is different then the existing deployment will be updated. """ @retry_unless(status_code=[404, 422]) def create_in_provider(k8s_spec): try: logger.info("Creating deployment %s in Kubernetes namespace %s", self.name, self.application) self.client.apisappsv1beta1_api.create_namespaced_deployment(k8s_spec, self.application) logger.info("Done creating deployment %s in Kubernetes namespace %s", self.name, self.application) except swagger_client.rest.ApiException as e: if e.status == 409: self.client.apisappsv1beta1_api.replace_namespaced_deployment(k8s_spec, self.application, self.name) else: raise e with DeploymentOperation(self): self.spec = spec # Do some template checks self._template_checks() # First create supplemental resources such as routes, ingress rules etc self._create_deployment_resources() # Now create the deployment spec d_spec = self._create_deployment_spec() # Store the resources in the deployment spec self._resources.finalize(d_spec) # Create the deployment object in kubernetes create_in_provider(d_spec) def delete(self, timeout=None): """ Delete the deployment. Idempotency: This function is idempotent. If deployment does not exist then delete will silently fail without raising any exceptions. Args: timeout: In seconds or None for infinite """ options = swagger_client.V1DeleteOptions() options.grace_period_seconds = 1 options.orphan_dependents = False def check_result(result): # True for retry False for done return not result @retry(retry_on_result=check_result, wait_fixed=2000, stop_max_delay=timeout) def wait_for_scale_to_zero(): logger.debug("Wait for scale of deployment to 0 for {} {}".format(self.application, self.name)) @retry_unless(swallow_code=[404]) def get_scale_from_provider(): return self.client.apisappsv1beta1_api.read_namespaced_scale_scale(self.application, self.name) scale = get_scale_from_provider() if scale is None: return True if scale.status.replicas == 0: return True return False @retry_unless(swallow_code=[404, 409]) def delete_in_provider(): logger.debug("Deleting deployment for {} {}".format(self.application, self.name)) self.client.apisappsv1beta1_api.delete_namespaced_deployment(options, self.application, self.name) def delete_rs_in_provider(): logger.debug("Deleting replica set for {} {}".format(self.application, self.name)) self.client.extensionsv1beta1.deletecollection_namespaced_replica_set(self.application, label_selector="deployment={}".format(self.name)) # now delete deployment object and replication set with DeploymentOperation(self): dep_obj = self._deployment_status() self._scale_to(0) wait_for_scale_to_zero() if dep_obj: resources = AXResources(existing=dep_obj) resources.delete_all() delete_in_provider() delete_rs_in_provider() def status(self): """ Get the status of the deployment. Returns: Returns the entire V1Deployment as a dict. If deployment is not found then this will raise an AXNotFoundException (404) """ # STEP 1: Get status of deployment stat = self._deployment_status() if stat is None: raise AXNotFoundException("Deployment {} not found in application {}".format(self.name, self.application)) dep_field_map = { "name": "metadata.name", "generation": "metadata.annotations.ax_generation", "desired_replicas": "status.replicas", "available_replicas": "status.available_replicas", "unavailable_replicas": "status.unavailable_replicas" } ret = KubeObject.swagger_obj_extract(stat, dep_field_map, serializable=True) # STEP 2: Get the pods for the deployment and events associated podlist = self._deployment_pods().items dep_events = self._app_obj.events(name=self.name) event_field_map = { "message": "message", "reason": "reason", "source": "source.component", "host": "source.host", "firstTS": "first_timestamp", "lastTS": "last_timestamp", "count": "count", "container": "involved_object.field_path", "type": "type" } ret["events"] = [] for event in dep_events: ret["events"].append(KubeObject.swagger_obj_extract(event, event_field_map, serializable=True)) ret["pods"] = [] for pod in podlist or []: # fill pod status and containers pstatus = Pod.massage_pod_status(pod) # fill events for pod pstatus["events"] = [] events = self._app_obj.events(name=pod.metadata.name) for event in events: pstatus["events"].append(KubeObject.swagger_obj_extract(event, event_field_map, serializable=True)) # fill pod failure information for pod based on events pstatus["failure"] = Deployment._pod_failed_info(pstatus) ret["pods"].append(pstatus) # STEP 3: From the deployment spec get the resources created by deployment # TODO: Add this when services are created by deployment return ret def get_labels(self): """ Get a dict of labels used for this deployment """ state = self._deployment_status() if state is None: raise AXNotFoundException("Did not find deployment {} in application {}".format(self.name, self.application)) return KubeObject.swagger_obj_extract(state, {"labels": "spec.selector.match_labels"})['labels'] @staticmethod def _pod_failed_info(pod_status): if pod_status["phase"] != "Pending": return None for ev in pod_status["events"] or []: if ev["reason"] == "Failed" and ev["source"] == "kubelet" and ev["type"] == "Warning" and \ "Failed to pull image" in ev["message"] and ev["count"] > 5: return { "reason": "ImagePullFailure", "message": ev["message"] } return None def scale(self, replicas): with DeploymentOperation(self): # Deployments with volumes can't be scaled to > 1. if replicas > 1: dep_obj = self._deployment_status() if dep_obj: resources = AXResources(existing=dep_obj) for type in resources.get_all_types(): if type.startswith("ax.platform.volumes"): raise AXApiForbiddenReq("Deployments with volumes can't be scaled to > 1 ({})".format(replicas)) self._scale_to(replicas) @retry_unless(swallow_code=[404]) def _deployment_status(self): return self.client.apisappsv1beta1_api.read_namespaced_deployment(self.application, self.name) @retry_unless(swallow_code=[404]) def _deployment_pods(self): return self.client.api.list_namespaced_pod(self.application, label_selector="deployment={}".format(self.name)) def _create_deployment_spec(self): pod_spec = PodSpec(self.name, namespace=self.application) main_container = self.spec.template.get_main_container() main_container_spec = self._create_main_container_spec(main_container) pod_spec.add_main_container(main_container_spec) container_vols = self._get_main_container_vols() main_container_spec.add_volumes(container_vols) hw_res = main_container.get_resources() main_container_spec.add_resource_constraints("cpu_cores", hw_res.cpu_cores, limit=None) main_container_spec.add_resource_constraints("mem_mib", hw_res.mem_mib, limit=None) artifacts_container = pod_spec.enable_artifacts(self._software_info.image_namespace, self._software_info.image_version, None, main_container.to_dict()) secret_resources = artifacts_container.add_configs_as_vols(main_container.get_all_configs(), self.name, self.application) self._resources.insert_all(secret_resources) # Set up special circumstances based on annotations # Check if we need to circumvent the executor script. This is needed for containers that run # special init processes such as systemd as these processes like to be pid 1 if main_container.executor_spec: main_container_spec.command = None if main_container.docker_spec is not None: raise ValueError("We do not support ax_ea_docker_enable with ax_ea_executor") # Does this container need to be privileged main_container_spec.privileged = main_container.privileged # Check if docker daemon sidecar needs to be added if main_container.docker_spec: # graph storage size is specified in GiB dind_container_spec = pod_spec.enable_docker(main_container.docker_spec.graph_storage_size_mib) dind_container_spec.add_volumes(pod_spec.get_artifact_vols()) dind_container_spec.add_resource_constraints("cpu_cores", main_container.docker_spec.cpu_cores, limit=None) dind_container_spec.add_resource_constraints("mem_mib", main_container.docker_spec.mem_mib, limit=None) dind_container_spec.add_volumes(container_vols) # Do we only need docker graph storage volume for the main container if main_container.graph_storage: dgs_vol = ContainerVolume("graph-storage-vol-only", main_container.graph_storage.mount_path) dgs_vol.set_type("DOCKERGRAPHSTORAGE", main_container.graph_storage.graph_storage_size_mib) main_container_spec.add_volume(dgs_vol) # set the pod hostname to value provided in main container spec pod_spec.hostname = main_container.hostname # TODO: This needs fixup. job name is used in init container to ask permission to start # TODO: Don't know if this is needed in deployment or not? artifacts_container.add_env("AX_JOB_NAME", value=self.application) artifacts_container.add_env("AX_DEPLOYMENT_NEW", value="True") if len(container_vols) > 0: tmp_container_vols = copy.deepcopy(container_vols) volume_paths = [] for v in tmp_container_vols: v.set_mount_path("/ax/fix" + v.volmount.mount_path) volume_paths.append(v.volmount.mount_path) artifacts_container.add_volumes(tmp_container_vols) logger.info("Volumes to chmod: %s", volume_paths) artifacts_container.add_env("AX_VOL_MOUNT_PATHS", value=str(volume_paths)) # add annotation for service env which will show up in artifacts container pod_spec.add_annotation("AX_SERVICE_ENV", self._generate_service_env(self.spec.template)) pod_spec.add_annotation("AX_IDENTIFIERS", self._get_identifiers()) if self.spec.costid: pod_spec.add_annotation("ax_costid", json.dumps(self.spec.costid)) pod_spec.add_label("deployment", self.name) pod_spec.add_label("application", self.application) pod_spec.add_label("tier", "user") pod_spec.add_label("deployment_id", self.spec.id) # now that pod is ready get its spec and wrap it in a deployment k8s_spec = self._generate_deployment_spec_for_pod(pod_spec.get_spec()) logger.info("Generated Kubernetes spec for deployment %s", self.name) return k8s_spec def _create_main_container_spec(self, container_template): """ :type container_template: argo.template.v1.container.ContainerTemplate :rtype Container """ logger.debug("Container template is {}".format(container_template)) name = string_to_dns_label(container_template.name) container_spec = Container(name, container_template.image, pull_policy=container_template.image_pull_policy) container_spec.parse_probe_spec(container_template) # Necessary envs for handshake container_spec.add_env("AX_HANDSHAKE_VERSION", value=CUR_RECORD_VERSION) # Envs introduced to user container_spec.add_env("AX_POD_NAME", value_from="metadata.name") container_spec.add_env("AX_POD_IP", value_from="status.podIP") container_spec.add_env("AX_POD_NAMESPACE", value_from="metadata.namespace") container_spec.add_env("AX_NODE_NAME", value_from="spec.nodeName") container_spec.add_env("AX_CLUSTER_META_URL_V1", value=CLUSTER_META_URL_V1) # envs from user spec for env in container_template.env: (cfg_ns, cfg_name, cfg_key) = env.get_config() if cfg_ns is not None: secret = SecretResource(cfg_ns, cfg_name, self.name, self.application) secret.create() self._resources.insert(secret) container_spec.add_env(env.name, value_from_secret=(secret.get_resource_name(), cfg_key)) else: container_spec.add_env(env.name, value=env.value) # Unix socket for applet applet_sock = ContainerVolume("applet", "/tmp/applatix.io/") applet_sock.set_type("HOSTPATH", "/var/run/") container_spec.add_volume(applet_sock) return container_spec @staticmethod def _get_valid_name_from_axrn(axrn): # AXRN's will have non-alphanumeric characters such as : / @, etc which K8S doesn't # like in its PVC name. Replace all non-alphanumeric characters with -. name_regex = re.compile(r"\W+") return name_regex.sub("-", axrn).replace("_", "-") def _get_main_container_vols(self): container_template = self.spec.template.get_main_container() ret = [] for vol_name, vol in iteritems(container_template.inputs.volumes): # sanitize the volume name for kubernetes vol_name = string_to_dns_label(vol_name) cvol = ContainerVolume(vol_name, vol.mount_path) assert "resource_id" in vol.details, "Volume resource-id absent in volume details" assert "filesystem" in vol.details, "Volume filesystem absent in volume details" cvol.set_type("AWS_EBS", vol_name, vol.details["resource_id"], vol.details["filesystem"]) logger.debug("Volume {} {} mounted at {}".format(vol_name, vol.details, vol.mount_path)) ret.append(cvol) return ret def _generate_service_env(self, template): return base64.b64encode(json.dumps(template.to_dict())) def _get_identifiers(self): return { "application_id": self.spec.app_generation, "deployment_id": self.spec.id, "static": { "application_id": self.spec.app_id, "deployment_id": self.spec.deployment_id } } def _generate_deployment_spec_for_pod(self, pod_spec): metadata = swagger_client.V1ObjectMeta() metadata.name = self.name dspec = swagger_client.V1beta1DeploymentSpec() dspec.strategy = self._get_strategy() if self.spec.template.min_ready_seconds: dspec.min_ready_seconds = self.spec.template.min_ready_seconds dspec.selector = swagger_client.V1LabelSelector() dspec.selector.match_labels = { "deployment": self.name } dspec.replicas = self.spec.template.scale.min dspec.template = pod_spec deployment_obj = swagger_client.V1beta1Deployment() deployment_obj.metadata = metadata deployment_obj.spec = dspec return deployment_obj def _create_deployment_resources(self): for route in self.spec.template.internal_routes: # ignore empty port spec if len(route.ports) == 0: logger.debug("Skipping internal route {} as port spec is empty".format(route.name)) continue ir = InternalRoute(route.name, self.application) ir.create(route.to_dict()["ports"], selector={"deployment": self.name}, owner=self.name) self._resources.insert(ir) logger.debug("Created route {}".format(ir)) for route in self.spec.template.external_routes: dns_name = route.dns_name() if dns_name.endswith("."): dns_name = dns_name[:-1] r = ExternalRoute(dns_name, self.application, {"deployment": self.name}, route.target_port, route.ip_white_list, route.visibility) try: elb_addr = visibility_to_elb_addr(route.visibility) elb_name = visibility_to_elb_name(route.visibility) except AXNotFoundException: if route.visibility == ExternalRouteVisibility.VISIBILITY_WORLD: raise AXNotFoundException("Could not find the public ELB. Please report this error to Applatix Support at [email protected]") else: assert route.visibility == ExternalRouteVisibility.VISIBILITY_ORGANIZATION, "Only world and organization are currently supported as visibility attributes" raise AXNotFoundException("Please create a private ELB using the template named 'ax_private_elb_creator_workflow' before using 'visibility=organization'") name = r.create(elb_addr,elb_name=elb_name) self._resources.insert(r) logger.debug("Created external route {} for {}/{}/{}".format(name, self.application, self.name, dns_name)) main_container = self.spec.template.get_main_container() for key_name, vol in iteritems(main_container.inputs.volumes): assert "resource_id" in vol.details, "Volume resource_id absent in volume details" name = vol.details.get("axrn", None) resource_id = vol.details.get("resource_id", None) assert name is not None and resource_id is not None, "axrn and resource_id are required details for volume {}".format(key_name) nv_res = AXNamedVolumeResource(name, resource_id) nv_res.create() self._resources.insert(nv_res) logger.debug("Using named volume resource {} in application {}".format(name, self.application)) @retry_unless(status_code=[422], swallow_code=[400, 404]) def _scale_to(self, replicas): logger.debug("Scaling deployment to {} for {} {}".format(replicas, self.application, self.name)) scale = swagger_client.V1beta1Scale() scale.spec = swagger_client.V1beta1ScaleSpec() scale.spec.replicas = replicas scale.metadata = swagger_client.V1ObjectMeta() scale.metadata.name = self.name scale.metadata.namespace = self.application self.client.apisappsv1beta1_api.replace_namespaced_scale_scale(scale, self.application, self.name) def _template_checks(self): if self.spec.template.scale and self.spec.template.scale.min > 1 and len(self.spec.template.volumes) >= 1: raise ValueError("Deployments with volumes can't have scale > 1 ({})".format(self.spec.template.scale.min)) def _get_strategy(self): s = swagger_client.V1beta1DeploymentStrategy() s.type = "RollingUpdate" if self.spec.template.strategy.type == "rolling_update" else "Recreate" if s.type == "RollingUpdate": rolling_update = swagger_client.V1beta1RollingUpdateDeployment() rolling_update.max_unavailable = self.spec.template.strategy.rolling_update.max_unavailable rolling_update.max_surge = self.spec.template.strategy.rolling_update.max_surge s.rolling_update = rolling_update return s