def pre_add(self, item):
    item.name = item.name.replace('_', '-')[0:54].lower()
    if item.job_template is None:
        raise MyappException("Job Template is required")
    # Use the project's volume mounts by default, then append the job template's mounts if missing.
    item.volume_mount = item.pipeline.project.volume_mount
    if item.job_template.volume_mount and item.job_template.volume_mount not in item.volume_mount:
        if item.volume_mount:
            item.volume_mount += "," + item.job_template.volume_mount
        else:
            item.volume_mount = item.job_template.volume_mount
    item.resource_memory = core.check_resource_memory(item.resource_memory)
    item.resource_cpu = core.check_resource_cpu(item.resource_cpu)
    self.merge_args(item, 'add')
    self.task_args_check(item)
    item.create_datetime = datetime.datetime.now()
    item.change_datetime = datetime.datetime.now()

    # Switch the node selector between cpu and gpu nodes depending on whether a GPU is requested.
    if core.get_gpu(item.resource_gpu)[0]:
        item.node_selector = item.node_selector.replace('cpu=true', 'gpu=true')
    else:
        item.node_selector = item.node_selector.replace('gpu=true', 'cpu=true')
def get_node_selector(self):
    project_node_selector = self.get_default_node_selector(
        self.pipeline.project.node_selector, self.resource_gpu, 'train')
    gpu_type = core.get_gpu(self.resource_gpu)[1]
    if gpu_type:
        project_node_selector += ',gpu-type=' + gpu_type
    return project_node_selector
def get_default_node_selector(self, node_selector, resource_gpu, model_type):
    # Start from the selector defined on the project; the platform decides the final
    # value instead of using the user's raw input.
    if not node_selector:
        node_selector = ''
    if core.get_gpu(resource_gpu)[0]:
        node_selector = node_selector.replace('cpu=true', 'gpu=true') + ",gpu=true,%s=true" % model_type
    else:
        node_selector = node_selector.replace('gpu=true', 'cpu=true') + ",cpu=true,%s=true" % model_type
    if 'org' not in node_selector:
        node_selector += ',org=public'
    # Split on the supported delimiters, strip whitespace and deduplicate the selectors.
    node_selector = re.split(',|;|\n|\t', str(node_selector))
    node_selector = [selector.strip() for selector in node_selector if selector.strip()]
    node_selector = ','.join(list(set(node_selector)))
    return node_selector
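
# Minimal, self-contained sketch (not part of the original module) of the selector
# normalization performed above: split on the supported delimiters, strip whitespace,
# deduplicate and rejoin. The helper name is illustrative only; sorted() is used here
# to make the output deterministic, whereas the method above joins a set directly.
def _normalize_selector_sketch(raw_selector):
    parts = re.split(',|;|\n|\t', str(raw_selector))
    parts = [part.strip() for part in parts if part.strip()]
    return ','.join(sorted(set(parts)))

# Example: _normalize_selector_sketch('org=public, gpu=true,,gpu=true') -> 'gpu=true,org=public'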
def pre_update(self, item):
    item.name = item.name.replace('_', '-')[0:54].lower()
    if item.job_template is None:
        raise MyappException("Job Template is required")
    # Unlike pre_add, the job template's volume mounts are not merged in on update.
    # if item.job_template.volume_mount and item.job_template.volume_mount not in item.volume_mount:
    #     if item.volume_mount:
    #         item.volume_mount += "," + item.job_template.volume_mount
    #     else:
    #         item.volume_mount = item.job_template.volume_mount
    if item.outputs:
        core.validate_json(item.outputs)
        item.outputs = json.dumps(json.loads(item.outputs), indent=4, ensure_ascii=False)
    if item.expand:
        core.validate_json(item.expand)
        item.expand = json.dumps(json.loads(item.expand), indent=4, ensure_ascii=False)
    item.resource_memory = core.check_resource_memory(
        item.resource_memory, self.src_item_json.get('resource_memory', None))
    item.resource_cpu = core.check_resource_cpu(
        item.resource_cpu, self.src_item_json.get('resource_cpu', None))
    self.merge_args(item, 'update')
    self.task_args_check(item)
    item.change_datetime = datetime.datetime.now()

    # Switch the node selector between cpu and gpu nodes depending on whether a GPU is requested.
    if core.get_gpu(item.resource_gpu)[0]:
        item.node_selector = item.node_selector.replace('cpu=true', 'gpu=true')
    else:
        item.node_selector = item.node_selector.replace('gpu=true', 'cpu=true')
def get_kfjson(service, mykfservice):
    if not service:
        return None

    # Collect the platform-wide image pull secrets plus any secrets owned by the current user.
    image_secrets = conf.get('HUBSECRET', [])
    user_hubsecrets = db.session.query(Repository.hubsecret).filter(
        Repository.created_by_fk == g.user.id).all()
    if user_hubsecrets:
        for hubsecret in user_hubsecrets:
            if hubsecret[0] not in image_secrets:
                image_secrets.append(hubsecret[0])

    kfjson = {
        "minReplicas": service.min_replicas,
        "maxReplicas": service.max_replicas,
        "custom": {
            "affinity": {
                "nodeAffinity": {
                    "requiredDuringSchedulingIgnoredDuringExecution": {
                        "nodeSelectorTerms": [{
                            "matchExpressions": [
                                {
                                    "key": "gpu" if core.get_gpu(service.resource_gpu)[0] else "cpu",
                                    "operator": "In",
                                    "values": ["true"]
                                },
                            ]
                        }]
                    }
                },
            },
            "imagePullSecrets": [{"name": hubsecret} for hubsecret in image_secrets],
            "container": {
                "image": service.images,
                "imagePullPolicy": conf.get('IMAGE_PULL_POLICY', 'Always'),
                "name": mykfservice.name + "-" + service.name,
                "workingDir": service.working_dir if service.working_dir else None,
                "command": ["sh", "-c", service.command] if service.command else None,
                "resources": {
                    "requests": {
                        "cpu": service.resource_cpu,
                        "memory": service.resource_memory
                    }
                },
                "env": [{"name": env[0], "value": env[1]} for env in get_env(service.env)],
                # "volumeMounts": [
                #     {
                #         "mountPath": "/mnt/%s" % service.created_by.username,
                #         "name": "workspace",
                #         "subPath": service.created_by.username
                #     }
                # ],
                # "volumeDevices": [
                #     {
                #         "devicePath": "/data/home/",
                #         "name": "workspace"
                #     }
                # ]
            }
            # "volumes": [
            #     {
            #         "name": "workspace",
            #         "persistentVolumeClaim": {
            #             "claimName": "kubeflow-user-workspace"
            #         }
            #     }
            # ]
        }
    }
    return kfjson
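
# Hedged usage sketch (assumption, not part of the original module): the dict returned by
# get_kfjson() resembles the "custom" predictor block of a KFServing v1alpha2 InferenceService
# endpoint, so a caller might wrap it roughly as below. The helper name and the "predictor"
# nesting are assumptions for illustration only.
def build_endpoint_spec_sketch(service, mykfservice):
    predictor = get_kfjson(service, mykfservice)
    # Return None when there is no service (e.g. no canary deployment configured).
    return {"predictor": predictor} if predictor else None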