def enforce_affinity_policy(self, pod):
    """Apply Eve's node placement policy to a worker pod.

    A pod spec that already carries an ``affinity`` section is rejected, so
    the placement chosen by Eve cannot be overridden from the worker file.
    When ``self.node_affinity`` is set, a required ``nodeAffinity`` rule
    built from its ``key`` and ``value`` is written into ``pod['spec']``.

    Raises:
        LatentWorkerCannotSubstantiate: the pod spec defines ``affinity``.
    """
    if 'affinity' in pod['spec']:
        raise LatentWorkerCannotSubstantiate(
            'the affinity that is specified in the worker '
            'conflicts with Eve\'s pod placement policy. '
            'Please remove this section from the '
            'worker yaml file in %s' % self.template_path)

    if not self.node_affinity:
        # No placement constraint configured: leave the pod untouched.
        return

    match_expression = {
        'key': self.node_affinity.key,
        'operator': 'In',
        'values': [self.node_affinity.value],
    }
    selector_term = {'matchExpressions': [match_expression]}
    required_rule = {'nodeSelectorTerms': [selector_term]}
    pod['spec']['affinity'] = {
        'nodeAffinity': {
            'requiredDuringSchedulingIgnoredDuringExecution': required_rule,
        },
    }
def test_failed_substantiations_get_exception(self):
    """
    A latent worker that fails to substantiate ends its build with an
    EXCEPTION result.
    """
    controller = LatentController('local')
    testy_builder = BuilderConfig(
        name="testy",
        workernames=["local"],
        factory=BuildFactory(),
    )
    config_dict = {
        'builders': [testy_builder],
        'workers': [controller.worker],
        'protocols': {'null': {}},
        # Disable checks about missing scheduler.
        'multiMaster': True,
    }
    master = self.getMaster(config_dict)
    builder_lookup = master.data.updates.findBuilderId('testy')
    builder_id = self.successResultOf(builder_lookup)

    # Trigger a buildrequest
    self.createBuildrequest(master, [builder_id])

    # The worker fails to substantiate.
    substantiation_error = LatentWorkerCannotSubstantiate(
        "substantiation failed")
    controller.start_instance(Failure(substantiation_error))

    # Flush the errors logged by the failure.
    self.flushLoggedErrors(LatentWorkerCannotSubstantiate)

    # When the substantiation fails, the result is an exception.
    build_row = yield master.db.builds.getBuildByNumber(builder_id, 1)
    self.assertEqual(EXCEPTION, build_row['results'])

    controller.auto_stop(True)
def _thd_start_instance(self, image, memory, volumes, buildnumber, docker_hook_version): self.logger.info('Checking if %r docker image exist.' % image) if not self.docker('images', '--format', '{{.Repository}}', image): self.logger.error('%r image not found.' % image) raise LatentWorkerCannotSubstantiate( 'Image %s not found on docker host' % image) cmd = [ 'run', '--name=%s' % self.instance, '--privileged', '--env', 'BUILDMASTER=%s' % self.master_fqdn, '--env', 'BUILDMASTER_PORT=%s' % self.pb_port, '--env', 'WORKERNAME=%s' % self.name, '--env', 'WORKERPASS=%s' % self.password, '--label', 'buildnumber=%s' % buildnumber, '--detach', '--cpus=%s' % self.max_cpus ] if memory: if (util.convert_to_bytes(memory) > util.convert_to_bytes( self.max_memory)): self.logger.error('Can not request %s RAM (max allowed %s).' % (memory, self.max_memory)) raise LatentWorkerCannotSubstantiate( 'Can not request %s RAM (max allowed is %s).' % (memory, self.max_memory)) cmd.append('--memory=%s' % memory) else: cmd.append('--memory=%s' % self.max_memory) cmd.extend(['--volume=%s' % volume for volume in volumes]) if docker_hook_version: cmd.append('--label=docker_hook=%s' % docker_hook_version) cmd.append(image) try: self.docker(*cmd) except CalledProcessError: raise LatentWorkerCannotSubstantiate( 'Docker run: CMD failed to start or died shortly after') self.logger.debug('Container created, Id: %s...' % self.instance) return [self.instance, image]
def enforce_active_deadline(self, pod):
    """Make sure the pod carries an active deadline within ``self.deadline``.

    A pod without ``activeDeadlineSeconds`` receives ``self.deadline``; a pod
    that sets a value greater than ``self.deadline`` is rejected.

    Raises:
        LatentWorkerCannotSubstantiate: the pod's deadline exceeds
            ``self.deadline``.
    """
    spec = pod['spec']
    if 'activeDeadlineSeconds' not in spec:
        spec['activeDeadlineSeconds'] = self.deadline
        return

    if spec['activeDeadlineSeconds'] > self.deadline:
        raise LatentWorkerCannotSubstantiate(
            'activeDeadlineSeconds must be set to a value lower than '
            '%d in %s' % (self.deadline, self.template_path))
def enforce_restart_policy(self, pod):
    """Make sure the worker pod is never restarted by kubernetes.

    A pod without ``restartPolicy`` receives ``'Never'``; any other explicit
    value is rejected.

    Raises:
        LatentWorkerCannotSubstantiate: ``restartPolicy`` is set to
            something other than ``'Never'``.
    """
    spec = pod['spec']
    if 'restartPolicy' not in spec:
        spec['restartPolicy'] = 'Never'
        return

    if spec['restartPolicy'] != 'Never':
        raise LatentWorkerCannotSubstantiate(
            'restartPolicy must be set to \'Never\' '
            '(restart of buildbot-worker is not '
            'supported) in %s' % self.template_path)
def resource_limit(kind, d, spec):
    # Validate one resource entry (`kind` of mapping `d`) against the
    # worker-wide maximum and add it to the running `total[spec][kind]`.
    # `self` and `total` are taken from the enclosing scope.
    if kind == 'memory':
        conv, bound, unit = util.convert_to_bytes, self.max_memory, 'RAM'
    elif kind == 'cpu':
        conv, bound, unit = util.convert_to_cpus, self.max_cpus, 'CPUs'
    else:
        raise ValueError('limits must be memory or cpu')

    try:
        exceeds_bound = conv(d[kind]) > conv(bound)
    except KeyError:
        # The entry is absent from the container's resources section.
        raise LatentWorkerCannotSubstantiate(
            'All cpu & memory requests/limits must be set!')
    if exceeds_bound:
        raise LatentWorkerCannotSubstantiate(
            'Can\'t set request/limit to %s %s (max allowed '
            'is %s).' % (d[kind], unit, bound))

    total[spec][kind] += conv(d[kind])
def _thd_start_pod(self, pod, wait_for_completion=False):
    """Create the pod described by a dictionary and wait for it to settle.

    The pod is created in ``self.namespace`` and its status is polled every
    ``self._poll_resolution`` seconds while its phase is ``None``,
    ``'Pending'`` or ``'Unknown'`` (and also ``'Running'`` when
    ``wait_for_completion`` is true).  With ``wait_for_completion`` the pod
    is deleted once polling is over.

    Returns:
        The name of the created pod.

    Raises:
        LatentWorkerCannotSubstantiate: the pod could not be created.
        LatentWorkerFailedToSubstantiate: the pod status could not be read
            while waiting for it to start.
        KubePodWorkerCannotSubstantiate: the pod ended in a phase that is
            neither running nor succeeded; the pod is deleted first.
    """
    pod_name = pod.get('metadata', {}).get('name', 'no_name')
    self.logger.debug(
        'Starting pod %r with config:\n%s' %
        (pod_name, yaml.safe_dump(pod, default_flow_style=False)))

    # One API client is enough for the creation and every status poll.
    core_api = client.CoreV1Api()
    try:
        instance = core_api.create_namespaced_pod(self.namespace, pod)
    except ApiException as ex:
        raise LatentWorkerCannotSubstantiate(
            'Failed to create pod %s: %s' % (pod_name, ex.reason))

    pending = [None, 'Pending', 'Unknown']
    if wait_for_completion:
        pending.append('Running')

    while instance.status.phase in pending:
        sleep(self._poll_resolution)
        try:
            instance = core_api.read_namespaced_pod_status(
                instance.metadata.name, self.namespace)
        except ApiException as ex:
            if wait_for_completion:
                # pod may have completed
                break
            raise LatentWorkerFailedToSubstantiate(
                'Pod %s went missing: %s' %
                (instance.metadata.name, ex.reason))

    # Ensure the pod is running or has run successfully
    if instance.status.phase in [None, 'Pending', 'Failed', 'Unknown']:
        try:
            raise KubePodWorkerCannotSubstantiate(
                'Creating Pod %(pod)s failed (%(phase)s)', instance)
        finally:
            # Do not leave the broken pod behind.
            self.delete_pod(instance.metadata.name)

    if wait_for_completion:
        self.delete_pod(instance.metadata.name)

    return instance.metadata.name
def get_pod_config(self, name, source, variables):
    """Render a pod template and load it as a validated pod definition.

    Args:
        name: label identifying the template in error messages.
        source: template text of the pod definition.
        variables: values handed to the template renderer; referencing an
            undefined name is an error (``StrictUndefined``).

    Returns:
        The parsed YAML document describing the pod.

    Raises:
        LatentWorkerCannotSubstantiate: the template cannot be rendered, the
            result is not readable YAML, or the document is not a ``Pod``
            with a non-empty ``spec.containers``.
    """
    try:
        template = Template(source, undefined=StrictUndefined)
        rendered_body = template.render(variables)
    except Exception as ex:
        raise LatentWorkerCannotSubstantiate('Unable to render %s (%s)' %
                                             (name, ex))

    try:
        # safe_load: the template originates from a worker file, so it must
        # not be able to instantiate arbitrary Python objects.  A bare
        # yaml.load(stream) without a Loader is also rejected by PyYAML >= 6.
        pod = yaml.safe_load(rendered_body)
    except Exception as ex:
        raise LatentWorkerCannotSubstantiate(
            'Unable to read yaml from %s (%s)' % (name, ex))

    try:
        # Explicit checks instead of ``assert`` so the validation still runs
        # when Python is started with -O.
        if pod['kind'] != 'Pod':
            raise ValueError('kind must be \'Pod\'')
        if not pod['spec']['containers']:
            raise ValueError('spec.containers must not be empty')
    except Exception as ex:
        raise LatentWorkerCannotSubstantiate(
            '%s is not a valid Kuberbetes pod '
            'definition (%s)' % (name, ex))

    return pod
def test_failed_substantiations_get_exception(self):
    """
    A latent worker that fails to substantiate ends its build with an
    EXCEPTION result.
    """
    worker_config = yield self.create_single_worker_config()
    controller, master, builder_id = worker_config

    # Trigger a buildrequest
    yield self.createBuildrequest(master, [builder_id])

    # The worker fails to substantiate.
    substantiation_error = LatentWorkerCannotSubstantiate(
        "substantiation failed")
    controller.start_instance(Failure(substantiation_error))

    # Flush the errors logged by the failure.
    self.flushLoggedErrors(LatentWorkerCannotSubstantiate)

    # When the substantiation fails, the result is an exception.
    yield self.assertBuildResults(1, EXCEPTION)
    yield controller.auto_stop(True)
def _start_instance(self, stack_name, heat_template, heat_params): try: result = self.heat_client.stacks.create(stack_name=stack_name, template=heat_template, parameters=heat_params) except HTTPBadRequest as ex: raise LatentWorkerCannotSubstantiate( ex.error['error'].get('message')) self.stack_id = result['stack']['id'] stack = self.heat_client.stacks.get(stack_id=self.stack_id) while stack.stack_status == 'CREATE_IN_PROGRESS': time.sleep(POLLING_FREQ) stack = self.heat_client.stacks.get(stack_id=self.stack_id) if stack.stack_status != 'CREATE_COMPLETE': raise Exception(stack.stack_status) return stack
def start_instance(self, build):
    """Build the worker pod definition for ``build`` and start it.

    The pod is rendered from the ``worker_template`` build property, run
    through every policy/enrichment step of this worker, and then handed to
    ``_thd_start_instance`` in a thread.  The body yields deferreds, so it
    is presumably wrapped by an inlineCallbacks-style decorator outside
    this block.

    Raises:
        ValueError: an instance is already active.
        LatentWorkerCannotSubstantiate: the pod definition is rejected or
            any preparation step fails.
    """
    if self.instance is not None:
        raise ValueError('instance active')

    if self.registration is not None:
        self.pb_port = str(self.registration.getPBPort())

    self.template_path = build.getProperty('worker_path')
    try:
        template_source = build.getProperty('worker_template')
        template_vars = {
            'images': build.getProperty('worker_images'),
            'vars': build.getProperty('worker_vars'),
        }
        pod = self.get_pod_config(self.template_path, template_source,
                                  variables=template_vars)

        worker_uuid = util.create_hash(build.getProperty('repository'),
                                       self.name)
        build.setProperty("worker_uuid", worker_uuid, "Build")

        # The order of these steps is significant and kept as is.
        self.enforce_restart_policy(pod)
        self.enforce_affinity_policy(pod)
        self.enforce_gitconfig(pod)
        self.add_common_worker_env_vars(pod, build)
        self.add_common_worker_metadata(pod, build)
        self.enforce_resource_limits(pod)
        self.enforce_active_deadline(pod)
        self.configure_service_pod(pod, build)
        pod = yield self.interpolate_pod(pod, build)
    except LatentWorkerCannotSubstantiate:
        # Already the exception type expected by callers: pass it through.
        raise
    except Exception as ex:
        raise LatentWorkerCannotSubstantiate(
            'Unable to validate pod config %s (%s)' %
            (self.template_path, ex))

    started = yield threads.deferToThread(self._thd_start_instance, pod)
    defer.returnValue(started)
def _thd_start_instance(self, image, dockerfile, volumes, custom_context,
                        encoding, buildargs):
    """Create and start the docker container backing this latent worker.

    Steps, in order: remove leftover containers carrying this worker's
    container name, build the image from ``dockerfile`` when it is missing,
    pull it when ``self.autopull`` allows, then create and start the
    container.  With ``self.followStartupLogs`` the container output is
    logged until ``self.conn`` becomes truthy.

    Returns:
        ``[container_id, image]``.

    Raises:
        LatentWorkerCannotSubstantiate: the image is still missing after
            the build/pull attempts.
        LatentWorkerFailedToSubstantiate: the created container has no id.
    """
    docker_client = self._getDockerClient()
    container_name = self.getContainerName()

    # cleanup the old instances
    instances = docker_client.containers(all=1,
                                         filters=dict(name=container_name))
    container_name = "/{0}".format(container_name)
    for instance in instances:
        if container_name not in instance['Names']:
            continue
        try:
            docker_client.remove_container(instance['Id'], v=True,
                                           force=True)
        except NotFound:
            pass  # that's a race condition

    found = False
    if image is not None:
        found = self._image_exists(docker_client, image)
    else:
        image = '{}_{}_image'.format(self.workername, id(self))

    if (not found) and (dockerfile is not None):
        log.msg(
            "Image '{}' not found, building it from scratch".format(image))
        if custom_context:
            # Here ``dockerfile`` is a path to the build context file.
            with open(dockerfile, 'rb') as fin:
                lines = docker_client.build(fileobj=fin,
                                            custom_context=custom_context,
                                            encoding=encoding, tag=image,
                                            buildargs=buildargs)
        else:
            lines = docker_client.build(
                fileobj=BytesIO(dockerfile.encode('utf-8')),
                tag=image,
            )

        for line in lines:
            for streamline in _handle_stream_line(line):
                log.msg(streamline)

    imageExists = self._image_exists(docker_client, image)
    if ((not imageExists) or self.alwaysPull) and self.autopull:
        if not imageExists:
            log.msg("Image '{}' not found, pulling from registry".format(
                image))
        docker_client.pull(image)

    if not self._image_exists(docker_client, image):
        msg = 'Image "{}" not found on docker host.'.format(image)
        log.msg(msg)
        raise LatentWorkerCannotSubstantiate(msg)

    volumes, binds = self._thd_parse_volumes(volumes)
    host_conf = self.hostconfig.copy()
    host_conf['binds'] = binds
    if docker_py_version >= 2.2:
        # Default to running an init process, but keep an ``init`` value
        # that was configured explicitly in hostconfig.
        host_conf.setdefault('init', True)
    host_conf = docker_client.create_host_config(**host_conf)

    instance = docker_client.create_container(
        image,
        self.command,
        name=self.getContainerName(),
        volumes=volumes,
        environment=self.createEnvironment(),
        host_config=host_conf)

    if instance.get('Id') is None:
        log.msg('Failed to create the container')
        raise LatentWorkerFailedToSubstantiate('Failed to start container')
    shortid = instance['Id'][:6]
    log.msg('Container created, Id: {}...'.format(shortid))
    instance['image'] = image
    self.instance = instance
    docker_client.start(instance)
    log.msg('Container started')
    if self.followStartupLogs:
        logs = docker_client.attach(container=instance, stdout=True,
                                    stderr=True, stream=True)
        for line in logs:
            log.msg("docker VM {}: {}".format(shortid, line.strip()))
            if self.conn:
                # The worker has connected: stop following the output.
                break
        del logs
    return [instance['Id'], image]
def configure_service_pod(self, pod, build):
    """Prepare the pod that initialises/tears down an external service.

    ``self.service_pod`` is reset to ``None`` and stays so when the build has
    no ``worker_service`` property.  Otherwise the service pod is rendered
    from ``SERVICE_POD_TEMPLATE``, each requested namespace is stored on the
    build as a property mapping its plain name to a hashed one, and every
    container of the worker ``pod`` gets the ``kubeconfig`` secret mounted
    together with a ``KUBECONFIG`` environment variable.

    Raises:
        LatentWorkerCannotSubstantiate: a service is requested but
            ``self.service`` is not configured.
    """
    self.service_pod = None
    worker_service = build.getProperty('worker_service')
    if worker_service is None:
        return

    if not self.service:
        # configuration does not provide any service
        raise LatentWorkerCannotSubstantiate(
            'The worker is requesting access to a Kubernetes '
            'cluster but Eve is not configured to provide one; '
            'either remove the `service` section from the worker '
            'or reconfigure Eve.')

    buildid = build.getProperty('buildnumber')
    buildnumber = build.getProperty('bootstrap')
    repository = build.getProperty('repository')

    # retrieve unique user id and create namespace ids
    uuid = build.getProperty('worker_uuid')
    plain_namespaces = worker_service.get('namespaces', [])
    hashed_namespaces = []
    for namespace in plain_namespaces:
        hashed_namespaces.append(
            util.create_hash(repository, namespace, buildnumber, buildid))

    # store in properties
    for plain, hashed in zip(plain_namespaces, hashed_namespaces):
        build.setProperty(plain, hashed, "Build")

    template_variables = {
        'buildid': buildid,
        'buildnumber': buildnumber,
        'image': self.service,
        'namespaces': hashed_namespaces,
        'service_data': self.service_data,
        'service_requests': worker_service.get('requests', {}),
        'uuid': uuid,
        'worker_pod_name': util.compute_instance_name(build),
    }
    self.service_pod = self.get_pod_config(
        'SERVICE_POD_TEMPLATE', SERVICE_POD_TEMPLATE, template_variables)
    self.enforce_affinity_policy(self.service_pod)
    self.add_common_worker_env_vars(self.service_pod, build)
    self.add_common_worker_metadata(self.service_pod, build)

    # attach credentials to all containers in the worker pod
    pod_volumes = pod['spec'].setdefault('volumes', [])
    pod_volumes.append({
        'name': 'kubeconfig',
        'secret': {'secretName': uuid},
    })
    for container in pod['spec']['containers']:
        container.setdefault('env', []).append({
            'name': 'KUBECONFIG',
            'value': '/.kubeconfig',
        })
        container.setdefault('volumeMounts', []).append({
            'name': 'kubeconfig',
            'readOnly': True,
            'mountPath': '/.kubeconfig',
            'subPath': 'kubeconfig',
        })
def _thd_start_instance(self, docker_host, image, dockerfile, volumes,
                        host_config, custom_context, encoding, target,
                        buildargs, hostname):
    """Create and start the worker container on ``docker_host``.

    Steps, in order: remove leftover containers carrying this worker's
    container name, build the image from ``dockerfile`` when it is missing,
    pull it when ``self.autopull`` allows, then create and start the
    container.  The client arguments in use are kept in
    ``self._curr_client_args``.  ``host_config`` is updated in place with
    the volume binds and, unless already present, ``init``.

    Returns:
        ``[container_id, image]``.

    Raises:
        LatentWorkerCannotSubstantiate: the image is still missing after the
            build/pull attempts, or docker reports that the container
            operating system does not match the host.
        LatentWorkerFailedToSubstantiate: the created container has no id.
    """
    curr_client_args = self.client_args.copy()
    curr_client_args['base_url'] = docker_host
    docker_client = self._getDockerClient(curr_client_args)

    # cleanup the old instances
    bare_name = self.getContainerName()
    leftovers = docker_client.containers(all=1,
                                         filters=dict(name=bare_name))
    slashed_name = f"/{bare_name}"
    for leftover in leftovers:
        if slashed_name in leftover['Names']:
            try:
                docker_client.remove_container(leftover['Id'], v=True,
                                               force=True)
            except NotFound:
                pass  # that's a race condition

    if image is None:
        found = False
        image = f'{self.workername}_{id(self)}_image'
    else:
        found = self._image_exists(docker_client, image)

    if not found and dockerfile is not None:
        log.msg(f"Image '{image}' not found, building it from scratch")
        shared_build_args = dict(tag=image, pull=self.alwaysPull,
                                 target=target)
        if custom_context:
            with open(dockerfile, 'rb') as fin:
                lines = docker_client.build(fileobj=fin,
                                            custom_context=custom_context,
                                            encoding=encoding,
                                            buildargs=buildargs,
                                            **shared_build_args)
        else:
            inline_dockerfile = BytesIO(dockerfile.encode('utf-8'))
            lines = docker_client.build(fileobj=inline_dockerfile,
                                        **shared_build_args)

        for line in lines:
            for streamline in _handle_stream_line(line):
                log.msg(streamline)

    image_present = self._image_exists(docker_client, image)
    pull_wanted = (not image_present) or self.alwaysPull
    if pull_wanted and self.autopull:
        if not image_present:
            log.msg(f"Image '{image}' not found, pulling from registry")
        docker_client.pull(image)

    if not self._image_exists(docker_client, image):
        msg = f'Image "{image}" not found on docker host.'
        log.msg(msg)
        raise LatentWorkerCannotSubstantiate(msg)

    volumes, binds = self._thd_parse_volumes(volumes)
    host_config['binds'] = binds
    if docker_py_version >= 2.2 and 'init' not in host_config:
        host_config['init'] = True
    host_config = docker_client.create_host_config(**host_config)

    instance = docker_client.create_container(
        image,
        self.command,
        name=self.getContainerName(),
        volumes=volumes,
        environment=self.createEnvironment(),
        host_config=host_config,
        hostname=hostname)

    container_id = instance.get('Id')
    if container_id is None:
        log.msg('Failed to create the container')
        raise LatentWorkerFailedToSubstantiate('Failed to start container')
    shortid = instance['Id'][:6]
    log.msg(f'Container created, Id: {shortid}...')
    instance['image'] = image
    self.instance = instance
    self._curr_client_args = curr_client_args

    try:
        docker_client.start(instance)
    except docker.errors.APIError as e:
        # The following was noticed in certain usage of Docker on Windows
        os_mismatch = ('The container operating system does not match the '
                       'host operating system')
        if os_mismatch not in str(e):
            raise
        msg = f'Image used for build is wrong: {str(e)}'
        raise LatentWorkerCannotSubstantiate(msg) from e

    log.msg('Container started')
    if self.followStartupLogs:
        logs = docker_client.attach(container=instance, stdout=True,
                                    stderr=True, stream=True)
        for line in logs:
            log.msg(f"docker VM {shortid}: {line.strip()}")
            if self.conn:
                # The worker has connected: stop following the output.
                break
        del logs
    return [instance['Id'], image]
def enforce_resource_limits(self, pod):
    """Check every container's cpu/memory requests and limits.

    Each container must define memory and cpu for both ``requests`` and
    ``limits`` (missing ``resources`` sections are created empty first, so
    an omission is reported as an error).  No single value, and no per-pod
    sum, may exceed ``self.max_memory`` / ``self.max_cpus``.

    Raises:
        LatentWorkerCannotSubstantiate: a value is missing or a bound is
            exceeded.
    """
    total = {
        'requests': {'cpu': 0, 'memory': 0},
        'limits': {'cpu': 0, 'memory': 0},
    }

    def resource_limit(kind, d, spec):
        # Validate one entry against the maximum and add it to `total`.
        if kind == 'memory':
            conv, bound, unit = util.convert_to_bytes, self.max_memory, 'RAM'
        elif kind == 'cpu':
            conv, bound, unit = util.convert_to_cpus, self.max_cpus, 'CPUs'
        else:
            raise ValueError('limits must be memory or cpu')

        try:
            exceeds_bound = conv(d[kind]) > conv(bound)
        except KeyError:
            raise LatentWorkerCannotSubstantiate(
                'All cpu & memory requests/limits must be set!')
        if exceeds_bound:
            raise LatentWorkerCannotSubstantiate(
                'Can\'t set request/limit to %s %s (max allowed '
                'is %s).' % (d[kind], unit, bound))

        total[spec][kind] += conv(d[kind])

    # Enforce per-container limits
    for container in pod['spec']['containers']:
        resources = container.setdefault('resources', {})
        requests = resources.setdefault('requests', {})
        limits = resources.setdefault('limits', {})
        for kind in ('memory', 'cpu'):
            resource_limit(kind, requests, 'requests')
            resource_limit(kind, limits, 'limits')

    # Enforce pod-wide limits
    pod_wide_checks = (
        ('requests', 'memory', util.convert_to_bytes, self.max_memory,
         'Total memory requested for pod can\'t exceed %s!'),
        ('limits', 'memory', util.convert_to_bytes, self.max_memory,
         'Total memory limit for pod can\'t exceed %s!'),
        ('requests', 'cpu', util.convert_to_cpus, self.max_cpus,
         'Total cpu requested for pod can\'t exceed %s!'),
        ('limits', 'cpu', util.convert_to_cpus, self.max_cpus,
         'Total cpu limit for pod can\'t exceed %s!'),
    )
    for spec, kind, conv, bound, message in pod_wide_checks:
        if total[spec][kind] > conv(bound):
            raise LatentWorkerCannotSubstantiate(message % bound)
def _thd_start_instance(self, image, dockerfile, volumes):
    """Create and start the docker container backing this latent worker.

    Steps, in order: remove leftover containers carrying exactly this
    worker's container name, build the image from ``dockerfile`` when it is
    missing, pull it when it is still missing and ``self.autopull`` is set,
    then create and start the container.  With ``self.followStartupLogs``
    the container output is logged until ``self.conn`` becomes truthy.

    Returns:
        ``[container_id, image]``.

    Raises:
        LatentWorkerCannotSubstantiate: the image is still missing after
            the build/pull attempts.
        LatentWorkerFailedToSubstantiate: the created container has no id.
    """
    docker_client = self._getDockerClient()
    container_name = self.getContainerName()

    # cleanup the old instances
    instances = docker_client.containers(
        all=1, filters=dict(name=container_name))
    # The ``name`` filter also returns containers whose name merely contains
    # container_name; only remove the exact match so that containers of
    # other workers are left alone.  Listed names carry a leading '/'.
    exact_name = '/%s' % container_name
    for instance in instances:
        if exact_name not in instance['Names']:
            continue
        try:
            docker_client.remove_container(instance['Id'], v=True,
                                           force=True)
        except NotFound:
            pass  # that's a race condition

    found = False
    if image is not None:
        found = self._image_exists(docker_client, image)
    else:
        image = '%s_%s_image' % (self.workername, id(self))

    if (not found) and (dockerfile is not None):
        log.msg("Image '%s' not found, building it from scratch" % image)
        for line in docker_client.build(
                fileobj=BytesIO(dockerfile.encode('utf-8')), tag=image):
            for streamline in _handle_stream_line(line):
                log.msg(streamline)

    if (not self._image_exists(docker_client, image)) and self.autopull:
        log.msg("Image '%s' not found, pulling from registry" % image)
        docker_client.pull(image)

    if not self._image_exists(docker_client, image):
        log.msg("Image '%s' not found" % image)
        raise LatentWorkerCannotSubstantiate(
            'Image "%s" not found on docker host.' % image)

    volumes, binds = self._thd_parse_volumes(volumes)
    host_conf = self.hostconfig.copy()
    host_conf['binds'] = binds
    host_conf = docker_client.create_host_config(**host_conf)

    instance = docker_client.create_container(
        image,
        self.command,
        name=self.getContainerName(),
        volumes=volumes,
        environment=self.createEnvironment(),
        host_config=host_conf)

    if instance.get('Id') is None:
        log.msg('Failed to create the container')
        raise LatentWorkerFailedToSubstantiate('Failed to start container')
    shortid = instance['Id'][:6]
    log.msg('Container created, Id: %s...' % (shortid, ))
    instance['image'] = image
    self.instance = instance
    docker_client.start(instance)
    log.msg('Container started')
    if self.followStartupLogs:
        logs = docker_client.attach(container=instance, stdout=True,
                                    stderr=True, stream=True)
        for line in logs:
            log.msg("docker VM %s: %s" % (shortid, line.strip()))
            if self.conn:
                # The worker has connected: stop following the output.
                break
        del logs
    return [instance['Id'], image]
def _thd_start_instance(self, image, dockerfile, hostconfig, volumes):
    """Create and start the worker container with runtime host config.

    Steps, in order: remove leftover containers carrying this worker's
    container name, build the image from ``dockerfile`` when it is missing,
    pull it when ``self.autopull`` allows, then create and start the
    container using ``hostconfig``.  The caller's ``hostconfig`` mapping is
    not modified.

    Returns:
        ``[container_id, image]``.

    Raises:
        LatentWorkerCannotSubstantiate: the image is still missing after
            the build/pull attempts.
        LatentWorkerFailedToSubstantiate: the created container has no id.
    """
    # License note:
    # copied from the original implementation with minor modification
    # to pass runtime configuration to the containers
    with self.docker_client() as docker_client:
        container_name = self.getContainerName()

        # cleanup the old instances
        instances = docker_client.containers(
            all=1, filters=dict(name=container_name))
        container_name = '/{0}'.format(container_name)
        for instance in instances:
            if container_name not in instance['Names']:
                continue
            try:
                docker_client.remove_container(instance['Id'], v=True,
                                               force=True)
            except docker.errors.NotFound:
                pass  # that's a race condition

        found = False
        if image is not None:
            found = self._image_exists(docker_client, image)
        else:
            worker_id = id(self)
            worker_name = self.workername
            image = f'{worker_name}_{worker_id}_image'

        if (not found) and (dockerfile is not None):
            log.info(f'Image {image} not found, building it from scratch')
            for line in docker_client.build(
                fileobj=BytesIO(dockerfile.encode('utf-8')),
                tag=image
            ):
                for streamline in _handle_stream_line(line):
                    log.info(streamline)

        imageExists = self._image_exists(docker_client, image)
        if ((not imageExists) or self.alwaysPull) and self.autopull:
            if not imageExists:
                log.info(f'Image {image} not found, pulling from registry')
            docker_client.pull(image)

        if not self._image_exists(docker_client, image):
            log.info(f'Image {image} not found')
            raise LatentWorkerCannotSubstantiate(
                f'Image {image} not found on docker host.'
            )

        volumes, binds = self._thd_parse_volumes(volumes)
        # Work on a copy so the mapping owned by the caller is left intact.
        hostconfig = dict(hostconfig)
        hostconfig['binds'] = binds
        if docker_py_version >= 2.2:
            # Default to running an init process, but keep an ``init``
            # value that was passed explicitly.
            hostconfig.setdefault('init', True)

        instance = docker_client.create_container(
            image,
            self.command,
            name=self.getContainerName(),
            volumes=volumes,
            environment=self.createEnvironment(),
            host_config=docker_client.create_host_config(
                **hostconfig
            )
        )

        if instance.get('Id') is None:
            log.info('Failed to create the container')
            raise LatentWorkerFailedToSubstantiate(
                'Failed to start container'
            )
        shortid = instance['Id'][:6]
        log.info(f'Container created, Id: {shortid}...')

        instance['image'] = image
        self.instance = instance
        docker_client.start(instance)
        log.info('Container started')
        if self.followStartupLogs:
            logs = docker_client.attach(
                container=instance, stdout=True, stderr=True, stream=True)
            for line in logs:
                line = line.strip()
                log.info(f'docker VM {shortid}: {line}')
                if self.conn:
                    # The worker has connected: stop following the output.
                    break
            del logs

    return [instance['Id'], image]