def standard_bindings(cls, job):
  """Rebind now-deprecated binding names onto their modern equivalents."""
  # Address -> replacement template for each legacy binding.
  replacements = {
      'mesos.role': '{{role}}',
      'mesos.cluster': '{{cluster}}',
      'thermos.user': '{{role}}',
  }
  return job.bind(dict(
      (Ref.from_address(address), template)
      for address, template in replacements.items()))
def task_links(self):
  """Translate pystachio-style link refs into executor placeholder syntax.

  {{mesos.instance}}      -> %shard_id%
  {{thermos.ports[foo]}}  -> %port:foo%
  """
  links = self._job.task_links()
  if links is Empty:
    return links
  _, unbound = links.interpolate()
  ports_ref = Ref.from_address('thermos.ports')
  mapping = {Ref.from_address('mesos.instance'): '%shard_id%'}
  for ref in unbound:
    scoped = ports_ref.scoped_to(ref)
    if scoped:
      mapping[ref] = '%%port:%s%%' % scoped.action().value
  return links.bind(mapping)
def task_links(self):
  """Expand configured task links into executor placeholders.

  {{mesos.instance}}      -> %shard_id%
  {{thermos.ports[foo]}}  -> %port:foo%
  """
  configured = self._job.task_links()
  if configured is Empty:
    return configured
  _, unresolved = configured.interpolate()
  thermos_ports = Ref.from_address('thermos.ports')
  replacements = {
      Ref.from_address('mesos.instance'): '%shard_id%',
  }
  for candidate in unresolved:
    port_ref = thermos_ports.scoped_to(candidate)
    if port_ref:
      replacements[candidate] = '%%port:%s%%' % port_ref.action().value
  return configured.bind(replacements)
def mesos_task_instance_from_assigned_task(assigned_task):
  """Deserialize a MesosTaskInstance from an AssignedTask thrift.

  Raises:
    ValueError: if the task carries no thermos config, the config cannot be
      deserialized, or unexpected unbound refs remain after binding.
  """
  thermos_task = assigned_task.task.executorConfig.data
  if not thermos_task:
    raise ValueError('Task did not have a thermos config!')

  try:
    json_blob = json.loads(thermos_task)
  except (TypeError, ValueError) as e:
    raise ValueError('Could not deserialize thermos config: %s' % e)

  # As part of the transition for MESOS-2133, we can send either a
  # MesosTaskInstance or a MesosJob, so handle both possible cases.  Once
  # everyone is using MesosJob, we can begin to leverage additional
  # information that becomes available such as cluster.
  if 'instance' in json_blob:
    return MesosTaskInstance.json_loads(thermos_task)

  # This is a MesosJob.
  mti, refs = task_instance_from_job(
      MesosJob.json_loads(thermos_task), assigned_task.instanceId)

  # {{thermos.task_id}} and subscopes of {{thermos.ports}} are bound later by
  # the Thermos Runner, so they must remain unbound here.  {{thermos.user}} is
  # a legacy binding which we can safely ignore.
  #
  # TODO(wickman) These should be rewritten by the mesos client to use
  # %%style%% replacements in order to allow us to better type-check configs
  # client-side.
  # Hoist ref construction out of the loop; the originals were rebuilt per
  # iteration, and the trailing dangling `else` obscured the intent.
  task_id_ref = Ref.from_address('thermos.task_id')
  ports_ref = Ref.from_address('thermos.ports')
  user_ref = Ref.from_address('thermos.user')
  unbound_refs = [
      ref for ref in refs
      if ref not in (task_id_ref, user_ref) and not Ref.subscope(ports_ref, ref)
  ]
  if unbound_refs:
    raise ValueError('Unexpected unbound refs: %s' % ' '.join(map(str, unbound_refs)))
  return mti
def test_choice_interpolation():
  """A Choice should interpolate to whichever alternative type-checks."""
  IntOrFloat = Choice((Integer, Float))
  single = IntOrFloat('{{abc}}')
  double = IntOrFloat('{{a}}{{b}}')

  bound_int = single.bind(abc=34)
  assert isinstance(bound_int.interpolate()[0], Integer)
  assert bound_int.check().ok()

  bound_float = single.bind(abc=123.354)
  assert isinstance(bound_float.interpolate()[0], Float)
  assert bound_float.check().ok()

  bound_str = single.bind(abc="def")
  assert not bound_str.check().ok()

  assert double.interpolate()[1] == [Ref.from_address('a'), Ref.from_address('b')]

  as_int = double.bind(a=12, b=23)
  assert as_int.check().ok()
  assert as_int.unwrap() == Integer(1223)

  as_float = double.bind(a=12, b=".34")
  assert as_float.check().ok()
  assert as_float.unwrap() == Float(12.34)
def add_binding_callback(option, opt, value, parser):
  """optparse callback that accumulates NAME=VALUE bindings on the parser.

  Appends a {Ref: value} dict to the list stored under `option_name` on
  parser.values, creating the list on first use.

  Raises:
    ParseError: if the argument lacks an '=' or NAME is not a valid ref.
  """
  if not getattr(parser.values, option_name, None):
    setattr(parser.values, option_name, [])
  # Split on the first '=' only so that VALUE may itself contain '='
  # (consistent with binding_parser); splitting on every '=' rejected
  # legitimate values such as name=a=b.
  parts = value.split('=', 1)
  if len(parts) != 2:
    raise ParseError('Binding must be of the form NAME=VALUE')
  name, bound_value = parts
  try:
    ref = Ref.from_address(name)
  except Ref.InvalidRefError as e:
    raise ParseError('Could not parse ref %s: %s' % (name, e))
  getattr(parser.values, option_name).append({ref: bound_value})
def bind(self, config, match, env, binding_dict):
  """Resolve a {{docker.image[name][tag]}} ref and bind it on the config.

  Resolution results are memoized in binding_dict keyed by the ref address.
  """
  cluster = CLUSTERS[config.cluster()]
  name, tag = match[2], match[3]
  cache_key = 'docker.image[%s][%s]' % (name, tag)
  if cache_key in binding_dict:
    _, resolved = binding_dict[cache_key]
  else:
    resolved = DockerRegistryClient.resolve(cluster, name, tag)
    binding_dict[cache_key] = ('%s:%s' % (name, tag), resolved)
  config.bind({Ref.from_address(cache_key): resolved})
def extract(cls, obj):
  """Collect the port names referenced via {{thermos.ports[...]}} in obj.

  Raises:
    cls.InvalidPorts: if a thermos.ports ref is not an index-style access.
  """
  _, unbound = obj.interpolate()
  scope = Ref.from_address('thermos.ports')
  names = []
  for ref in unbound:
    port_ref = scope.scoped_to(ref)
    if port_ref is None:
      continue
    if not port_ref.is_index():
      raise cls.InvalidPorts(
          'Bad port specification "%s" (should be of form "thermos.ports[name]"'
          % ref.address())
    names.append(port_ref.action().value)
  return names
def bind(self, config, match, env, binding_dict):
  """Bind a resolved docker image struct for a {{docker.image[...][...]}} ref.

  Results are cached in binding_dict so each image is resolved once.
  """
  cluster = CLUSTERS[config.cluster()]
  repo, tag = match[2], match[3]
  address = 'docker.image[%s][%s]' % (repo, tag)
  try:
    _, image_struct = binding_dict[address]
  except KeyError:
    image_struct = DockerRegistryClient.resolve(cluster, repo, tag)
    binding_dict[address] = ('%s:%s' % (repo, tag), image_struct)
  config.bind({Ref.from_address(address): image_struct})
def binding_parser(binding):
  """Convert one "name=value" binding string into a pystachio binding dict.

  Pystachio takes bindings in the form of a list of dictionaries; each
  user-supplied "name=value" string becomes one {Ref: value} dictionary.

  Raises:
    ValueError: if the string contains no '=' or the name is not a valid ref.
  """
  # Split on the first '=' only so the value may itself contain '=';
  # splitting on every '=' rejected values such as name=a=b.
  binding_parts = binding.split("=", 1)
  if len(binding_parts) < 2:
    raise ValueError('Binding parameter must be formatted name=value')
  try:
    ref = Ref.from_address(binding_parts[0])
  except Ref.InvalidRefError as e:
    raise ValueError("Could not parse binding parameter %s: %s" % (binding, e))
  return {ref: binding_parts[1]}
def binding_parser(binding):
  """Turn a user-supplied "name=value" string into a pystachio binding dict.

  Pystachio consumes bindings as a list of dictionaries, so each string from
  the command line becomes one {Ref: value} dictionary.

  Raises:
    ValueError: if no '=' is present or the name is not a valid ref address.
  """
  name, sep, value = binding.partition("=")
  if not sep:
    raise ValueError('Binding parameter must be formatted name=value')
  try:
    ref = Ref.from_address(name)
  except Ref.InvalidRefError as e:
    raise ValueError("Could not parse binding parameter %s: %s" % (binding, e))
  return {ref: value}
def bind(self, config, match, env, binding_dict):
  """Resolve a sacker[name][version] ref, bind it, and record task metadata.

  Lookups are memoized in binding_dict keyed by the ref address.
  """
  cluster = CLUSTERS[config.cluster()]
  name, version = match[1:3]
  address = 'sacker[%s][%s]' % (name, version)
  try:
    s3_struct = binding_dict[address]
  except KeyError:
    s3_struct = get_sacker_binding(cluster, name, version)
    binding_dict[address] = s3_struct
  config.bind({Ref.from_address(address): s3_struct})
  # Surface the resolved version and sha in the scheduler UI.
  config.add_metadata(
      key='sacker',
      value='%s/%s sha:%s' % (name, s3_struct.version(), s3_struct.sha()))
def test_choice_in_struct():
  """A Choice-typed Struct field should resolve per the bound value's type."""
  class SOne(Struct):
    a = Choice((Integer, Float))
    b = String

  concrete = SOne(a=12, b="abc")
  assert concrete.check().ok()
  assert concrete.interpolate()[0].a().unwrap() == Integer(12)

  templated = SOne(a="1{{q}}2", b="hi there")
  assert not templated.check().ok()
  assert templated.interpolate()[1] == [Ref.from_address('q')]

  as_int = templated.bind(q="34")
  assert as_int.check().ok()
  assert as_int.a().unwrap() == Integer(1342)

  as_float = templated.bind(q="3.4")
  assert as_float.check().ok()
  assert as_float.a().unwrap() == Float(13.42)

  as_str = templated.bind(q="abc")
  assert not as_str.check().ok()
def filter_aliased_fields(job):
  # Blank out every aliased field so only the canonical field names survive
  # in the serialized config.
  return job(**dict((key, Empty) for key in ALIASED_FIELDS))


def assert_valid_field(field, identifier):
  # Validate that `identifier` is a string matching the good-identifier
  # pattern; returns it unchanged or raises InvalidConfig naming the field.
  VALID_IDENTIFIER = re.compile(GOOD_IDENTIFIER_PATTERN_PYTHON)
  if not isinstance(identifier, Compatibility.string):
    raise InvalidConfig("%s must be a string" % field)
  if not VALID_IDENTIFIER.match(identifier):
    raise InvalidConfig("Invalid %s '%s'" % (field, identifier))
  return identifier


# Well-known refs bound by the runtime rather than by user configuration.
MESOS_INSTANCE_REF = Ref.from_address('mesos.instance')
MESOS_HOSTNAME_REF = Ref.from_address('mesos.hostname')
THERMOS_PORT_SCOPE_REF = Ref.from_address('thermos.ports')
THERMOS_TASK_ID_REF = Ref.from_address('thermos.task_id')


def convert(job, metadata=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""
  # NOTE(review): this chunk appears truncated — the remainder of convert()
  # is not visible here.
  owner = Identity(user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))
  task_raw = job.task()
def convert(job, metadata=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration.

  Args:
    job: a fully specified Pystachio MesosJob.
    metadata: iterable of (key, value) pairs merged into the task metadata.
    ports: iterable of named ports requested as task resources.

  Returns:
    A thrift JobConfiguration.

  Raises:
    InvalidConfig: if resources are missing or non-positive, the task fails
      validation, or unbound refs remain after interpolation.
  """
  owner = Identity(user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))
  task_raw = job.task()

  MB = 1024 * 1024
  task = TaskConfig()

  def not_empty_or(item, default):
    # Interpolate `item` unless it is unset (Empty), in which case `default`.
    return default if item is Empty else fully_interpolated(item)

  # job components
  task.production = fully_interpolated(job.production(), bool)
  task.isService = select_service_bit(job)
  task.maxTaskFailures = fully_interpolated(job.max_task_failures())
  task.priority = fully_interpolated(job.priority())
  task.contactEmail = not_empty_or(job.contact(), None)
  task.tier = not_empty_or(job.tier(), None)

  if job.has_partition_policy():
    task.partitionPolicy = PartitionPolicy(
        fully_interpolated(job.partition_policy().reschedule()),
        fully_interpolated(job.partition_policy().delay_secs()))

  # Add metadata to a task, to display in the scheduler UI.
  # Union of job-declared metadata and caller-supplied pairs, deduplicated.
  metadata_set = frozenset()
  if job.has_metadata():
    customized_metadata = job.metadata()
    metadata_set |= frozenset(
        (str(fully_interpolated(key_value_metadata.key())),
         str(fully_interpolated(key_value_metadata.value())))
        for key_value_metadata in customized_metadata)
  metadata_set |= frozenset((str(key), str(value)) for key, value in metadata)
  task.metadata = frozenset(
      Metadata(key=key, value=value) for key, value in metadata_set)

  # task components
  if not task_raw.has_resources():
    raise InvalidConfig('Task must specify resources!')

  if (fully_interpolated(task_raw.resources().ram()) == 0
      or fully_interpolated(task_raw.resources().disk()) == 0):
    raise InvalidConfig(
        'Must specify ram and disk resources, got ram:%r disk:%r' %
        (fully_interpolated(task_raw.resources().ram()),
         fully_interpolated(task_raw.resources().disk())))

  # ram/disk are configured in bytes; thrift wants MB.
  numCpus = fully_interpolated(task_raw.resources().cpu())
  ramMb = fully_interpolated(task_raw.resources().ram()) / MB
  diskMb = fully_interpolated(task_raw.resources().disk()) / MB
  if numCpus <= 0 or ramMb <= 0 or diskMb <= 0:
    raise InvalidConfig(
        'Task has invalid resources. cpu/ramMb/diskMb must all be positive: '
        'cpu:%r ramMb:%r diskMb:%r' % (numCpus, ramMb, diskMb))
  numGpus = fully_interpolated(task_raw.resources().gpu())

  task.resources = frozenset(
      [Resource(numCpus=numCpus),
       Resource(ramMb=ramMb),
       Resource(diskMb=diskMb)] +
      [Resource(namedPort=p) for p in ports] +
      ([Resource(numGpus=numGpus)] if numGpus else []))

  task.job = key
  task.owner = owner
  task.taskLinks = {}  # See AURORA-739
  task.constraints = constraints_to_thrift(
      not_empty_or(job.constraints(), {}))
  task.container = create_container_config(job.container())

  underlying, refs = job.interpolate()

  # need to fake an instance id for the sake of schema checking
  underlying_checked = underlying.bind(mesos={
      'instance': 31337,
      'hostname': ''
  })
  try:
    ThermosTaskValidator.assert_valid_task(underlying_checked.task())
  except ThermosTaskValidator.InvalidTaskError as e:
    raise InvalidConfig('Task is invalid: %s' % e)
  if not underlying_checked.check().ok():
    raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

  # Runtime-bound refs (task id, instance, hostname, thermos.ports[...])
  # are expected to remain unbound at this point; anything else is an error.
  unbound = []
  for ref in refs:
    if ref in (THERMOS_TASK_ID_REF, MESOS_INSTANCE_REF, MESOS_HOSTNAME_REF) or (
        Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
      continue
    unbound.append(ref)

  if unbound:
    raise InvalidConfig('Config contains unbound variables: %s' %
                        ' '.join(map(str, unbound)))

  # set the executor that will be used by the Mesos task. Thermos is the default
  executor = job.executor_config()
  if fully_interpolated(executor.name()) == AURORA_EXECUTOR_NAME:
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=filter_aliased_fields(underlying).json_dumps())
  else:
    task.executorConfig = ExecutorConfig(
        name=fully_interpolated(executor.name()),
        data=fully_interpolated(executor.data()))

  return JobConfiguration(
      key=key,
      owner=owner,
      cronSchedule=not_empty_or(job.cron_schedule(), None),
      cronCollisionPolicy=select_cron_policy(job.cron_collision_policy()),
      taskConfig=task,
      instanceCount=fully_interpolated(job.instances()))
def filter_aliased_fields(job):
  # Clear every aliased field, leaving only the canonical names set.
  return job(**dict((key, Empty) for key in ALIASED_FIELDS))


def assert_valid_field(field, identifier):
  # Ensure `identifier` is a string matching GOOD_IDENTIFIER_PATTERN_PYTHON;
  # returns it unchanged, or raises InvalidConfig naming the offending field.
  VALID_IDENTIFIER = re.compile(GOOD_IDENTIFIER_PATTERN_PYTHON)
  if not isinstance(identifier, Compatibility.string):
    raise InvalidConfig("%s must be a string" % field)
  if not VALID_IDENTIFIER.match(identifier):
    raise InvalidConfig("Invalid %s '%s'" % (field, identifier))
  return identifier


# Refs bound by the scheduler/executor at runtime, not by the user config.
MESOS_INSTANCE_REF = Ref.from_address('mesos.instance')
MESOS_HOSTNAME_REF = Ref.from_address('mesos.hostname')
THERMOS_PORT_SCOPE_REF = Ref.from_address('thermos.ports')
THERMOS_TASK_ID_REF = Ref.from_address('thermos.task_id')


def convert(job, metadata=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""
  # NOTE(review): this chunk ends mid-function; the rest of convert() is not
  # visible here.
  owner = Identity(user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))
def filter_aliased_fields(job):
  # Blank out aliased fields so only canonical field names are serialized.
  return job(**dict((key, Empty) for key in ALIASED_FIELDS))


def assert_valid_field(field, identifier):
  # Validate `identifier` against the good-identifier pattern; return it
  # unchanged or raise InvalidConfig with the field name.
  VALID_IDENTIFIER = re.compile(GOOD_IDENTIFIER_PATTERN_PYTHON)
  if not isinstance(identifier, Compatibility.string):
    raise InvalidConfig("%s must be a string" % field)
  if not VALID_IDENTIFIER.match(identifier):
    raise InvalidConfig("Invalid %s '%s'" % (field, identifier))
  return identifier


# Refs that are bound at runtime rather than by user configuration.
MESOS_INSTANCE_REF = Ref.from_address("mesos.instance")
MESOS_HOSTNAME_REF = Ref.from_address("mesos.hostname")
THERMOS_PORT_SCOPE_REF = Ref.from_address("thermos.ports")
THERMOS_TASK_ID_REF = Ref.from_address("thermos.task_id")


def convert(job, metadata=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""
  # NOTE(review): this chunk ends mid-function; the remainder of convert()
  # is not visible here.
  owner = Identity(user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field("role", fully_interpolated(job.role())),
      environment=assert_valid_field("environment", fully_interpolated(job.environment())),
      name=assert_valid_field("name", fully_interpolated(job.name())),
  )
def uncached_bind(self, config, match, env, binding_dict):
  """Always re-resolve the binding (no cache) and record the invocation."""
  self.uncached_binds = self.uncached_binds + 1
  address = '%s[%s]' % match
  binding = {Ref.from_address(address): 'C(%s)' % match[1]}
  config.bind(binding)
  return binding
def bind(self, config, match, env, binding_dict):
  """Bind a U(...) value for the matched ref and count the invocation."""
  # TODO(wickman) You should be able to take a match tuple + matcher
  # object and return the ref.
  self.binds = self.binds + 1
  address = '%s[%s]' % match
  config.bind({Ref.from_address(address): 'U(%s)' % match[1]})
def convert(job, packages=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration.

  Args:
    job: a fully specified Pystachio MesosJob.
    packages: iterable of (role, package_name, version) tuples shown in the
      scheduler UI.
    ports: iterable of named ports requested by the task.

  Returns:
    A thrift JobConfiguration.

  Raises:
    InvalidConfig: if resources are missing or non-positive, the task fails
      validation, or unbound refs remain after interpolation.
  """
  owner = Identity(role=fully_interpolated(job.role()), user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))
  task_raw = job.task()

  MB = 1024 * 1024
  task = TaskConfig()

  def not_empty_or(item, default):
    # Interpolate `item` unless it is unset (Empty), in which case `default`.
    return default if item is Empty else fully_interpolated(item)

  # job components
  task.jobName = fully_interpolated(job.name())
  task.environment = fully_interpolated(job.environment())
  task.production = fully_interpolated(job.production(), bool)
  task.isService = select_service_bit(job)
  task.maxTaskFailures = fully_interpolated(job.max_task_failures())
  task.priority = fully_interpolated(job.priority())
  task.contactEmail = not_empty_or(job.contact(), None)

  # Add package tuples to a task, to display in the scheduler UI.
  task.packages = frozenset(
      Package(role=str(role), name=str(package_name), version=int(version))
      for role, package_name, version in packages)

  # task components
  if not task_raw.has_resources():
    raise InvalidConfig('Task must specify resources!')

  if (fully_interpolated(task_raw.resources().ram()) == 0
      or fully_interpolated(task_raw.resources().disk()) == 0):
    raise InvalidConfig(
        'Must specify ram and disk resources, got ram:%r disk:%r' %
        (fully_interpolated(task_raw.resources().ram()),
         fully_interpolated(task_raw.resources().disk())))

  # ram/disk are configured in bytes; thrift wants MB.
  task.numCpus = fully_interpolated(task_raw.resources().cpu())
  task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
  task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB

  if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
    raise InvalidConfig(
        'Task has invalid resources. cpu/ramMb/diskMb must all be positive: '
        'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

  task.owner = owner
  task.requestedPorts = ports
  task.taskLinks = not_empty_or(job.task_links(), {})
  task.constraints = constraints_to_thrift(
      not_empty_or(job.constraints(), {}))

  underlying, refs = job.interpolate()

  # need to fake an instance id for the sake of schema checking
  underlying_checked = underlying.bind(mesos={'instance': 31337})
  try:
    ThermosTaskValidator.assert_valid_task(underlying_checked.task())
  except ThermosTaskValidator.InvalidTaskError as e:
    raise InvalidConfig('Task is invalid: %s' % e)
  if not underlying_checked.check().ok():
    raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

  # Runtime-bound refs (task id, instance id, thermos.ports[...]) are
  # expected to remain unbound at this point; anything else is an error.
  unbound = []
  for ref in refs:
    if ref == THERMOS_TASK_ID_REF or ref == MESOS_INSTANCE_REF or (
        Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
      continue
    unbound.append(ref)

  if unbound:
    raise InvalidConfig('Config contains unbound variables: %s' %
                        ' '.join(map(str, unbound)))

  cron_schedule = not_empty_or(job.cron_schedule(), '')
  cron_policy = select_cron_policy(job.cron_policy(), job.cron_collision_policy())

  task.executorConfig = ExecutorConfig(
      name=AURORA_EXECUTOR_NAME,
      data=filter_aliased_fields(underlying).json_dumps())

  return JobConfiguration(
      key=key,
      owner=owner,
      cronSchedule=cron_schedule,
      cronCollisionPolicy=cron_policy,
      taskConfig=task,
      instanceCount=fully_interpolated(job.instances()))
def convert(job, metadata=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration.

  Args:
    job: a fully specified Pystachio MesosJob.
    metadata: iterable of (key, value) pairs attached as task metadata.
    ports: iterable of named ports requested by the task.

  Returns:
    A thrift JobConfiguration.

  Raises:
    InvalidConfig: if resources are missing or non-positive, the task fails
      validation, or unbound refs remain after interpolation.
  """
  owner = Identity(user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))
  task_raw = job.task()

  MB = 1024 * 1024
  task = TaskConfig()

  def not_empty_or(item, default):
    # Interpolate `item` unless it is unset (Empty), in which case `default`.
    return default if item is Empty else fully_interpolated(item)

  # job components
  task.production = fully_interpolated(job.production(), bool)
  task.isService = select_service_bit(job)
  task.maxTaskFailures = fully_interpolated(job.max_task_failures())
  task.priority = fully_interpolated(job.priority())
  task.contactEmail = not_empty_or(job.contact(), None)
  task.tier = not_empty_or(job.tier(), None)

  # Add metadata to a task, to display in the scheduler UI.
  task.metadata = frozenset(Metadata(key=str(key), value=str(value))
                            for key, value in metadata)

  # task components
  if not task_raw.has_resources():
    raise InvalidConfig('Task must specify resources!')

  if (fully_interpolated(task_raw.resources().ram()) == 0
      or fully_interpolated(task_raw.resources().disk()) == 0):
    raise InvalidConfig('Must specify ram and disk resources, got ram:%r disk:%r' % (
        fully_interpolated(task_raw.resources().ram()),
        fully_interpolated(task_raw.resources().disk())))

  # ram/disk are configured in bytes; thrift wants MB.
  task.numCpus = fully_interpolated(task_raw.resources().cpu())
  task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
  task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB

  if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
    raise InvalidConfig('Task has invalid resources. cpu/ramMb/diskMb must all be positive: '
                        'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

  task.job = key
  task.owner = owner
  task.requestedPorts = ports
  task.taskLinks = {}  # See AURORA-739
  task.constraints = constraints_to_thrift(not_empty_or(job.constraints(), {}))
  task.container = create_container_config(job.container())

  underlying, refs = job.interpolate()

  # need to fake an instance id for the sake of schema checking
  underlying_checked = underlying.bind(mesos={'instance': 31337, 'hostname': ''})
  try:
    ThermosTaskValidator.assert_valid_task(underlying_checked.task())
  except ThermosTaskValidator.InvalidTaskError as e:
    raise InvalidConfig('Task is invalid: %s' % e)
  if not underlying_checked.check().ok():
    raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

  # Runtime-bound refs (task id, instance, hostname, thermos.ports[...])
  # may legitimately remain unbound at this point; anything else is an error.
  unbound = []
  for ref in refs:
    if ref in (THERMOS_TASK_ID_REF, MESOS_INSTANCE_REF, MESOS_HOSTNAME_REF) or (
        Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
      continue
    unbound.append(ref)

  if unbound:
    raise InvalidConfig('Config contains unbound variables: %s' % ' '.join(map(str, unbound)))

  task.executorConfig = ExecutorConfig(
      name=AURORA_EXECUTOR_NAME,
      data=filter_aliased_fields(underlying).json_dumps())

  return JobConfiguration(
      key=key,
      owner=owner,
      cronSchedule=not_empty_or(job.cron_schedule(), None),
      cronCollisionPolicy=select_cron_policy(job.cron_collision_policy()),
      taskConfig=task,
      instanceCount=fully_interpolated(job.instances()))