def __init__(self, task, checkpoint_root, sandbox, log_dir=None,
             task_id=None, portmap=None, user=None, chroot=False, clock=time,
             universal_handler=None, planner_class=TaskPlanner):
  """
    required:
      task (config.Task) = the task to run
      checkpoint_root (path) = the checkpoint root
      sandbox (path) = the sandbox in which the task will be run
                       [if None, cwd will be assumed, but garbage collection
                        will be disabled for this task.]

    optional:
      log_dir (string)  = directory to house stdout/stderr logs. If not specified, logs will be
                          written into the sandbox directory under .logs/
      task_id (string)  = bind to this task id. if not specified, will synthesize an id based
                          upon task.name()
      portmap (dict)    = a map (string => integer) from name to port, e.g. { 'http': 80 }
      user (string)     = the user to run the task as. if not current user, requires setuid
                          privileges.
      chroot (boolean)  = whether or not to chroot into the sandbox prior to exec.
      clock (time interface) = the clock to use throughout
      universal_handler = checkpoint record handler (only used for testing)
      planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
                          planning policy.
  """
  if not issubclass(planner_class, TaskPlanner):
    raise TypeError('planner_class must be a TaskPlanner.')
  self._clock = clock
  launch_time = self._clock.time()
  launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10**6)
  if not task_id:
    self._task_id = '%s-%s.%s' % (task.name(),
                                  time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
                                  launch_time_ms)
  else:
    self._task_id = task_id
  current_user = TaskRunnerHelper.get_actual_user()
  self._user = user or current_user
  # TODO(wickman) This should be delegated to the ProcessPlatform / Helper
  if self._user != current_user:
    if os.geteuid() != 0:
      raise ValueError('task specifies user as %s, but %s does not have setuid permission!' % (
          self._user, current_user))
  self._portmap = portmap or {}
  self._launch_time = launch_time
  self._log_dir = log_dir or os.path.join(sandbox, '.logs')
  self._pathspec = TaskPath(root=checkpoint_root, task_id=self._task_id, log_dir=self._log_dir)
  try:
    ThermosTaskValidator.assert_valid_task(task)
    ThermosTaskValidator.assert_valid_ports(task, self._portmap)
  except ThermosTaskValidator.InvalidTaskError as e:
    raise self.InvalidTask('Invalid task: %s' % e)
  context = ThermosContext(
      task_id=self._task_id,
      ports=self._portmap,
      user=self._user)
  self._task, uninterp = (task % Environment(thermos=context)).interpolate()
  if len(uninterp) > 0:
    raise self.InvalidTask('Failed to interpolate task, missing: %s' %
        ', '.join(str(ref) for ref in uninterp))
  try:
    ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
  except ThermosTaskValidator.InvalidTaskError as e:
    raise self.InvalidTask('Invalid task: %s' % e)
  self._plan = None  # plan currently being executed (updated by Handlers)
  self._regular_plan = planner_class(self._task, clock=clock,
      process_filter=lambda proc: proc.final().get() == False)
  self._finalizing_plan = planner_class(self._task, clock=clock,
      process_filter=lambda proc: proc.final().get() == True)
  self._chroot = chroot
  self._sandbox = sandbox
  self._terminal_state = None
  self._ckpt = None
  self._process_map = dict((p.name().get(), p) for p in self._task.processes())
  self._task_processes = {}
  self._stages = dict((state, stage(self)) for state, stage in self.STAGES.items())
  self._finalization_start = None
  self._preemption_deadline = None
  self._watcher = ProcessMuxer(self._pathspec)

  # create runner state
  self._state = RunnerState(processes={})
  universal_handler = universal_handler or TaskRunnerUniversalHandler
  self._dispatcher = CheckpointDispatcher()
  self._dispatcher.register_handler(universal_handler(self))
  self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
  self._dispatcher.register_handler(TaskRunnerTaskHandler(self))

  # recover checkpointed runner state and update plan
  self._recovery = True
  self._replay_runner_ckpt()
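
# Illustrative sketch (not part of the original module): the task id that the
# constructor above synthesizes when no task_id is supplied, pulled out as a
# standalone helper. synthesize_task_id is a hypothetical name.
import time


def synthesize_task_id(task_name, clock=time):
  # Mirrors the constructor: <task name>-<YYYYmmdd-HHMMSS>.<microsecond fraction>
  launch_time = clock.time()
  launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10**6)
  return '%s-%s.%s' % (task_name,
                       time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
                       launch_time_ms)


# e.g. synthesize_task_id('hello_world') -> 'hello_world-20240101-120000.000123'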
def convert(job, packages=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""

  owner = Identity(role=fully_interpolated(job.role()), user=getpass.getuser())
  key = JobKey(
      role=assert_valid_field('role', fully_interpolated(job.role())),
      environment=assert_valid_field('environment', fully_interpolated(job.environment())),
      name=assert_valid_field('name', fully_interpolated(job.name())))

  task_raw = job.task()

  MB = 1024 * 1024
  task = TaskConfig()

  def not_empty_or(item, default):
    return default if item is Empty else fully_interpolated(item)

  # job components
  task.jobName = fully_interpolated(job.name())
  task.environment = fully_interpolated(job.environment())
  task.production = fully_interpolated(job.production(), bool)
  task.isService = select_service_bit(job)
  task.maxTaskFailures = fully_interpolated(job.max_task_failures())
  task.priority = fully_interpolated(job.priority())
  task.contactEmail = not_empty_or(job.contact(), None)

  # Add package tuples to a task, to display in the scheduler UI.
  task.packages = frozenset(
      Package(role=str(role), name=str(package_name), version=int(version))
      for role, package_name, version in packages)

  # task components
  if not task_raw.has_resources():
    raise InvalidConfig('Task must specify resources!')

  if (fully_interpolated(task_raw.resources().ram()) == 0
      or fully_interpolated(task_raw.resources().disk()) == 0):
    raise InvalidConfig('Must specify ram and disk resources, got ram:%r disk:%r' % (
        fully_interpolated(task_raw.resources().ram()),
        fully_interpolated(task_raw.resources().disk())))

  task.numCpus = fully_interpolated(task_raw.resources().cpu())
  task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
  task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB

  if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
    raise InvalidConfig('Task has invalid resources. cpu/ramMb/diskMb must all be positive: '
                        'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

  task.owner = owner
  task.requestedPorts = ports
  task.taskLinks = not_empty_or(job.task_links(), {})
  task.constraints = constraints_to_thrift(not_empty_or(job.constraints(), {}))

  underlying, refs = job.interpolate()

  # need to fake an instance id for the sake of schema checking
  underlying_checked = underlying.bind(mesos={'instance': 31337})
  try:
    ThermosTaskValidator.assert_valid_task(underlying_checked.task())
  except ThermosTaskValidator.InvalidTaskError as e:
    raise InvalidConfig('Task is invalid: %s' % e)

  if not underlying_checked.check().ok():
    raise InvalidConfig('Job not fully specified: %s' % underlying_checked.check().message())

  unbound = []
  for ref in refs:
    if ref == THERMOS_TASK_ID_REF or ref == MESOS_INSTANCE_REF or (
        Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
      continue
    unbound.append(ref)

  if unbound:
    raise InvalidConfig('Config contains unbound variables: %s' % ' '.join(map(str, unbound)))

  cron_schedule = not_empty_or(job.cron_schedule(), '')
  cron_policy = select_cron_policy(job.cron_policy(), job.cron_collision_policy())

  task.executorConfig = ExecutorConfig(
      name=AURORA_EXECUTOR_NAME,
      data=filter_aliased_fields(underlying).json_dumps())

  return JobConfiguration(
      key=key,
      owner=owner,
      cronSchedule=cron_schedule,
      cronCollisionPolicy=cron_policy,
      taskConfig=task,
      instanceCount=fully_interpolated(job.instances()))
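
# Illustrative sketch (standalone recreation of the resource arithmetic in
# convert() above; the helper name and plain-tuple return are hypothetical).
# TaskConfig carries ram/disk in MB while the Pystachio resources are bytes,
# hence the division by MB.
MB = 1024 * 1024


def check_resources(cpu, ram_bytes, disk_bytes):
  if ram_bytes == 0 or disk_bytes == 0:
    raise ValueError('Must specify ram and disk resources, got ram:%r disk:%r'
                     % (ram_bytes, disk_bytes))
  ram_mb = ram_bytes // MB
  disk_mb = disk_bytes // MB
  if cpu <= 0 or ram_mb <= 0 or disk_mb <= 0:
    raise ValueError('Task has invalid resources. cpu/ramMb/diskMb must all be '
                     'positive: cpu:%r ramMb:%r diskMb:%r' % (cpu, ram_mb, disk_mb))
  return cpu, ram_mb, disk_mb


# e.g. check_resources(1.0, 1024 * MB, 2048 * MB) -> (1.0, 1024, 2048)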
def on_initialization(self, header):
  log.debug('_on_initialization: %s' % header)
  ThermosTaskValidator.assert_valid_task(self._runner.task)
  ThermosTaskValidator.assert_valid_ports(self._runner.task, header.ports)
  self._checkpoint(RunnerCkpt(runner_header=header))
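
# Illustrative sketch (hypothetical handler): on_initialization above is one
# of the callbacks invoked as the CheckpointDispatcher replays RunnerCkpt
# records to its registered handlers, so a minimal extra handler registered
# alongside the built-in ones might look like this.
class LoggingHandler(object):
  def on_initialization(self, header):
    # header is the RunnerHeader carried by RunnerCkpt(runner_header=...)
    log.info('runner initialized: %s' % header)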