Example #1
    def substitute_thermos(cls, command, task, cluster, **kw):
        # Prefix the command with a cd into the task's sandbox, then bind the
        # {{thermos.*}} and {{mesos.*}} template variables from the assigned task.
        prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId,
            ports=task.assignedTask.assignedPorts)
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        command = String(prefix_command + command) % Environment(
            thermos=thermos_namespace, mesos=mesos_namespace)
        return command.get()
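
Every example here follows the same pystachio pattern: build a context Struct, bind it with `%`, and read the result with `.get()`. A minimal self-contained sketch of that pattern (the Struct below mirrors Aurora's ThermosContext schema, but is re-declared here purely for illustration):

    from pystachio import Environment, Integer, Map, String, Struct

    class ThermosContext(Struct):
        # Port name -> assigned port number, plus task identity fields.
        ports = Map(String, Integer)
        task_id = String
        user = String

    cmd = String('echo serving on port {{thermos.ports[http]}}')
    bound = cmd % Environment(thermos=ThermosContext(task_id='t-1',
                                                     ports={'http': 8080}))
    print(bound.get())  # -> 'echo serving on port 8080'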
Example #2
 def context(self, task_id):
   state = self.state
   if state.header is None:
     return None
   return ThermosContext(
     ports=state.header.ports if state.header.ports else {},
     task_id=state.header.task_id,
     user=state.header.user,
   )
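
Callers must handle the None return: until the first checkpoint header record has been replayed, there is no state to bind against. A hypothetical call site (the `monitor` object and its surrounding function are assumed):

    ctx = monitor.context(task_id)
    if ctx is None:
        # Header not checkpointed yet; nothing to interpolate against.
        return None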
Example #3
    def interpolate_cmd(task, cmd):
        """
        :param task: Assigned task passed from the Mesos agent.
        :param cmd: Command defined inside shell_command in the config.
        :return: Interpolated cmd with values (for example, ports) filled in.
        """
        thermos_namespace = ThermosContext(task_id=task.taskId,
                                           ports=task.assignedPorts)
        mesos_namespace = MesosContext(instance=task.instanceId)
        command = String(cmd) % Environment(thermos=thermos_namespace,
                                            mesos=mesos_namespace)

        return command.get()
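
A hypothetical invocation of the function above, using types.SimpleNamespace to stand in for the AssignedTask struct the agent would normally pass (assumes interpolate_cmd and its imports are already in scope):

    from types import SimpleNamespace

    fake_task = SimpleNamespace(taskId='task-1',
                                instanceId=0,
                                assignedPorts={'http': 8080})
    interpolate_cmd(fake_task, 'curl -s localhost:{{thermos.ports[http]}}')
    # -> 'curl -s localhost:8080'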
Example #4
    def _build_path(context, target):
        (task_instance,
         path) = ScpCommand._extract_task_instance_and_path(context, target)

        # No jobkey specified, so we are using a local path.
        if task_instance is None:
            return path

        # Jobkey specified, we want to convert to the user@host:file scp format
        (cluster, role, env, name) = task_instance.jobkey
        instance = set([task_instance.instance])
        api = context.get_api(cluster)
        resp = api.query(
            api.build_query(role, name, env=env, instances=instance))
        context.log_response_and_raise(
            resp,
            err_msg=('Unable to get information about instance: %s' %
                     combine_messages(resp)))
        if (resp.result.scheduleStatusResult.tasks is None
                or len(resp.result.scheduleStatusResult.tasks) == 0):
            raise context.CommandError(
                EXIT_INVALID_PARAMETER, ScpCommand.JOB_NOT_FOUND_ERROR_MSG %
                (task_instance.jobkey, task_instance.instance))
        first_task = resp.result.scheduleStatusResult.tasks[0]
        assigned = first_task.assignedTask
        role = assigned.task.job.role
        slave_host = assigned.slaveHost

        # If path is absolute, use that. Else if it is a tilde expansion, throw an error.
        # Otherwise, use sandbox as relative root.
        normalized_input_path = os.path.normpath(path)
        if os.path.isabs(normalized_input_path):
            final_path = normalized_input_path
        elif (normalized_input_path.startswith('~/')
              or normalized_input_path == '~'):
            raise context.CommandError(EXIT_INVALID_PARAMETER,
                                       ScpCommand.TILDE_USAGE_ERROR_MSG % path)
        else:
            sandbox_path_pre_format = DistributedCommandRunner.thermos_sandbox(
                api.cluster, executor_sandbox=context.options.executor_sandbox)
            thermos_namespace = ThermosContext(task_id=assigned.taskId,
                                               ports=assigned.assignedPorts)
            sandbox_path = String(sandbox_path_pre_format) % Environment(
                thermos=thermos_namespace)
            # Join the individual path components onto the sandbox path to build it safely.
            final_path = os.path.join(str(sandbox_path),
                                      *normalized_input_path.split(os.sep))

        return '%s@%s:%s' % (role, slave_host, final_path)
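
The branch above that resolves the remote path is self-contained enough to sketch on its own. A hypothetical helper with the same logic, assuming the sandbox path has already been interpolated:

    import os

    def resolve_remote_path(path, sandbox):
        # Absolute paths win; tilde paths are rejected; anything else is
        # re-rooted under the task sandbox, joined component by component.
        normalized = os.path.normpath(path)
        if os.path.isabs(normalized):
            return normalized
        if normalized == '~' or normalized.startswith('~/'):
            raise ValueError('tilde expansion is not supported: %s' % path)
        return os.path.join(sandbox, *normalized.split(os.sep))

    resolve_remote_path('logs/stdout', '/var/sandbox/task-1')
    # -> '/var/sandbox/task-1/logs/stdout'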
Example #5
    def assert_all_refs_bound(cls, task):
        port_names = PortExtractor.extract(task)

        # Create fake bindings and make sure that there are no unbound refs afterwards.  If
        # there are unbound refs that could indicate improper scoping e.g.
        # {{array[{{mesos.instance}}]}} which is disallowed.
        thermos_bindings = ThermosContext(
            task_id='dummy_task_id',
            user='******',
            ports=dict(
                (name, random.randrange(30000, 40000)) for name in port_names),
        )
        task_instance, unbindable_refs = (
            task % dict(thermos=thermos_bindings)).interpolate()

        if len(unbindable_refs) != 0:
            raise cls.InvalidTaskError(
                'Unexpected unbound refs: %s. Make sure you are not nesting template variables.'
                % ' '.join(map(str, unbindable_refs)))
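
The validation works because pystachio's interpolate() returns both the partially bound object and the list of refs it could not resolve. A minimal sketch of that behavior (the template and bindings are illustrative):

    from pystachio import Environment, String

    template = String('run {{task_id}} as instance {{instance}}')
    bound, unbound = (template % Environment(task_id='t-1')).interpolate()
    # unbound still contains one ref ({{instance}}), so a check like the
    # one above would raise.
    assert len(unbound) == 1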
Example #6
    def __init__(self,
                 task,
                 checkpoint_root,
                 sandbox,
                 log_dir=None,
                 task_id=None,
                 portmap=None,
                 user=None,
                 chroot=False,
                 clock=time,
                 universal_handler=None,
                 planner_class=TaskPlanner,
                 hostname=None,
                 process_logger_destination=None,
                 process_logger_mode=None,
                 rotate_log_size_mb=None,
                 rotate_log_backups=None,
                 preserve_env=False):
        """
      required:
        task (config.Task) = the task to run
        checkpoint_root (path) = the checkpoint root
        sandbox (path) = the sandbox in which the path will be run
                         [if None, cwd will be assumed, but garbage collection will be
                          disabled for this task.]

      optional:
        log_dir (string)  = directory to house stdout/stderr logs. If not specified, logs will be
                            written into the sandbox directory under .logs/
        task_id (string)  = bind to this task id.  if not specified, will synthesize an id based
                            upon task.name()
        portmap (dict)    = a map (string => integer) from name to port, e.g. { 'http': 80 }
        user (string)     = the user to run the task as.  if not current user, requires setuid
                            privileges.
        chroot (boolean)  = whether or not to chroot into the sandbox prior to exec.
        clock (time interface) = the clock to use throughout
        universal_handler = checkpoint record handler (only used for testing)
        planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
                            planning policy.
        process_logger_destination (string) = The destination of logger to use for all processes.
        process_logger_mode (string) = The mode of logger to use for all processes.
        rotate_log_size_mb (integer) = The maximum size of the rotated stdout/stderr logs in MiB.
        rotate_log_backups (integer) = The maximum number of rotated stdout/stderr log backups.
        preserve_env (boolean) = whether or not env variables for the runner should be in the
                                 env for the task being run
    """
        if not issubclass(planner_class, TaskPlanner):
            raise TypeError('planner_class must be a TaskPlanner.')
        self._clock = clock
        launch_time = self._clock.time()
        # Fractional second of the launch time, rendered as a six-digit microsecond count.
        launch_time_ms = '%06d' % int(
            (launch_time - int(launch_time)) * (10 ** 6))
        if not task_id:
            self._task_id = '%s-%s.%s' % (
                task.name(),
                time.strftime('%Y%m%d-%H%M%S',
                              time.localtime(launch_time)), launch_time_ms)
        else:
            self._task_id = task_id
        current_user = TaskRunnerHelper.get_actual_user()
        self._user = user or current_user
        # TODO(wickman) This should be delegated to the ProcessPlatform / Helper
        if self._user != current_user:
            if os.geteuid() != 0:
                raise ValueError(
                    'task specifies user as %s, but %s does not have setuid permission!'
                    % (self._user, current_user))
        self._portmap = portmap or {}
        self._launch_time = launch_time
        self._log_dir = log_dir or os.path.join(sandbox, '.logs')
        self._process_logger_destination = process_logger_destination
        self._process_logger_mode = process_logger_mode
        self._rotate_log_size_mb = rotate_log_size_mb
        self._rotate_log_backups = rotate_log_backups
        self._pathspec = TaskPath(root=checkpoint_root,
                                  task_id=self._task_id,
                                  log_dir=self._log_dir)
        self._hostname = hostname or socket.gethostname()
        try:
            ThermosTaskValidator.assert_valid_task(task)
            ThermosTaskValidator.assert_valid_ports(task, self._portmap)
        except ThermosTaskValidator.InvalidTaskError as e:
            raise self.InvalidTask('Invalid task: %s' % e)
        context = ThermosContext(task_id=self._task_id,
                                 ports=self._portmap,
                                 user=self._user)
        self._task, uninterp = (task %
                                Environment(thermos=context)).interpolate()
        if len(uninterp) > 0:
            raise self.InvalidTask('Failed to interpolate task, missing: %s' %
                                   ', '.join(str(ref) for ref in uninterp))
        try:
            ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
        except ThermosTaskValidator.InvalidTaskError as e:
            raise self.InvalidTask('Invalid task: %s' % e)
        self._plan = None  # plan currently being executed (updated by Handlers)
        self._regular_plan = planner_class(
            self._task,
            clock=clock,
            process_filter=lambda proc: proc.final().get() is False)
        self._finalizing_plan = planner_class(
            self._task,
            clock=clock,
            process_filter=lambda proc: proc.final().get() is True)
        self._chroot = chroot
        self._sandbox = sandbox
        self._terminal_state = None
        self._ckpt = None
        self._process_map = dict(
            (p.name().get(), p) for p in self._task.processes())
        self._task_processes = {}
        self._stages = dict(
            (state, stage(self)) for state, stage in self.STAGES.items())
        self._finalization_start = None
        self._preemption_deadline = None
        self._watcher = ProcessMuxer(self._pathspec)
        self._state = RunnerState(processes={})
        self._preserve_env = preserve_env

        # create runner state
        universal_handler = universal_handler or TaskRunnerUniversalHandler
        self._dispatcher = CheckpointDispatcher()
        self._dispatcher.register_handler(universal_handler(self))
        self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
        self._dispatcher.register_handler(TaskRunnerTaskHandler(self))

        # recover checkpointed runner state and update plan
        self._recovery = True
        self._replay_runner_ckpt()
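
A standalone sketch of the task id synthesis used in the constructor above (the task name is assumed):

    import time

    launch_time = time.time()
    # Fractional second, rendered as a zero-padded six-digit microsecond count.
    launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10 ** 6)
    task_id = '%s-%s.%s' % ('hello_world',
                            time.strftime('%Y%m%d-%H%M%S',
                                          time.localtime(launch_time)),
                            launch_time_ms)
    print(task_id)  # e.g. 'hello_world-20240101-120000.123456'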