class DistributedCommandRunner(object):
    """Fan a shell command out over ssh to the hosts running a role's live tasks.

    For every job name given at construction time the scheduler is queried for
    tasks in ``LIVE_STATES``. The command is rewritten per task (a ``cd`` into
    the task sandbox plus task-specific substitutions) and executed on the
    task's host through a thread pool.
    """

    @classmethod
    def make_executor_path(cls, cluster, executor_name):
        """Return the path pattern of an executor's run directory on a slave.

        The pattern keeps ``*`` wildcards for the slave and framework
        components (presumably expanded by the remote shell -- confirm).
        """
        parameters = cls.sandbox_args(cluster)
        parameters.update(executor_name=executor_name)
        template = posixpath.join(
            '%(slave_root)s',
            'slaves/*/frameworks/*/executors/%(executor_name)s/runs',
            '%(slave_run_directory)s')
        return template % parameters

    @classmethod
    def thermos_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path template of a thermos task.

        The ``{{thermos.task_id}}`` reference is left unbound here; it is
        filled in by ``substitute_thermos``. With ``executor_sandbox`` true the
        executor run directory itself is returned, otherwise its ``sandbox``
        child.
        """
        sandbox = cls.make_executor_path(cluster, 'thermos-{{thermos.task_id}}')
        return sandbox if executor_sandbox else posixpath.join(sandbox, 'sandbox')

    @classmethod
    def sandbox_args(cls, cluster):
        """Return the cluster's ``slave_root`` / ``slave_run_directory`` as a dict."""
        cluster = cluster.with_trait(CommandRunnerTrait)
        return {
            'slave_root': cluster.slave_root,
            'slave_run_directory': cluster.slave_run_directory,
        }

    @classmethod
    def substitute_thermos(cls, command, task, cluster, **kw):
        """Prefix ``command`` with a ``cd`` into the thermos sandbox and bind it.

        The ``thermos`` namespace carries the task id and assigned ports, the
        ``mesos`` namespace the instance id. ``**kw`` is forwarded to
        ``thermos_sandbox`` (e.g. ``executor_sandbox``).
        """
        prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId,
            ports=task.assignedTask.assignedPorts)
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        command = String(prefix_command + command) % Environment(
            thermos=thermos_namespace,
            mesos=mesos_namespace)
        return command.get()

    @classmethod
    def aurora_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path of a non-thermos task.

        The default path contains a ``%task_id%`` placeholder that
        ``substitute_aurora`` replaces.
        """
        if executor_sandbox:
            return cls.make_executor_path(cluster, 'twitter')
        return '/var/run/nexus/%task_id%/sandbox'

    @classmethod
    def substitute_aurora(cls, command, task, cluster, **kw):
        """Prefix ``command`` with a sandbox ``cd`` and expand ``%...%`` placeholders.

        Replaces ``%shard_id%``, ``%task_id%`` and one ``%port:NAME%`` per
        assigned port. ``**kw`` is forwarded to ``aurora_sandbox``.
        """
        command = ('cd %s;' % cls.aurora_sandbox(cluster, **kw)) + command
        command = command.replace('%shard_id%', str(task.assignedTask.instanceId))
        command = command.replace('%task_id%', task.assignedTask.taskId)
        for name, port in task.assignedTask.assignedPorts.items():
            command = command.replace('%port:' + name + '%', str(port))
        return command

    @classmethod
    def substitute(cls, command, task, cluster, **kw):
        """Rewrite ``command`` for ``task``, picking the thermos or aurora flavour.

        Tasks with a truthy ``executorConfig`` use the thermos substitution,
        all others the aurora one.
        """
        if task.assignedTask.task.executorConfig:
            return cls.substitute_thermos(command, task, cluster, **kw)
        return cls.substitute_aurora(command, task, cluster, **kw)

    @classmethod
    def query_from(cls, role, env, job):
        """Build the ``TaskQuery`` selecting the live tasks of ``role/env/job``."""
        return TaskQuery(
            statuses=LIVE_STATES,
            jobKeys=[JobKey(role=role, environment=env, name=job)])

    def __init__(self, cluster, role, env, jobs, ssh_user=None, log_fn=log.log):
        """Remember the target jobs and create the scheduler client.

        :param cluster: cluster object handed to ``AuroraClientAPI``.
        :param role: role owning the jobs.
        :param env: environment of the jobs.
        :param jobs: iterable of job names.
        :param ssh_user: user to ssh as; falls back to ``role`` when falsy.
        :param log_fn: callable invoked as ``log_fn(level, message)``.
        """
        self._cluster = cluster
        self._api = AuroraClientAPI(cluster=cluster, user_agent=AURORA_V2_USER_AGENT_NAME)
        self._role = role
        self._env = env
        self._jobs = jobs
        self._ssh_user = ssh_user if ssh_user else self._role
        self._log = log_fn

    def execute(self, args):
        """Run one command on one host over ssh and return its prefixed output.

        ``args`` is a single ``(hostname, role, command)`` tuple so the method
        can be mapped over a pool. stderr is merged into stdout and every
        output line comes back prefixed with ``"<hostname>: "``.
        """
        hostname, role, command = args
        # -n: do not read from stdin; -q: suppress ssh's own diagnostics.
        ssh_command = ['ssh', '-n', '-q', '%s@%s' % (role, hostname), command]
        self._log(logging.DEBUG, "Running command: %s" % ssh_command)
        po = subprocess.Popen(ssh_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # NOTE(review): on Python 3 communicate() returns bytes for a pipe opened
        # this way, so each line would be rendered as b'...' -- confirm the
        # target interpreter before relying on the output format.
        output = po.communicate()
        return '\n'.join('%s: %s' % (hostname, line) for line in output[0].splitlines())

    def resolve(self):
        """Yield every task returned for the configured jobs.

        A job whose query does not come back ``OK`` is logged and skipped.
        """
        for job in self._jobs:
            resp = self._api.query(self.query_from(self._role, self._env, job))
            if resp.responseCode != ResponseCode.OK:
                self._log(logging.ERROR, 'Failed to query job: %s' % job)
                continue
            # The task list can be None instead of an empty list; treat that as
            # "no tasks" rather than failing on iteration.
            for task in resp.result.scheduleStatusResult.tasks or ():
                yield task

    def process_arguments(self, command, **kw):
        """Yield one ``(host, ssh_user, command)`` tuple per resolved task."""
        for task in self.resolve():
            host = task.assignedTask.slaveHost
            yield (host, self._ssh_user, self.substitute(command, task, self._cluster, **kw))

    def run(self, command, parallelism=1, **kw):
        """Execute ``command`` for every task and print each host's output.

        :param command: command template, rewritten per task by ``substitute``.
        :param parallelism: number of pool threads running ssh concurrently.
        :param kw: forwarded to ``substitute`` (e.g. ``executor_sandbox``).

        Results are printed in completion order, not task order.
        """
        threadpool = ThreadPool(processes=parallelism)
        try:
            for result in threadpool.imap_unordered(
                    self.execute, self.process_arguments(command, **kw)):
                print(result)
        finally:
            # Always release the pool's threads; relying on garbage collection
            # leaks them for the lifetime of the process.
            threadpool.terminate()
            threadpool.join()
class DistributedCommandRunner(object):
    """Run a shell command over ssh on each host that runs a task of the given jobs.

    The scheduler is asked for the ``LIVE_STATES`` tasks of every configured
    job; the command is then adapted to each task (sandbox ``cd`` prefix and
    placeholder substitution) and dispatched through a thread pool.
    """

    @classmethod
    def make_executor_path(cls, cluster, executor_name):
        """Return the path pattern of the run directory of ``executor_name``.

        ``*`` wildcards stand in for the slave and framework path components
        (presumably resolved by the remote shell -- confirm).
        """
        parameters = cls.sandbox_args(cluster)
        parameters.update(executor_name=executor_name)
        template = posixpath.join(
            "%(slave_root)s",
            "slaves/*/frameworks/*/executors/%(executor_name)s/runs",
            "%(slave_run_directory)s",
        )
        return template % parameters

    @classmethod
    def thermos_sandbox(cls, cluster, executor_sandbox=False):
        """Return the thermos sandbox path template.

        ``{{thermos.task_id}}`` stays unbound until ``substitute_thermos``
        binds it. ``executor_sandbox`` selects the executor run directory
        instead of its ``sandbox`` subdirectory.
        """
        sandbox = cls.make_executor_path(cluster, "thermos-{{thermos.task_id}}")
        if executor_sandbox:
            return sandbox
        return posixpath.join(sandbox, "sandbox")

    @classmethod
    def sandbox_args(cls, cluster):
        """Return ``slave_root`` and ``slave_run_directory`` of the cluster as a dict."""
        cluster = cluster.with_trait(CommandRunnerTrait)
        return {
            "slave_root": cluster.slave_root,
            "slave_run_directory": cluster.slave_run_directory,
        }

    @classmethod
    def substitute_thermos(cls, command, task, cluster, **kw):
        """Prepend the thermos sandbox ``cd`` and bind the template namespaces.

        ``thermos`` is bound to the task id and assigned ports, ``mesos`` to
        the instance id. ``**kw`` goes to ``thermos_sandbox``.
        """
        prefix_command = "cd %s;" % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId, ports=task.assignedTask.assignedPorts
        )
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        environment = Environment(thermos=thermos_namespace, mesos=mesos_namespace)
        command = String(prefix_command + command) % environment
        return command.get()

    @classmethod
    def aurora_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path for a non-thermos task.

        The default path carries a ``%task_id%`` placeholder that is replaced
        by ``substitute_aurora``.
        """
        if executor_sandbox:
            return cls.make_executor_path(cluster, "twitter")
        return "/var/run/nexus/%task_id%/sandbox"

    @classmethod
    def substitute_aurora(cls, command, task, cluster, **kw):
        """Prepend the aurora sandbox ``cd`` and expand ``%...%`` placeholders.

        Handles ``%shard_id%``, ``%task_id%`` and ``%port:NAME%`` for each
        assigned port. ``**kw`` goes to ``aurora_sandbox``.
        """
        command = ("cd %s;" % cls.aurora_sandbox(cluster, **kw)) + command
        command = command.replace("%shard_id%", str(task.assignedTask.instanceId))
        command = command.replace("%task_id%", task.assignedTask.taskId)
        for name, port in task.assignedTask.assignedPorts.items():
            command = command.replace("%port:" + name + "%", str(port))
        return command

    @classmethod
    def substitute(cls, command, task, cluster, **kw):
        """Rewrite ``command`` for ``task``.

        A truthy ``executorConfig`` selects the thermos substitution; anything
        else falls back to the aurora one.
        """
        if task.assignedTask.task.executorConfig:
            return cls.substitute_thermos(command, task, cluster, **kw)
        return cls.substitute_aurora(command, task, cluster, **kw)

    @classmethod
    def query_from(cls, role, env, job):
        """Build the ``TaskQuery`` for the live tasks of ``role/env/job``."""
        job_key = JobKey(role=role, environment=env, name=job)
        return TaskQuery(statuses=LIVE_STATES, jobKeys=[job_key])

    def __init__(self, cluster, role, env, jobs, ssh_user=None, log_fn=log.log):
        """Store the job selection and create the scheduler client.

        :param cluster: cluster object handed to ``AuroraClientAPI``.
        :param role: role owning the jobs.
        :param env: environment of the jobs.
        :param jobs: iterable of job names.
        :param ssh_user: user to ssh as; ``role`` is used when this is falsy.
        :param log_fn: callable invoked as ``log_fn(level, message)``.
        """
        self._cluster = cluster
        self._api = AuroraClientAPI(cluster=cluster)
        self._role = role
        self._env = env
        self._jobs = jobs
        self._ssh_user = ssh_user if ssh_user else self._role
        self._log = log_fn

    def execute(self, args):
        """Run a command on one host via ssh; return its output, line-prefixed.

        ``args`` is one ``(hostname, role, command)`` tuple so that the method
        can be used directly with a pool's map functions. stderr is folded
        into stdout; each line is prefixed with ``"<hostname>: "``.
        """
        hostname, role, command = args
        # -n: do not read from stdin; -q: suppress ssh's own diagnostics.
        ssh_command = ["ssh", "-n", "-q", "%s@%s" % (role, hostname), command]
        self._log(logging.DEBUG, "Running command: %s" % ssh_command)
        po = subprocess.Popen(ssh_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # NOTE(review): on Python 3 communicate() yields bytes for this pipe, so
        # lines would be rendered as b'...' -- confirm the target interpreter.
        output = po.communicate()
        return "\n".join("%s: %s" % (hostname, line) for line in output[0].splitlines())

    def resolve(self):
        """Yield the tasks of every configured job.

        Jobs whose query response is not ``OK`` are logged and skipped.
        """
        for job in self._jobs:
            resp = self._api.query(self.query_from(self._role, self._env, job))
            if resp.responseCode != ResponseCode.OK:
                self._log(logging.ERROR, "Failed to query job: %s" % job)
                continue
            # tasks can be None rather than an empty list; iterate nothing then.
            for task in resp.result.scheduleStatusResult.tasks or ():
                yield task

    def process_arguments(self, command, **kw):
        """Yield ``(host, ssh_user, command)`` for each resolved task."""
        for task in self.resolve():
            host = task.assignedTask.slaveHost
            yield (host, self._ssh_user, self.substitute(command, task, self._cluster, **kw))

    def run(self, command, parallelism=1, **kw):
        """Execute ``command`` for every task, printing each host's output.

        :param command: command template, rewritten per task by ``substitute``.
        :param parallelism: number of pool threads running ssh concurrently.
        :param kw: forwarded to ``substitute`` (e.g. ``executor_sandbox``).

        Output is printed in the order commands finish.
        """
        threadpool = ThreadPool(processes=parallelism)
        try:
            work = self.process_arguments(command, **kw)
            for result in threadpool.imap_unordered(self.execute, work):
                print(result)
        finally:
            # Tear the pool down explicitly so its threads never outlive run().
            threadpool.terminate()
            threadpool.join()
def query(args, options):
    """usage: query [--force] [--listformat=FORMAT] [--shards=N[,N,...]] [--states=State[,State,...]]
    cluster [role [job]]

    Query Mesos about jobs and tasks.
    """
    # NOTE(review): the docstring above reads like user-facing usage text, so it
    # is kept as-is -- confirm before rewording it.
    #
    # args:    positional arguments: cluster, then optionally role and job.
    # options: parsed options; reads .states, .shards, .listformat, .force and
    #          .verbosity.
    # Prints one formatted line per matching task; calls die() on bad input or
    # a failed scheduler query.

    def _convert_fmt_string(fmtstr):
        # Turn every %name% placeholder into the %(name)s form used by the
        # % string-formatting operator.
        import re

        def convert(match):
            return "%%(%s)s" % match.group(1)

        return re.sub(r"%(\w+)%", convert, fmtstr)

    def flatten_task(t, d=None):
        # Collapse the attribute tree rooted at t into one flat {name: value}
        # dict. A fresh dict is created for each top-level call; a shared
        # mutable default would leak keys from one task into the next.
        if d is None:
            d = {}
        for key in t.__dict__:
            val = getattr(t, key)
            if hasattr(val, "__dict__"):
                # Nested object: merge its attributes into the same dict.
                flatten_task(val, d)
            else:
                d[key] = val
        return d

    def map_values(d):
        # Replace the numeric status by its ScheduleStatus name; every other
        # value passes through unchanged.
        default_value = lambda v: v
        mapping = {"status": lambda v: ScheduleStatus._VALUES_TO_NAMES[v]}
        return dict((k, mapping.get(k, default_value)(v)) for (k, v) in d.items())

    # Validate --states only when it was given, so an unset or empty option
    # falls through to the default state set below instead of failing here.
    requested_states = options.states.split(",") if options.states else []
    for state in requested_states:
        if state not in ScheduleStatus._NAMES_TO_VALUES:
            msg = "Unknown state '%s' specified.  Valid states are:\n" % state
            msg += ",".join(ScheduleStatus._NAMES_TO_VALUES.keys())
            die(msg)

    # Role, Job, Instances, States, and the listformat
    if not args:
        die("Must specify at least cluster.")
    cluster = args[0]
    role = args[1] if len(args) > 1 else None
    job = args[2] if len(args) > 2 else None
    instances = set(map(int, options.shards.split(","))) if options.shards else set()
    if requested_states:
        states = set(map(ScheduleStatus._NAMES_TO_VALUES.get, requested_states))
    else:
        states = ACTIVE_STATES | TERMINAL_STATES
    listformat = _convert_fmt_string(options.listformat)

    # Figure out "expensive" queries here and bail out if they lack --force
    #  - Does not specify role
    if not role and not options.force:
        die("--force is required for expensive queries (no role specified)")

    #  - Does not specify job
    if not job and not options.force:
        die("--force is required for expensive queries (no job specified)")

    #  - Specifies status outside of ACTIVE_STATES
    if not (states <= ACTIVE_STATES) and not options.force:
        die("--force is required for expensive queries (states outside ACTIVE states")

    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
    query_info = api.query(api.build_query(role, job, instances=instances, statuses=states))
    if query_info.responseCode != ResponseCode.OK:
        die("Failed to query scheduler: %s" % query_info.messageDEPRECATED)

    tasks = query_info.result.scheduleStatusResult.tasks
    if tasks is None:
        return

    try:
        for task in tasks:
            d = flatten_task(task)
            print(listformat % map_values(d))
    except KeyError:
        # d is the flattened task that failed to format; list its keys as help.
        msg = "Unknown key in format string.  Valid keys are:\n"
        msg += ",".join(d.keys())
        die(msg)
def query(args, options):
    """usage: query [--force] [--listformat=FORMAT] [--shards=N[,N,...]] [--states=State[,State,...]]
    cluster [role [job]]

    Query Mesos about jobs and tasks.
    """
    # NOTE(review): the docstring above reads like user-facing usage text, so it
    # is kept as-is -- confirm before rewording it.
    #
    # args:    positional arguments: cluster, then optionally role and job.
    # options: parsed options; reads .states, .shards, .listformat, .force and
    #          .verbosity.
    # Prints one formatted line per matching task; calls die() on bad input or
    # a failed scheduler query.

    def _convert_fmt_string(fmtstr):
        # Rewrite %name% placeholders as %(name)s so the result can be used
        # with the % operator and a dict.
        import re

        def convert(match):
            return "%%(%s)s" % match.group(1)

        return re.sub(r'%(\w+)%', convert, fmtstr)

    def flatten_task(t, d=None):
        # Flatten the nested attributes of t into a single dict. The dict is
        # created per top-level call: a mutable default argument would be
        # shared by all calls and carry stale keys from earlier tasks.
        if d is None:
            d = {}
        for key in t.__dict__:
            val = getattr(t, key)
            if hasattr(val, '__dict__'):
                # Nested object: fold its attributes into the same dict.
                flatten_task(val, d)
            else:
                d[key] = val
        return d

    def map_values(d):
        # Translate the numeric status into its ScheduleStatus name; all other
        # values are passed through untouched.
        default_value = lambda v: v
        mapping = {
            'status': lambda v: ScheduleStatus._VALUES_TO_NAMES[v],
        }
        return dict(
            (k, mapping.get(k, default_value)(v)) for (k, v) in d.items()
        )

    # Only validate --states when it was supplied; otherwise the default state
    # set chosen below applies.
    requested_states = options.states.split(',') if options.states else []
    for state in requested_states:
        if state not in ScheduleStatus._NAMES_TO_VALUES:
            msg = "Unknown state '%s' specified.  Valid states are:\n" % state
            msg += ','.join(ScheduleStatus._NAMES_TO_VALUES.keys())
            die(msg)

    # Role, Job, Instances, States, and the listformat
    if not args:
        die('Must specify at least cluster.')
    cluster = args[0]
    role = args[1] if len(args) > 1 else None
    job = args[2] if len(args) > 2 else None
    instances = set(map(int, options.shards.split(','))) if options.shards else set()
    if requested_states:
        states = set(map(ScheduleStatus._NAMES_TO_VALUES.get, requested_states))
    else:
        states = ACTIVE_STATES | TERMINAL_STATES
    listformat = _convert_fmt_string(options.listformat)

    # Figure out "expensive" queries here and bail out if they lack --force
    #  - Does not specify role
    if not role and not options.force:
        die('--force is required for expensive queries (no role specified)')

    #  - Does not specify job
    if not job and not options.force:
        die('--force is required for expensive queries (no job specified)')

    #  - Specifies status outside of ACTIVE_STATES
    if not (states <= ACTIVE_STATES) and not options.force:
        die('--force is required for expensive queries (states outside ACTIVE states')

    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
    query_info = api.query(api.build_query(role, job, instances=instances, statuses=states))
    if query_info.responseCode != ResponseCode.OK:
        die('Failed to query scheduler: %s' % query_info.messageDEPRECATED)

    tasks = query_info.result.scheduleStatusResult.tasks
    if tasks is None:
        return

    try:
        for task in tasks:
            d = flatten_task(task)
            print(listformat % map_values(d))
    except KeyError:
        # d is the flattened task that failed to format; list its keys as help.
        msg = "Unknown key in format string.  Valid keys are:\n"
        msg += ','.join(d.keys())
        die(msg)
class DistributedCommandRunner(object):
    """Run a shell command over ssh on each host that runs a task of the given jobs.

    The scheduler is queried for the ``LIVE_STATES`` tasks of every configured
    job; the command is adapted per task (sandbox ``cd`` prefix and placeholder
    substitution) and dispatched through a thread pool.
    """

    @staticmethod
    def execute(args):
        """Run a command on one host via ssh; return its output, line-prefixed.

        ``args`` is one ``(hostname, role, command)`` tuple so that the
        function can be mapped over a pool. stderr is merged into stdout and
        each line is prefixed with ``"<hostname>: "``.
        """
        hostname, role, command = args
        # -n: do not read from stdin; -q: suppress ssh's own diagnostics.
        ssh_command = ['ssh', '-n', '-q', '%s@%s' % (role, hostname), command]
        po = subprocess.Popen(ssh_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # NOTE(review): on Python 3 communicate() yields bytes for this pipe, so
        # lines would be rendered as b'...' -- confirm the target interpreter.
        output = po.communicate()
        return '\n'.join('%s: %s' % (hostname, line) for line in output[0].splitlines())

    @classmethod
    def make_executor_path(cls, cluster, executor_name):
        """Return the path pattern of the run directory of ``executor_name``.

        ``*`` wildcards stand in for the slave and framework path components
        (presumably resolved by the remote shell -- confirm).
        """
        parameters = cls.sandbox_args(cluster)
        parameters.update(executor_name=executor_name)
        template = posixpath.join(
            '%(slave_root)s',
            'slaves/*/frameworks/*/executors/%(executor_name)s/runs',
            '%(slave_run_directory)s'
        )
        return template % parameters

    @classmethod
    def thermos_sandbox(cls, cluster, executor_sandbox=False):
        """Return the thermos sandbox path template.

        ``{{thermos.task_id}}`` stays unbound until ``substitute_thermos``
        binds it. ``executor_sandbox`` selects the executor run directory
        instead of its ``sandbox`` subdirectory.
        """
        sandbox = cls.make_executor_path(cluster, 'thermos-{{thermos.task_id}}')
        return sandbox if executor_sandbox else posixpath.join(sandbox, 'sandbox')

    @classmethod
    def sandbox_args(cls, cluster):
        """Return ``slave_root`` and ``slave_run_directory`` of the cluster as a dict."""
        cluster = cluster.with_trait(CommandRunnerTrait)
        return {
            'slave_root': cluster.slave_root,
            'slave_run_directory': cluster.slave_run_directory,
        }

    @classmethod
    def substitute_thermos(cls, command, task, cluster, **kw):
        """Prepend the thermos sandbox ``cd`` and bind the template namespaces.

        ``thermos`` is bound to the task id and assigned ports, ``mesos`` to
        the instance id. ``**kw`` goes to ``thermos_sandbox``.
        """
        prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId,
            ports=task.assignedTask.assignedPorts)
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        command = String(prefix_command + command) % Environment(
            thermos=thermos_namespace,
            mesos=mesos_namespace)
        return command.get()

    @classmethod
    def aurora_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path for a non-thermos task.

        The default path carries a ``%task_id%`` placeholder that is replaced
        by ``substitute_aurora``.
        """
        if executor_sandbox:
            return cls.make_executor_path(cluster, 'twitter')
        return '/var/run/nexus/%task_id%/sandbox'

    @classmethod
    def substitute_aurora(cls, command, task, cluster, **kw):
        """Prepend the aurora sandbox ``cd`` and expand ``%...%`` placeholders.

        Handles ``%shard_id%``, ``%task_id%`` and ``%port:NAME%`` for each
        assigned port. ``**kw`` goes to ``aurora_sandbox``.
        """
        command = ('cd %s;' % cls.aurora_sandbox(cluster, **kw)) + command
        command = command.replace('%shard_id%', str(task.assignedTask.instanceId))
        command = command.replace('%task_id%', task.assignedTask.taskId)
        for name, port in task.assignedTask.assignedPorts.items():
            command = command.replace('%port:' + name + '%', str(port))
        return command

    @classmethod
    def substitute(cls, command, task, cluster, **kw):
        """Rewrite ``command`` for ``task``.

        A truthy ``executorConfig`` selects the thermos substitution; anything
        else falls back to the aurora one.
        """
        if task.assignedTask.task.executorConfig:
            return cls.substitute_thermos(command, task, cluster, **kw)
        return cls.substitute_aurora(command, task, cluster, **kw)

    @classmethod
    def query_from(cls, role, env, job):
        """Build the ``TaskQuery`` for the live tasks of ``role/env/job``."""
        return TaskQuery(
            statuses=LIVE_STATES, owner=Identity(role), jobName=job, environment=env)

    def __init__(self, cluster, role, env, jobs, ssh_user=None):
        """Store the job selection and create the scheduler client.

        :param cluster: cluster object handed to ``AuroraClientAPI``.
        :param role: role owning the jobs.
        :param env: environment of the jobs.
        :param jobs: iterable of job names.
        :param ssh_user: user to ssh as; ``role`` is used when this is falsy.
        """
        self._cluster = cluster
        self._api = AuroraClientAPI(cluster=cluster)
        self._role = role
        self._env = env
        self._jobs = jobs
        self._ssh_user = ssh_user if ssh_user else self._role

    def resolve(self):
        """Yield the tasks of every configured job.

        Jobs whose query response is not ``OK`` are logged and skipped.
        """
        for job in self._jobs:
            resp = self._api.query(self.query_from(self._role, self._env, job))
            if resp.responseCode != ResponseCode.OK:
                log.error('Failed to query job: %s' % job)
                continue
            # tasks can be None rather than an empty list; iterate nothing then.
            for task in resp.result.scheduleStatusResult.tasks or ():
                yield task

    def process_arguments(self, command, **kw):
        """Yield ``(host, ssh_user, command)`` for each resolved task."""
        for task in self.resolve():
            host = task.assignedTask.slaveHost
            yield (host, self._ssh_user, self.substitute(command, task, self._cluster, **kw))

    def run(self, command, parallelism=1, **kw):
        """Execute ``command`` for every task, printing each host's output.

        :param command: command template, rewritten per task by ``substitute``.
        :param parallelism: number of pool threads running ssh concurrently.
        :param kw: forwarded to ``substitute`` (e.g. ``executor_sandbox``).

        Output is printed in the order commands finish.
        """
        threadpool = ThreadPool(processes=parallelism)
        try:
            for result in threadpool.imap_unordered(
                    self.execute, self.process_arguments(command, **kw)):
                # Parenthesised single-argument form prints identically under
                # the Python 2 print statement and the Python 3 function.
                print(result)
        finally:
            # Tear the pool down explicitly so its threads never outlive run().
            threadpool.terminate()
            threadpool.join()
class DistributedCommandRunner(object):
    """Fan a shell command out over ssh to the hosts running a role's live tasks.

    For every job name given at construction time the scheduler is queried for
    tasks in ``LIVE_STATES``. The command is prefixed with a ``cd`` into the
    task's thermos sandbox, bound to the task's template namespaces and run on
    the task's host through a thread pool.
    """

    @classmethod
    def make_executor_path(cls, cluster, executor_name):
        """Return the path pattern of an executor's run directory on a slave.

        The pattern keeps ``*`` wildcards for the slave and framework
        components (presumably expanded by the remote shell -- confirm).
        """
        parameters = cls.sandbox_args(cluster)
        parameters.update(executor_name=executor_name)
        template = posixpath.join(
            '%(slave_root)s',
            'slaves/*/frameworks/*/executors/%(executor_name)s/runs',
            '%(slave_run_directory)s'
        )
        return template % parameters

    @classmethod
    def thermos_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path template of a thermos task.

        The ``{{thermos.task_id}}`` reference is left unbound here; it is
        filled in by ``substitute``. With ``executor_sandbox`` true the
        executor run directory itself is returned, otherwise its ``sandbox``
        child.
        """
        sandbox = cls.make_executor_path(cluster, 'thermos-{{thermos.task_id}}')
        return sandbox if executor_sandbox else posixpath.join(sandbox, 'sandbox')

    @classmethod
    def sandbox_args(cls, cluster):
        """Return the cluster's ``slave_root`` / ``slave_run_directory`` as a dict."""
        cluster = cluster.with_trait(CommandRunnerTrait)
        return {
            'slave_root': cluster.slave_root,
            'slave_run_directory': cluster.slave_run_directory,
        }

    @classmethod
    def substitute(cls, command, task, cluster, **kw):
        """Prefix ``command`` with a ``cd`` into the task sandbox and bind it.

        The ``thermos`` namespace carries the task id and assigned ports, the
        ``mesos`` namespace the instance id. ``**kw`` is forwarded to
        ``thermos_sandbox`` (e.g. ``executor_sandbox``).
        """
        prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId,
            ports=task.assignedTask.assignedPorts)
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        command = String(prefix_command + command) % Environment(
            thermos=thermos_namespace,
            mesos=mesos_namespace)
        return command.get()

    @classmethod
    def query_from(cls, role, env, job):
        """Build the ``TaskQuery`` selecting the live tasks of ``role/env/job``."""
        return TaskQuery(
            statuses=LIVE_STATES,
            jobKeys=[JobKey(role=role, environment=env, name=job)])

    def __init__(self, cluster, role, env, jobs, ssh_user=None, log_fn=log.log):
        """Remember the target jobs and create the scheduler client.

        :param cluster: cluster object handed to ``AuroraClientAPI``.
        :param role: role owning the jobs.
        :param env: environment of the jobs.
        :param jobs: iterable of job names.
        :param ssh_user: user to ssh as; falls back to ``role`` when falsy.
        :param log_fn: callable invoked as ``log_fn(level, message)``.
        """
        self._cluster = cluster
        self._api = AuroraClientAPI(
            cluster=cluster,
            user_agent=AURORA_V2_USER_AGENT_NAME)
        self._role = role
        self._env = env
        self._jobs = jobs
        self._ssh_user = ssh_user if ssh_user else self._role
        self._log = log_fn

    def execute(self, args):
        """Run one command on one host over ssh and return its prefixed output.

        ``args`` is a single ``(hostname, role, command)`` tuple so the method
        can be mapped over a pool. stderr is merged into stdout and every
        output line comes back prefixed with ``"<hostname>: "``.
        """
        hostname, role, command = args
        # -n: do not read from stdin; -q: suppress ssh's own diagnostics.
        ssh_command = ['ssh', '-n', '-q', '%s@%s' % (role, hostname), command]
        self._log(logging.DEBUG, "Running command: %s" % ssh_command)
        po = subprocess.Popen(ssh_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # NOTE(review): on Python 3 communicate() returns bytes for a pipe opened
        # this way, so each line would be rendered as b'...' -- confirm the
        # target interpreter before relying on the output format.
        output = po.communicate()
        return '\n'.join('%s: %s' % (hostname, line) for line in output[0].splitlines())

    def resolve(self):
        """Yield every task returned for the configured jobs.

        A job whose query does not come back ``OK`` is logged and skipped.
        """
        for job in self._jobs:
            resp = self._api.query(self.query_from(self._role, self._env, job))
            if resp.responseCode != ResponseCode.OK:
                self._log(logging.ERROR, 'Failed to query job: %s' % job)
                continue
            # The task list can be None instead of an empty list; treat that as
            # "no tasks" rather than failing on iteration.
            for task in resp.result.scheduleStatusResult.tasks or ():
                yield task

    def process_arguments(self, command, **kw):
        """Yield one ``(host, ssh_user, command)`` tuple per resolved task."""
        for task in self.resolve():
            host = task.assignedTask.slaveHost
            yield (host, self._ssh_user, self.substitute(command, task, self._cluster, **kw))

    def run(self, command, parallelism=1, **kw):
        """Execute ``command`` for every task and print each host's output.

        :param command: command template, rewritten per task by ``substitute``.
        :param parallelism: number of pool threads running ssh concurrently.
        :param kw: forwarded to ``substitute`` (e.g. ``executor_sandbox``).

        Results are printed in completion order, not task order.
        """
        threadpool = ThreadPool(processes=parallelism)
        try:
            for result in threadpool.imap_unordered(
                    self.execute, self.process_arguments(command, **kw)):
                print(result)
        finally:
            # Always release the pool's threads; relying on garbage collection
            # leaks them for the lifetime of the process.
            threadpool.terminate()
            threadpool.join()