def run_script(self, targets):
    """Expand env-parameter references in targets and publish the job.

    Arguments:
        targets -- Target nodes, described by Id or Selector; dict items
                   are reduced to their 'name' value before expansion.

    Side effects: pops 'remote_user_map' from self.request, sets
    self.start_at and publishes a JobTarget message.
    """
    targets = [t['name'] if isinstance(t, dict) else t for t in targets]
    env = self.request['env']
    for i, t in enumerate(targets):
        params = has_params(t)
        if not params:
            continue
        # BUG FIX: accumulate replacements on `expanded` rather than
        # re-deriving from the original `t` each time -- previously a
        # target containing several parameters kept only the LAST
        # substitution, discarding all earlier ones.
        expanded = t
        for sel, param in params:
            param_name = param.replace('$', '')
            if param_name not in env:
                continue
            param_val = env[param_name]
            if isinstance(param_val, list):
                # A list value expands to "<sel>v1 <sel>v2 ...".
                repl_params = ' '.join(
                    ['%s%s' % (sel, val) for val in param_val])
            else:
                repl_params = sel + param_val
            expanded = expanded.replace(sel + param, repl_params)
        targets[i] = expanded
    remote_user_map = self.request.pop('remote_user_map')
    target = JobTarget(self.session_id, str(" ".join(targets)))
    target.hdr.org = remote_user_map['org']
    self.start_at = timestamp()
    self.manager.publisher.send(target._)
def log(self, stdout=None, stderr=None):
    """Emit a SysMessage carrying captured stdout/stderr for this task."""
    fields = dict(session_id=self.task_id,
                  ts=timestamp(),
                  org=self.org,
                  user=self.user,
                  stdout=stdout,
                  stderr=stderr)
    self.job_done.send(SysMessage(**fields)._)
def _finalize(self, msg):
    """Persist the outcome of a finished run.

    Records one RunNode row per responding node, folds the per-node
    return codes into the run's exit code and, once every run of the
    parent task has reported, marks the whole task Finished.
    On any failure the transaction is rolled back.
    """
    session_id = msg.session_id
    org = msg.org
    result = msg.result
    try:
        # Locate the task owning the run for this session within the org.
        task = self.db.query(Task).join(User, Org).join(Run, Task.runs).filter(
            Run.uuid == session_id,
            Org.name == org).one()
        if task.group.deployment:
            task.group.deployment.status = 'Running'
        run = [r for r in task.runs if r.uuid == session_id][0]
        success = True
        run.exec_end = timestamp()
        task.exec_end = timestamp()
        if msg.env:
            # Keep the environment the nodes handed back.
            run.env_out = json.dumps(msg.env)
        for node, ret in result.items():
            # The run succeeds only if EVERY node exited with code 0.
            success = success and (str(ret['ret_code']) == '0')
            rnode = RunNode(name=node,
                            exit_code=ret['ret_code'],
                            as_user=ret['remote_user'],
                            run=run)
            self.db.add(rnode)
        if success:
            run.exit_code = 0
        else:
            run.exit_code = 1
        # -99 marks a still-pending run; close the task only when none remain.
        if all([r.exit_code != -99 for r in task.runs]):
            task.status = LOG_STATUS.Finished
            task.exit_code = int(
                any([bool(r.exit_code) for r in task.runs]))
        self.db.add(run)
        self.db.add(task)
        self.db.commit()
    except Exception, ex:
        LOG.error(ex)
        self.db.rollback()
def _end(self, msg):
    """Record the end of a session's task and finalize it when possible.

    Looks up the task by run uuid + org, stamps its end time and, once
    no run is still pending (exit_code -99), marks it Finished with an
    aggregate exit code.  Rolls back on any error.
    """
    uuid = msg.session_id
    org_name = msg.org
    try:
        query = self.db.query(Task).join(User, Org).join(Run, Task.runs)
        task = query.filter(Run.uuid == uuid, Org.name == org_name).one()
        deployment = task.group.deployment
        if deployment:
            deployment.status = 'Running'
        task.exec_end = timestamp()
        # A run with exit_code -99 has not reported yet.
        if all(r.exit_code != -99 for r in task.runs):
            task.status = LOG_STATUS.Finished
            task.exit_code = int(any(bool(r.exit_code) for r in task.runs))
        self.db.add(task)
        self.db.commit()
    except Exception as ex:
        LOG.error(ex)
        self.db.rollback()
def _create_ts(self):
    """Return the current timestamp with sub-second (ms) precision added."""
    whole = timestamp()
    fractional = round(time.time() % 1, 3)
    return whole + fractional
def read(self):
    """Session I/O loop: dispatch the job to attaching nodes and stream output.

    Polls two queues -- node replies ('in_messages') and user input
    ('user_input') -- until the session event fires, all nodes report,
    or the overall wait timeout expires.  Generator: yields
    ('PIPE', session_id, ts, run_as, node, stdout, stderr) tuples for
    each chunk of output received from a node.
    """
    job_event = Event()
    user_map = UserMap(self.remote_user_map['roles'], self.user)
    node_map = self.node_map
    job_reply = self.manager.backend.publish_queue('out_messages')
    job_queue = self.manager.backend.consume_queue('in_messages',
                                                   ident=self.session_id)
    user_input_queue = self.manager.backend.consume_queue(
        'user_input', ident=self.session_id)
    poller = self.manager.backend.create_poller(job_queue, user_input_queue)
    # Nodes have until `discovery_period` to attach; the whole session
    # must complete before `total_wait`.
    discovery_period = time.time() + self.manager.discovery_timeout
    total_wait = time.time() + (self.timeout or self.manager.wait_timeout)
    try:
        while not self.session_event.is_set() and not job_event.is_set():
            ready = poller.poll()
            if user_input_queue in ready:
                frames = user_input_queue.recv()
                # input from user -> node (forwarding currently disabled;
                # the block below is dead code kept as a string literal)
                """ input_req = JobInput.build(*frames)
                if not input_req:
                    LOG.warn("Invalid request %s" % frames)
                    continue
                for node, data in node_map.items():
                    if input_req.cmd == 'INPUT' and \
                            (input_req.targets == '*' or
                             node in input_req.targets):
                        job_reply.send(
                            R(data['router_id'],
                              Input.build(self.session_id,
                                          input_req.cmd,
                                          input_req.data)._)._)
                """
                continue
            frames = None
            if job_queue in ready:
                frames = job_queue.recv()
            if not frames:
                # Nothing to read -- check the two deadlines.
                if time.time() > discovery_period:
                    # Discovery period ended, check for results
                    if not any([
                            n['status'] in StatusCodes.pending()
                            for n in node_map.values()]):
                        break
                if time.time() > total_wait:
                    _msg = 'Timeout waiting for response from nodes'
                    LOG.warn(_msg)
                    message = SysMessage(session_id=self.task_id,
                                         ts=self._create_ts(),
                                         org=self.user_org[1],
                                         user=self.user_org[0],
                                         stdout=_msg)
                    self._reply(message)
                    # Flag every node that never finished.
                    for node in node_map.values():
                        if node['status'] != StatusCodes.FINISHED:
                            node['data']['stderr'] = \
                                node['data'].setdefault('stderr', '') + \
                                'Timeout waiting response from node'
                    LOG.debug(node_map)
                    job_event.set()
                    break
                continue
            job_rep = M.build(frames[0])
            if not job_rep:
                LOG.error("Invalid reply from node: %s" % frames)
                continue
            # Assert we have rep from the same organization
            if job_rep.hdr.org != self.remote_user_map['org']:
                continue
            state = node_map.setdefault(
                job_rep.hdr.peer,
                dict(status=StatusCodes.STARTED, data={},
                     stdout='', stderr=''))
            ts = self._create_ts()
            node_map[job_rep.hdr.peer]['router_id'] = job_rep.hdr.ident
            if isinstance(job_rep, Ready):
                # A node announced itself -- check ACLs before dispatching.
                remote_user = user_map.select(job_rep.hdr.peer)
                if not remote_user:
                    LOG.info("Node %s not allowed for user %s" %
                             (job_rep.hdr.peer, self.user))
                    node_map.pop(job_rep.hdr.peer)
                    continue
                if job_rep.hdr.peer.lower() in self.disabled_nodes:
                    LOG.info("Node %s is disabled" % job_rep.hdr.peer)
                    node_map.pop(job_rep.hdr.peer)
                    continue
                # Send task to attached node
                node_map[job_rep.hdr.peer]['remote_user'] = remote_user
                _msg = "Sending job to %s" % job_rep.hdr.peer
                LOG.info(_msg)
                # NOTE(review): this SysMessage is built but never sent
                # (contrast the timeout branch, which calls self._reply) --
                # confirm whether a reply is missing here.
                message = SysMessage(session_id=self.task_id,
                                     ts=self._create_ts(),
                                     org=self.user_org[1],
                                     user=self.user_org[0],
                                     stdout=_msg)
                job_msg = Job(self.session_id, remote_user, self.request)
                job_msg.hdr.ident = job_rep.hdr.ident
                job_msg.hdr.dest = self.session_id
                job_reply.send(job_msg._)
                continue
            state['status'] = job_rep.control
            if isinstance(job_rep, Finished):
                state['data']['elapsed'] = int(timestamp() - self.start_at)
                state['data']['ret_code'] = job_rep.result['ret_code']
                state['data']['env'] = job_rep.result['env']
                if job_rep.result['stdout'] or job_rep.result['stderr']:
                    yield ('PIPE', self.session_id, ts, job_rep.run_as,
                           job_rep.hdr.peer,
                           job_rep.result['stdout'],
                           job_rep.result['stderr'])
            elif isinstance(job_rep, StdOut):
                yield ('PIPE', self.session_id, ts, job_rep.run_as,
                       job_rep.hdr.peer, job_rep.output, '')
            elif isinstance(job_rep, StdErr):
                yield ('PIPE', self.session_id, ts, job_rep.run_as,
                       job_rep.hdr.peer, '', job_rep.output)
            elif isinstance(job_rep, FileExport):
                # Stash exported file content keyed by "<node>_<filename>".
                file_name = '%s_%s' % (job_rep.hdr.peer, job_rep.file_name)
                self.file_exports[file_name] = job_rep.content
            elif isinstance(job_rep, Events):
                LOG.info("Polling events for %s" % self.session_id)
            # else:
            #     job_reply.send(job_rep.ident, job_rep.peer,
            #                    self.session_id, 'UNKNOWN')
            LOG.debug('Resp[%s]:: [%s][%s]' % (self.session_id,
                                               job_rep.hdr.peer,
                                               job_rep.control))
    except ConnectionError:
        # Transport died
        self.session_event.set()
    except Exception, ex:
        LOG.exception(ex)
def _reply(self, message):
    """Stamp *message* with a fresh sequence number and publish it."""
    message.seq_no = timestamp()
    self.job_done.send(message._)
def incr(self, org, what):
    """Record the current time (epoch ms) under the (org, what) key."""
    now_ms = int(timestamp() * 1000)
    self.client.put(self.key(TS_NS, org, what), {'ts': now_ms})
def run(self):
    """Provision cloud machines for the tasks' dict-style targets.

    Announces the session with an InitialMessage, then for every target
    described as {'name': ..., 'provider': ...} resolves the cloud
    profile, creates the machine(s) through the matching provider and
    records each created machine as a Resource row.  Plain string
    targets are passed through untouched.
    """
    waiter = Event()  # NOTE(review): unused in this method -- confirm leftover
    targets = []
    self.node_connected = self.manager.backend.subscribe_fanout(
        'admin_fwd', sub_patterns=[self.org])
    message = InitialMessage(session_id=self.session_id,
                             ts=timestamp(),
                             org=self.org,
                             user=self.user)
    self.job_done.send(message._)
    wait_for_machines = []
    for task in self.tasks:
        for target in task['targets']:
            if not isinstance(target, dict):
                # Plain node name -- nothing to provision.
                targets.append(target)
                continue
            if target['name'] in targets:
                # This server name was already handled.
                continue
            targets.append(target['name'])
            # Provider value
            server_name = target.pop('name')
            self.manager.user = self.manager.db.query(User).filter(
                User.username == self.user).first()
            profile = CloudProfile.my(self.manager).filter(
                CloudProfile.name == target['provider']).first()
            if not profile:
                msg = ("Cloud profile: '%s' not found!" %
                       target['provider'])
                self.log(stderr=msg)
                raise ValueError(msg)
            if profile.shared:
                # A shared profile delegates to the owner's profile.
                profile = profile.shared
            task_group = self.manager.db.query(TaskGroup).filter(
                TaskGroup.id == self.task_id).first()
            if not task_group:
                # Wait 1 more sec for DB sync ?
                time.sleep(1)
                task_group = self.manager.db.query(TaskGroup).filter(
                    TaskGroup.id == self.task_id).first()
            if not task_group:
                msg = ("Invalid task group: %s" % self.task_id)
                self.log(stderr=msg)
                raise ValueError(msg)
            deployment = task_group.deployment
            if not deployment:
                return
            provider = BaseCloudProvider.find(profile.type)
            if not provider:
                msg = ("Cloud profile type '%s' not supported!" %
                       profile.type)
                self.log(stderr=msg)
                raise ValueError(msg)
            try:
                status, machine_ids, meta = provider(
                    profile, self.log).create_machine(
                    server_name, **target)
            except Exception, ex:
                # Best-effort: report the provider failure and continue
                # with the remaining targets.
                self.log(stderr=str(ex))
            else:
                for m_id in machine_ids:
                    res = Resource(server_name=server_name,
                                   server_id=m_id,
                                   deployment=deployment,
                                   profile=profile,
                                   meta=json.dumps(meta))
                    self.manager.db.add(res)
                self.manager.db.commit()
                wait_for_machines.append(server_name)
            # Return arg to dict
            # NOTE(review): server_name was already appended to `targets`
            # above, so this second append duplicates it -- confirm intent.
            target['name'] = server_name
            targets.append(server_name)
def incr(self, org, what):
    """Update the (org, what) entry with the current epoch milliseconds."""
    entry_key = self.key(TS_NS, org, what)
    self.client.put(entry_key, dict(ts=int(timestamp() * 1000)))
def resume(self, user_id=None, task_uuid=None, step=None, **kwargs):
    # DEFUNCT
    """Re-dispatch an existing task starting from *step*.

    Clones the task and its runs from `step` onward as transient ORM
    objects, re-dispatches them through the Master, then persists the
    clones with the freshly assigned job UUIDs.

    Returns O.task_ids(...) on success, O.error(...) when no step
    remains to resume, or None when dispatch was not queued / on error.
    """
    try:
        self._prepare_db(user_id=user_id, **kwargs)
        self.db.begin(subtransactions=True)
        ctx = self.get_user_ctx(user_id)
        task = Task.visible(ctx).filter(Task.uuid == task_uuid).one()
        task_runs = list(task.runs)
        # Detach the task so it can be re-inserted as a new row.
        task.id = None
        task.uuid = None
        task.status = LOG_STATUS.Running
        task.exit_code = -99
        self.db.expunge(task)
        make_transient(task)
        # Drop all runs preceding the requested step.
        to_remove = []
        for run in sorted(task_runs, key=lambda r: r.step_index):
            if run.step_index < step:
                to_remove.append(run)
            else:
                break
        for run in to_remove:
            task_runs.remove(run)
        if not task_runs:
            return O.error(msg="No step found to resume")
        remote_tasks = []
        atts = []
        for i, run in enumerate(task_runs):
            run.step_index = i
            remote_task = dict(attachments=atts, body=run.full_script)
            remote_task['timeout'] = run.timeout
            # remote_task['target'] = self._expand_target(run.target)
            remote_tasks.append(remote_task)
            # Detach each run so it is re-created with a fresh identity.
            run.id = None
            run.uuid = None
            run.exit_code = -99
            run.exec_start = timestamp()
            run.exec_user_id = ctx.user.id
            self.db.expunge(run)
            make_transient(run)
        env = kwargs.get("env", {})
        if env and not isinstance(env, dict):
            try:
                env = json.loads(env)
            except Exception:
                # Narrowed from a bare `except:`; malformed env payloads
                # deliberately fall back to an empty environment.
                env = {}
        try:
            # Seed with the environment the first resumed step saw.
            env_in = {}
            if task_runs[0].env_in:
                env_in = json.loads(task_runs[0].env_in) or {}
            if env_in:
                env_in.update(env)
                env = env_in
        except Exception as ex:
            LOG.exception(ex)
        msg = Master(ctx.user.name).command(
            'dispatch',
            tasks=remote_tasks,
            task_id=task.group.id,
            roles=self._roles(ctx),
            disabled_nodes=self.disabled_nodes(ctx),
            includes=[],
            attachments=[],
            env=env)
        if not isinstance(msg, Queued):
            return
        task.created_at = datetime.now()
        task.uuid = msg.task_ids[0]
        for i, job_id in enumerate(msg.task_ids):
            # Update each cloned run with its dispatched job id.
            run = task_runs[i]
            run.uuid = job_id
            self.db.add(run)
            task.runs.append(run)
        self.db.add(task)
        self.db.commit()
        return O.task_ids(_list=msg.task_ids)
    except Exception as ex:
        # BUG FIX: the original outer `try:` had no handler at all
        # (dangling try -> syntax error).  Restored to match the
        # _finalize/_end convention: log the failure and roll back.
        LOG.exception(ex)
        self.db.rollback()
def execute(self, user_id, deployment, revision=None, **kwargs):
    """Create Task/Run rows for *deployment* and dispatch them.

    Builds one remote task per deployment step, persists the matching
    Run rows, sends everything to the Master for dispatching and stores
    the returned job UUIDs on the runs.  Rolls back on any error.
    """
    LOG.info("Starting %s " % deployment.name)
    self._prepare_db(user_id=user_id, **kwargs)
    self.db.begin(subtransactions=True)
    remote_tasks = []
    local_runs = []
    # batch_id = None
    ctx = self.get_user_ctx(user_id)
    try:
        tags = kwargs.get('tags', [])
        if tags:
            # Tags arrive as one delimited string; split and sort them.
            tags = sorted(re.split(r'[\s,;]', tags))
        timeout = kwargs.get('timeout', 0)
        env = kwargs.get('env') or {}
        if env and not isinstance(env, dict):
            kwargs['env'] = env = json.loads(env)
        if deployment.env:
            # Deployment-level env wins over the caller-supplied one.
            deployment.env.update(env)
            env = deployment.env
        group = TaskGroup(deployment=deployment.object)
        self.db.add(group)
        LOG.info("Execute %s by %s" % (deployment.name, ctx.user.name))
        task = Task(status=LOG_STATUS.Running,
                    group=group,
                    owner_id=ctx.user.id,
                    script_content=revision,
                    exec_start=timestamp(),
                    timeout=timeout,
                    exit_code=-99,  # -99 == still running
                    script_name=deployment.name)
        self.db.add(task)
        for tag in tags:
            task.tags.append(Tag(name=tag))
        for i, step in enumerate(deployment.steps):
            targets = step.targets
            remote_task = dict(attachments=step.atts,
                               body=step.body,
                               lang=step.lang,
                               targets=targets,
                               timeout=step.timeout,
                               env=step.env or {})
            # Flatten dict targets to "provider::name" strings for storage.
            flat_targets = []
            for t in targets:
                if isinstance(t, dict):
                    flat_targets.append("%(provider)s::%(name)s" % t)
                else:
                    flat_targets.append(t)
            run = Run(task=task,
                      lang=step.lang,
                      exec_start=timestamp(),
                      exec_user_id=ctx.user.id,
                      target=" ".join(flat_targets),
                      exit_code=-99,
                      timeout=step.timeout,
                      step_index=i,
                      full_script=step.body)
            if i == 0:
                # Only the first step records the inbound environment.
                run.env_in = json.dumps(env)
            self.db.add(task)
            remote_tasks.append(remote_task)
            local_runs.append(run)
            self.db.add(run)
        self.db.commit()
        if not remote_tasks:
            return
        self.db.begin(subtransactions=True)
        msg = Master(ctx.user.name).command(
            'dispatch',
            task_id=group.id,
            tasks=remote_tasks,
            roles=self._roles(ctx),
            disabled_nodes=self.disabled_nodes(ctx),
            includes=[],
            attachments=[],
            env=env)
        if not isinstance(msg, Queued):
            return
        LOG.info("TASK UUIDs: %s" % msg.task_ids)
        for i, job_id in enumerate(msg.task_ids):
            # Update each run with its dispatched job id.
            run = local_runs[i]
            run.uuid = job_id
        if msg.task_ids:
            self.db.commit()
        return O._anon(task_uid=task.uuid,
                       group=task.taskgroup_id,
                       parent_uid=task.uuid)
    except Exception, ex:
        LOG.exception(ex)
        self.db.rollback()
def run(self):
    """Provision cloud machines for the tasks' dict-style targets.

    Announces the session with an InitialMessage, then for every target
    described as {'name': ..., 'provider': ...} resolves the cloud
    profile, creates the machine(s) through the matching provider and
    records each created machine as a Resource row.  Plain string
    targets are passed through untouched.
    """
    waiter = Event()  # NOTE(review): unused in this method -- confirm leftover
    targets = []
    self.node_connected = self.manager.backend.subscribe_fanout(
        'admin_fwd', sub_patterns=[self.org])
    message = InitialMessage(session_id=self.session_id,
                             ts=timestamp(),
                             org=self.org,
                             user=self.user)
    self.job_done.send(message._)
    wait_for_machines = []
    for task in self.tasks:
        for target in task['targets']:
            if not isinstance(target, dict):
                # Plain node name -- nothing to provision.
                targets.append(target)
                continue
            if target['name'] in targets:
                # This server name was already handled.
                continue
            targets.append(target['name'])
            # Provider value
            server_name = target.pop('name')
            self.manager.user = self.manager.db.query(User).filter(
                User.username == self.user).first()
            profile = CloudProfile.my(self.manager).filter(
                CloudProfile.name == target['provider']).first()
            if not profile:
                msg = ("Cloud profile: '%s' not found!" %
                       target['provider'])
                self.log(stderr=msg)
                raise ValueError(msg)
            if profile.shared:
                # A shared profile delegates to the owner's profile.
                profile = profile.shared
            task_group = self.manager.db.query(TaskGroup).filter(
                TaskGroup.id == self.task_id).first()
            if not task_group:
                # Wait 1 more sec for DB sync ?
                time.sleep(1)
                task_group = self.manager.db.query(TaskGroup).filter(
                    TaskGroup.id == self.task_id).first()
            if not task_group:
                msg = ("Invalid task group: %s" % self.task_id)
                self.log(stderr=msg)
                raise ValueError(msg)
            deployment = task_group.deployment
            if not deployment:
                return
            provider = BaseCloudProvider.find(profile.type)
            if not provider:
                msg = ("Cloud profile type '%s' not supported!" %
                       profile.type)
                self.log(stderr=msg)
                raise ValueError(msg)
            try:
                status, machine_ids, meta = provider(
                    profile, self.log).create_machine(server_name, **target)
            except Exception, ex:
                # Best-effort: report the provider failure and continue
                # with the remaining targets.
                self.log(stderr=str(ex))
            else:
                for m_id in machine_ids:
                    res = Resource(server_name=server_name,
                                   server_id=m_id,
                                   deployment=deployment,
                                   profile=profile,
                                   meta=json.dumps(meta))
                    self.manager.db.add(res)
                self.manager.db.commit()
                wait_for_machines.append(server_name)
            # Return arg to dict
            # NOTE(review): server_name was already appended to `targets`
            # above, so this second append duplicates it -- confirm intent.
            target['name'] = server_name
            targets.append(server_name)