def _query_coordination(self, job_id):
    """Coordinate a QUERY job through to its ARG callback.

    The tracked return job is polled once per second. When the job is
    marked as failed the method logs the failure and returns. Once every
    STDOUT value recorded for the job is truthy, the JSON documents are
    decoded, keyed by their STDOUT key, and sent to every available worker
    as a new ARG job stored under the "query" argument.

    :param job_id: Job Id
    :type job_id: String
    """

    while True:
        tracked_job = self.return_jobs[job_id]
        if tracked_job.failed:
            self.log.critical(
                "Query job [ %s ] encountered failures.", job_id
            )
            return

        if all(tracked_job.STDOUT.values()):
            break

        self.log.info("Waiting for [ %s ], QUERY to complete", job_id)
        time.sleep(1)

    query_data = {
        source: json.loads(raw_output)
        for source, raw_output in self.return_jobs[job_id].STDOUT.items()
    }

    # NOTE: The key order is deliberate. Each fingerprint is computed from
    #       the task as it exists at that moment, so the identifiers have
    #       to be added one at a time, after the static content.
    new_task = {
        "skip_cache": True,
        "extend_args": True,
        "verb": "ARG",
        "args": {"query": query_data},
        "parent_async_bypass": True,
        "job_id": utils.get_uuid(),
    }
    new_task["job_sha3_224"] = utils.object_sha3_224(obj=new_task)
    new_task["parent_id"] = utils.get_uuid()
    new_task["parent_sha3_224"] = utils.object_sha3_224(obj=new_task)

    targets = self._get_available_workers()
    self.create_return_jobs(
        task=new_task["job_id"],
        job_item=new_task,
        targets=targets,
    )
    for target in targets:
        self.log.debug(
            "Queuing QUERY ARG callback job [ %s ] for identity [ %s ]",
            new_task["job_id"],
            target,
        )
        self.send_queue.put(
            {
                "identity": target,
                "command": new_task["verb"],
                "data": new_task,
            }
        )
def wrapper_func(*args, **kwargs):
    """Run the wrapped callable and optionally queue an ARG callback.

    The first positional argument is treated as the owning object and the
    "job" keyword argument as the job specification. When the job defines
    "stdout_arg" and/or "stderr_arg", the stripped output captured from
    the wrapped callable is stored under those names in a new ARG job,
    targeted at this node, which becomes the only entry of
    ``block_on_tasks`` on the owning object.

    :returns: Tuple
    """

    def _as_text(stream):
        # Decode non-empty bytes; any other falsy value becomes "".
        if stream and isinstance(stream, bytes):
            return stream.decode()
        return stream or ""

    self = args[0]
    job = kwargs["job"]
    stdout_arg = job.get("stdout_arg")
    stderr_arg = job.get("stderr_arg")
    stdout, stderr, outcome, command = func(*args, **kwargs)
    if not (stdout_arg or stderr_arg):
        return stdout, stderr, outcome, command

    self.block_on_tasks = []
    clean_info = _as_text(stdout)
    clean_info_err = _as_text(stderr)

    # The callback starts as a copy of the job, stripped of every
    # identifier and of the argument names that triggered it.
    arg_job = job.copy()
    for stale_key in (
        "parent_sha3_224",
        "parent_id",
        "job_sha3_224",
        "job_id",
        "stdout_arg",
        "stderr_arg",
    ):
        arg_job.pop(stale_key, None)

    arg_job["skip_cache"] = True
    arg_job["extend_args"] = True
    arg_job["verb"] = "ARG"
    captured = arg_job["args"] = {}
    if stdout_arg:
        captured[stdout_arg] = clean_info.strip()
    if stderr_arg:
        captured[stderr_arg] = clean_info_err.strip()

    arg_job["parent_async_bypass"] = True
    arg_job["targets"] = [self.driver.identity]
    # Fingerprints are computed from the job as it exists at that point.
    arg_job["job_id"] = utils.get_uuid()
    arg_job["job_sha3_224"] = utils.object_sha3_224(obj=arg_job)
    arg_job["parent_id"] = utils.get_uuid()
    arg_job["parent_sha3_224"] = utils.object_sha3_224(obj=arg_job)
    self.block_on_tasks.append(arg_job)
    return stdout, stderr, outcome, command
def put(self, item: typing.Any, block: bool = True, timeout: float = None):
    """Store an item in the queue.

    The item is saved under a newly generated UUID key and the internal
    counter is released once to signal that an entry is available.

    > The block and timeout options are accepted for API compatibility
      only; this method never reads them.

    :param item: Object to be entered into the queue.
    :type item: Object
    :param block: Unused, present for API compatibility.
    :type block: Boolean
    :param timeout: Unused, present for API compatibility.
    :type timeout: Float
    """

    entry_key = utils.get_uuid()
    self._queue[entry_key] = item
    self._count.release()
def client(self, cache, job):
    """Run query command operation.

    The value stored under the queried key is read from the cached "args"
    and returned as a JSON document. When this node is one of the job
    targets, and the job does not set "no_wait", a QUERY_WAIT job for the
    queried item is created and set as the only entry of
    ``block_on_tasks``.

    :param cache: Caching object used to template items within a command.
    :type cache: Object
    :param job: Information containing the original job specification.
    :type job: Dictionary
    :returns: tuple
    """

    cached_args = cache.get("args")
    query = cached_args.get(job["query"]) if cached_args else None

    job_spec = job.copy()
    query_item = job_spec.pop("query")
    node_is_target = self.driver.identity in job_spec.get("targets", [])
    if node_is_target and not job.get("no_wait"):
        wait_job = {
            "skip_cache": True,
            "verb": "QUERY_WAIT",
            "item": query_item,
            "query_timeout": 600,
            "parent_async_bypass": True,
            "targets": [self.driver.identity],
            "identity": [],
        }
        # Fingerprints are computed from the job as it exists at that
        # point, so the identifiers are added one at a time.
        wait_job["job_id"] = utils.get_uuid()
        wait_job["job_sha3_224"] = utils.object_sha3_224(obj=wait_job)
        wait_job["parent_id"] = job_spec["parent_id"]
        wait_job["parent_sha3_224"] = utils.object_sha3_224(obj=wait_job)
        self.block_on_tasks = [wait_job]

    query_key = job["query"]
    return (json.dumps({query_key: query}), None, True, query_key)
def heartbeat_send(
    self, host_uptime=None, agent_uptime=None, version=None, driver=None
):
    """Send a heartbeat to the server.

    A new job ID is generated for every heartbeat. It is used both as the
    message ID and inside the JSON payload, which also carries the
    supplied values and the machine ID of this node.

    :param host_uptime: Sender uptime
    :type host_uptime: String
    :param agent_uptime: Sender agent uptime
    :type agent_uptime: String
    :param version: Sender directord version
    :type version: String
    :param driver: Driver information
    :type driver: String
    :returns: The result of ``job_send``.
    """

    job_id = utils.get_uuid()
    self.log.info(
        "Job [ %s ] sending heartbeat from [ %s ] to server",
        job_id,
        self.identity,
    )
    heartbeat = {
        "job_id": job_id,
        "version": version,
        "host_uptime": host_uptime,
        "agent_uptime": agent_uptime,
        "machine_id": self.machine_id,
        "driver": driver,
    }
    return self.job_send(
        target=self._server_identity,
        identity=self.identity,
        control=self.heartbeat_notice,
        msg_id=job_id,
        data=json.dumps(heartbeat),
    )
def test_get_uuid(self):
    """Two generated IDs must parse as version 4 UUIDs and differ."""

    generated = []
    for _ in range(2):
        candidate = utils.get_uuid()
        # Raises if the value cannot be parsed as a UUID.
        uuid.UUID(candidate, version=4)
        generated.append(candidate)

    first, second = generated
    self.assertNotEqual(first, second)
def _socket_send(
    self,
    socket,
    identity=None,
    msg_id=None,
    control=None,
    command=None,
    data=None,
    info=None,
    stderr=None,
    stdout=None,
    nonblocking=False,
):
    """Send a message over a ZMQ socket.

    The message specification for server is as follows.

        [
            b"Identity"
            b"ID",
            b"ASCII Control Characters",
            b"command",
            b"data",
            b"info",
            b"stderr",
            b"stdout",
        ]

    The message specification for client is as follows.

        [
            b"ID",
            b"ASCII Control Characters",
            b"command",
            b"data",
            b"info",
            b"stderr",
            b"stdout",
        ]

    Every part that exposes an ``encode`` method is encoded before the
    message is sent; parts without one (bytes, for example) are sent as
    provided. Any part, other than the identity, that is empty or
    undefined is replaced with the null byte of this class.

    All possible control characters are defined within the Interface
    class. For more on control characters review the following
    URL(https://donsnotes.com/tech/charsets/ascii.html#cntrl).

    :param socket: ZeroMQ socket object.
    :type socket: Object
    :param identity: Target where message will be sent. When defined, the
                     identity is sent as the first message part.
    :type identity: Bytes
    :param msg_id: ID information for a given message. If no ID is
                   provided a UUID will be generated.
    :type msg_id: Bytes
    :param control: ASCII control characters.
    :type control: Bytes
    :param command: Command definition for a given message.
    :type command: Bytes
    :param data: Encoded data that will be transmitted.
    :type data: Bytes
    :param info: Encoded information that will be transmitted.
    :type info: Bytes
    :param stderr: Encoded error information from a command.
    :type stderr: Bytes
    :param stdout: Encoded output information from a command.
    :type stdout: Bytes
    :param nonblocking: Enable non-blocking send.
    :type nonblocking: Boolean
    :returns: Object
    :raises Exception: Any error raised by the socket send is logged and
                       then re-raised unchanged.
    """

    def _encoder(item):
        try:
            return item.encode()
        except AttributeError:
            return item

    if not msg_id:
        msg_id = utils.get_uuid()

    message_parts = [msg_id]
    message_parts.extend(
        part or self.nullbyte
        for part in (control, command, data, info, stderr, stdout)
    )
    if identity:
        message_parts.insert(0, identity)

    message_parts = [_encoder(i) for i in message_parts]

    flags = zmq.NOBLOCK if nonblocking else 0

    try:
        return socket.send_multipart(message_parts, flags=flags)
    except Exception:
        # NOTE: ``Logger.warn`` is a deprecated alias which no longer
        #       exists on current Python releases, use ``warning``.
        self.log.warning("Failed to send message to [ %s ]", identity)
        # A bare raise keeps the original traceback intact.
        raise
def exec_orchestrations(
    self,
    orchestrations,
    defined_targets=None,
    restrict=None,
    ignore_cache=False,
    return_raw=False,
):
    """Execute orchestration jobs.

    Every orchestration is fingerprinted and given a new parent ID, then
    each of its jobs is expanded into a job definition. When the
    "finger_print" argument is enabled the formatted jobs are printed as
    a table and nothing is sent; otherwise each formatted job is sent to
    the server socket.

    > The "assign" key of an orchestration (when no defined targets are
      given), and the "vars", "name" and "assign" keys of every job, are
      removed from the provided dictionaries.

    :param orchestrations: List of Dictionaries which are run as
                           orchestration.
    :type orchestrations: List
    :param defined_targets: List of targets to limit a given execution
                            to. This target list provides an override for
                            targets found within a given orchestration.
    :type defined_targets: List
    :param restrict: Restrict a given orchestration job to a set of
                     SHA3_224 job fingerprints.
    :type restrict: Array
    :param ignore_cache: Instruct the orchestration job to ignore cached
                         executions.
    :type ignore_cache: Boolean
    :param return_raw: Enable a raw return from the server.
    :type return_raw: Boolean
    :returns: List
    """

    pending_jobs = []
    for orchestrate in orchestrations:
        parent_sha3_224 = utils.object_sha3_224(obj=orchestrate)
        parent_name = orchestrate.get("name")
        parent_id = utils.get_uuid()

        # Target precedence: defined targets, then "assign", then
        # "targets". The first non-empty value wins.
        targets = defined_targets
        if not targets:
            targets = orchestrate.pop("assign", [])
        if not targets:
            targets = orchestrate.get("targets", [])

        parent_async = getattr(self.args, "force_async", False)
        if not parent_async:
            try:
                parent_async = bool(
                    dist_utils.strtobool(orchestrate.get("async", "False"))
                )
            except (ValueError, AttributeError):
                parent_async = bool(orchestrate.get("async", False))

        for job in orchestrate["jobs"]:
            arg_vars = job.pop("vars", None)
            job_name = job.pop("name", None)
            assign = job.pop("assign", None)
            if assign and not isinstance(assign, list):
                raise SyntaxError(
                    "Job contained an invalid assignment: {} = {}."
                    " Assignments must be in list format.".format(
                        assign, type(assign)
                    )
                )

            # Whatever key is left first in the job is the verb and its
            # value is the execution string.
            verb, execution = next(iter(job.items()))
            pending_jobs.append(
                {
                    "verb": verb,
                    "execute": [execution],
                    "arg_vars": arg_vars,
                    "targets": assign or targets,
                    "restrict": restrict,
                    "ignore_cache": ignore_cache,
                    "parent_id": parent_id,
                    "parent_sha3_224": parent_sha3_224,
                    "parent_name": parent_name,
                    "job_name": job_name,
                    "return_raw": return_raw,
                    "parent_async": parent_async,
                }
            )

    return_data = []
    if not getattr(self.args, "finger_print", False):
        for job in pending_jobs:
            formatted_job = self.format_action(**job)
            return_data.append(
                directord.send_data(
                    socket_path=self.args.socket_path, data=formatted_job
                )
            )
        return return_data

    for count, job in enumerate(pending_jobs):
        item = json.loads(self.format_action(**job))
        exec_str = " ".join(job["execute"])
        if len(exec_str) >= 30:
            # Long execution strings are shortened for display.
            exec_str = "{execute}...".format(execute=exec_str[:27])

        return_data.append(
            [
                count,
                job["parent_name"] or job["parent_sha3_224"],
                item["verb"],
                exec_str,
                job["job_name"] or item["job_sha3_224"],
            ]
        )

    utils.print_tabulated_data(
        data=return_data,
        headers=["count", "parent", "verb", "exec", "job"],
    )
    return []
def handle_job(
    self,
    command,
    data,
    info,
):
    """Handle a job interaction.

    The job data is decoded from JSON and given a job ID and a SHA3_224
    fingerprint when it does not already carry them. A job with a parent
    ID is only queued for processing when the parent check passes;
    otherwise a failed result is placed on the return queue.

    :param command: Command
    :type command: String
    :param data: Job data, JSON encoded.
    :type data: String
    :param info: Job info
    :type info: Dictionary
    """

    job = json.loads(data)
    job_id = job.get("job_id", utils.get_uuid())
    job["job_id"] = job_id
    job_sha3_224 = job.get("job_sha3_224", utils.object_sha3_224(job))
    job["job_sha3_224"] = job_sha3_224
    job_parent_id = job.get("parent_id")
    job_parent_sha3_224 = job.get("parent_sha3_224")
    self.log.debug(
        "Item received: parent job UUID [ %s ],"
        " parent job sha3_224 [ %s ], job UUID [ %s ],"
        " job sha3_224 [ %s ]",
        job_parent_id,
        job_parent_sha3_224,
        job_id,
        job_sha3_224,
    )

    with utils.ClientStatus(
        job_id=job_id,
        command=command,
        ctx=self,
    ) as c:
        # A job without a parent is always runnable, the parent check is
        # only made when a parent ID is present.
        runnable = not job_parent_id or self._parent_check(
            conn=c, cache=self.cache, job=job
        )
        if runnable:
            c.job_state = self.driver.job_processing
            component_kwargs = {"cache": None, "job": job}
            self.log.debug(
                "Queuing component [ %s ], job_id [ %s ]",
                command,
                job_id,
            )
            c.info = "task queued"
            self.q_processes.put((component_kwargs, command, info))
        else:
            omitted = (
                None,
                None,
                False,
                "Job omitted, parent failure",
                job,
                command,
                0,
                None,
            )
            self.q_return.put(omitted)
class Interface(directord.Processor):
    """The Interface class.

    This class defines everything required to connect to or from a given
    server.
    """

    # Generated once, when the class body is executed, and therefore
    # shared by every instance.
    uuid = utils.get_uuid()

    def __init__(self, args):
        """Initialize the interface class.

        Sets the log level, the heartbeat interval and the key
        directories, then loads the messaging driver named in the
        arguments. When loading the driver raises an AttributeError a
        warning is logged and the "dummy" driver is loaded instead.

        :param args: Arguments parsed by argparse.
        :type args: Object
        """

        super().__init__()

        self.args = args

        # Set log handlers to debug when enabled.
        if self.args.debug:
            self.log.setLevel(logging.DEBUG)
            for handler in self.log.handlers:
                handler.setLevel(logging.DEBUG)

        self.heartbeat_interval = getattr(
            self.args, "heartbeat_interval", 1
        )

        self.base_dir = "/etc/directord"
        self.public_keys_dir = os.path.join(self.base_dir, "public_keys")
        self.secret_keys_dir = os.path.join(self.base_dir, "private_keys")
        # Encrypted traffic is only enabled when both directories exist.
        self.keys_exist = os.path.exists(
            self.public_keys_dir
        ) and os.path.exists(self.secret_keys_dir)

        try:
            self.driver = self._load_driver(driver=self.args.driver)
        except AttributeError as e:
            # NOTE: The failure may be caused by one of these very
            #       attributes being undefined. Reading them directly
            #       would raise again from within this handler and the
            #       fallback below would never run.
            self.log.warning(
                "Falling back with dummy driver due error [ %s ] in driver"
                " [ %s ]. Check the driver CLI arguments, the configuration"
                " file [ %s ] contents, and ensure all dependencies are"
                " installed.",
                str(e),
                getattr(self.args, "driver", None),
                getattr(self.args, "config_file", None),
            )
            self.driver = self._load_driver(driver="dummy")

    def _load_driver(self, driver):
        """Import a driver plugin and return its Driver object.

        :param driver: Name of the driver module found within the
                       drivers package.
        :type driver: String
        :returns: Object
        :raises SystemExit: When the driver plugin can not be imported.
        :raises OSError: When creating the driver raises a NameError.
        """

        try:
            _driver = directord.plugin_import(
                plugin=".drivers.{}".format(driver)
            )
        except Exception as e:
            raise SystemExit(
                "Driver was not able to be loaded: {}".format(str(e))
            )
        else:
            self.log.debug("Loading messaging driver: [ %s ]", driver)

        try:
            return _driver.Driver(
                args=self.args,
                encrypted_traffic_data={
                    "enabled": self.keys_exist,
                    "public_keys_dir": self.public_keys_dir,
                    "secret_keys_dir": self.secret_keys_dir,
                },
                interface=self,
            )
        except NameError as e:
            raise OSError(
                "Failed to load driver {} - Error: {} - Check"
                " configuration and dependency installation.".format(
                    driver, str(e)
                )
            ) from None
def handle_job(
    self, identity, job_id, control, data, info, stderr, stdout
):
    """Handle a job interaction.

    The status of the job is recorded for the reporting identity, using
    the timing values found in the job data. Every entry of "new_tasks"
    in the job data is then registered as a return job and queued for
    each of its targets. Job data that can not be decoded as JSON is
    treated as empty.

    :param identity: Client identity
    :type identity: String
    :param job_id: Job Id
    :type job_id: String
    :param control: Job control character
    :type control: String
    :param data: Job data, JSON encoded.
    :type data: String
    :param info: Job info
    :type info: Dictionary
    :param stderr: Job stderr output
    :type stderr: String
    :param stdout: Job stdout output
    :type stdout: String
    """

    self.log.debug(
        "Job information received [ %s ] from [ %s ]",
        job_id,
        identity,
    )

    try:
        data_item = json.loads(data)
    except Exception:
        data_item = {}

    self._set_job_status(
        job_status=control,
        job_id=job_id,
        identity=identity,
        job_output=info,
        job_stdout=stdout,
        job_stderr=stderr,
        execution_time=data_item.get("execution_time", 0),
        return_timestamp=data_item.get("return_timestamp", 0),
        component_exec_timestamp=data_item.get(
            "component_exec_timestamp", 0
        ),
        recv_time=time.time(),
    )

    for new_task in data_item.get("new_tasks", []):
        self.log.debug("New task found: %s", new_task)
        if new_task.get("targets"):
            targets = list(new_task["targets"])
            self.log.debug(
                "Using existing targets from old job specification %s",
                targets,
            )
        else:
            targets = self._get_available_workers()
            self.log.debug(
                "Targets undefined in old job specification"
                " running everwhere"
            )

        # NOTE(cloudnull): If the new task identity is set but
        #                  with a null value, reset the value
        #                  to that of the known workers.
        if "identity" in new_task and not new_task["identity"]:
            self.log.debug("identities reset to all workers")
            new_task["identity"] = self._get_available_workers()

        if "job_id" not in new_task:
            new_task["job_id"] = utils.get_uuid()

        task_id = new_task["job_id"]
        self.create_return_jobs(
            task=task_id,
            job_item=new_task,
            targets=targets,
        )

        for target in targets:
            self.log.debug(
                "Queuing callback job [ %s ] for identity [ %s ]",
                task_id,
                target,
            )
            self.send_queue.put(
                {
                    "identity": target,
                    "command": new_task["verb"],
                    "data": new_task,
                }
            )
def run_socket_server(self):
    """Start a socket server.

    The socket server is used to broker a connection from the end user
    into the directord sub-system. Each accepted connection is served by
    a single read of at most 409600 bytes, after which the connection is
    closed and the client has to reconnect.

    All received data is expected to be JSON serialized data. Data that
    contains a "manage" key is answered directly with the requested
    information. Anything else is treated as a job: a job ID is added
    when one is not defined, the parent ID defaults to the job ID, the
    job ID is returned to the client and the job is added to the job
    queue. A job is not queued when the reply could not be sent.

    The server loop ends once the driver event is set. The listening
    socket is closed when the method exits.

    :raises SystemExit: When an existing socket path can not be removed.
    """

    socket_path = self.args.socket_path
    try:
        os.unlink(socket_path)
    except OSError:
        if os.path.exists(socket_path):
            raise SystemExit(
                "Socket path already exists and wasn't able to be"
                " cleaned up: {}".format(socket_path)
            )

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        # The accept timeout allows the loop to notice the driver event.
        sock.settimeout(1)
        sock.bind(socket_path)
        self.log.debug("Socket:%s bound", socket_path)
        # Read, write and execute for user and group, read and execute
        # for everyone else.
        os.chmod(socket_path, 0o775)
        uid = 0
        group = getattr(self.args, "socket_group", "root")
        try:
            gid = int(group)
        except ValueError:
            gid = grp.getgrnam(group).gr_gid

        try:
            os.chown(socket_path, uid, gid)
        except PermissionError:
            uid = os.getuid()
            self.log.warning(
                "Default socket bind permission failure. Running with"
                " current UID: [ %s ]",
                uid,
            )
            os.chown(socket_path, uid, gid)

        sock.listen(1)
        while True:
            try:
                conn, _ = sock.accept()
            except socket.timeout:
                if self.driver.event.is_set():
                    break
                else:
                    continue

            with conn:
                data = conn.recv(409600)
                data_decoded = data.decode()
                json_data = json.loads(data_decoded)
                if "manage" in json_data:
                    self.log.debug("Received manage command: %s", json_data)
                    key, value = next(iter(json_data["manage"].items()))
                    if key == "list_nodes":
                        data = list()
                        for v in self.workers.values():
                            if v.expired:
                                continue
                            # NOTE: Work on a copy. Adding the expiry to
                            #       the attribute dictionary itself would
                            #       modify the live worker object.
                            item = dict(v.__dict__)
                            item["expiry"] = v.expiry
                            data.append((v.identity, item))
                    elif key == "list_jobs":
                        data = [
                            (str(k), self._node_return_info(node_info=v))
                            for k, v in self.return_jobs.items()
                        ]
                    elif key == "job_info":
                        data = self.handle_job_info(value)
                    elif key == "purge_nodes":
                        self.workers.clear()
                        data = {"success": True}
                    elif key == "purge_jobs":
                        self.return_jobs.clear()
                        data = {"success": True}
                    else:
                        data = {"failed": True}

                    try:
                        conn.sendall(json.dumps(data).encode())
                    except BrokenPipeError as e:
                        self.log.error(
                            "Encountered a broken pipe while sending manage"
                            " data. Error:%s, data:%s",
                            str(e),
                            data,
                        )
                else:
                    json_data["job_id"] = json_data.get(
                        "job_id", utils.get_uuid()
                    )
                    if "parent_id" not in json_data:
                        json_data["parent_id"] = json_data["job_id"]

                    # The client receives the job ID, either on its own
                    # when a raw return was requested or within a
                    # message.
                    if json_data.get("return_raw", False):
                        msg = json_data["job_id"]
                    else:
                        msg = "Job received. Task ID: {}".format(
                            json_data["job_id"]
                        )

                    try:
                        conn.sendall(msg.encode())
                    except BrokenPipeError as e:
                        self.log.error(
                            "Encountered a broken pipe while sending job"
                            " data. Error:%s",
                            str(e),
                        )
                    else:
                        self.log.debug(
                            "Data sent to queue [ %s ]", json_data
                        )
                        self.job_queue.put(json_data)

            if self.driver.event.is_set():
                break
    finally:
        # Release the listening socket on every exit path.
        sock.close()
def run_job(self):
    """Run a job interaction.

    As the job loop executes it will interrogate the job item as returned
    from the queue. If the item contains a "targets" definition the job
    loop will only send the message to the given targets, otherwise all
    available workers will receive the message. If any defined target is
    not an available worker, the job is marked as failed for the unknown
    targets and nothing is sent.

    A job is skipped when it carries a "restrict" list that does not
    contain its SHA3_224 fingerprint. A job without a job ID is given one
    before it is processed.

    QUERY jobs are sent to every available worker, with the resolved
    targets stored in the job. Jobs with "run_once" enabled are only sent
    to the first target. ADD and COPY jobs queue one message per file and
    target; every other job queues one message per target.

    The loop ends once the driver event is set.
    """

    self.log.info("Starting run process.")
    for job_item in self.job_queue.getter():
        self.log.debug("Job item received [ %s ]", job_item)
        restrict_sha3_224 = job_item.get("restrict")
        if restrict_sha3_224:
            if job_item["job_sha3_224"] not in restrict_sha3_224:
                self.log.debug(
                    "Job restriction %s is unknown.", restrict_sha3_224
                )
                if self.driver.event.is_set():
                    break
                else:
                    continue

        # NOTE: The job ID is stored in the job itself because it is read
        #       from the job throughout this method. A job without an ID
        #       would otherwise fail with a KeyError.
        if "job_id" not in job_item:
            job_item["job_id"] = utils.get_uuid()

        job_id = job_item["job_id"]

        self.log.debug("Processing targets.")
        user_targets = job_item.pop("targets", [])
        user_target_difference = set(user_targets) - set(
            self._get_available_workers()
        )
        if user_target_difference:
            self.log.critical(
                "Target [ %s ] is unknown. Check the name againt"
                " the available targets",
                user_target_difference,
            )
            if not self.return_jobs.get(job_id):
                self.create_return_jobs(
                    task=job_id,
                    job_item=job_item,
                    targets=user_target_difference,
                )

            for target in user_target_difference:
                self._set_job_status(
                    job_status=self.driver.job_failed,
                    job_id=job_id,
                    identity=target,
                    job_output=(
                        "Target unknown. Available targets {}".format(
                            self._get_available_workers()
                        )
                    ),
                    recv_time=time.time(),
                )

            continue

        targets = user_targets or self._get_available_workers()
        if not targets:
            self.log.error("No known targets defined.")
            continue

        if job_item["verb"] == "QUERY":
            self.log.debug("Query mode enabled.")
            # NOTE(cloudnull): QUERY runs across the cluster. The
            #                  callback tasks are scoped to only
            #                  the nodes defined within the job
            #                  execution.
            job_item["targets"] = list(targets)
            targets = self._get_available_workers()
        elif job_item.get("run_once", False):
            self.log.debug("Run once enabled.")
            targets = job_item["targets"] = [targets[0]]

        self.create_return_jobs(
            task=job_id, job_item=job_item, targets=targets
        )

        self.log.debug("Processing job [ %s ]", job_item)
        for identity in targets:
            if job_item["verb"] in ["ADD", "COPY"]:
                for file_path in job_item["from"]:
                    job_item["file_sha3_224"] = utils.file_sha3_224(
                        file_path=file_path
                    )
                    if job_item["to"].endswith(os.sep):
                        job_item["file_to"] = os.path.join(
                            job_item["to"],
                            os.path.basename(file_path),
                        )
                    else:
                        job_item["file_to"] = job_item["to"]

                    self.log.debug(
                        "Queueing file transfer job [ %s ] for"
                        " file_path [ %s ] to identity [ %s ]",
                        job_id,
                        file_path,
                        identity,
                    )
                    # NOTE: A snapshot is queued because the file keys of
                    #       the job are rewritten for every file. A queue
                    #       that keeps a reference to the job would see
                    #       the values of a later file in every message
                    #       queued before it.
                    self.send_queue.put(
                        dict(
                            identity=identity,
                            command=job_item["verb"],
                            data=job_item.copy(),
                            info=file_path,
                        )
                    )
            else:
                self.log.debug(
                    "Queuing job [ %s ] for identity [ %s ]",
                    job_id,
                    identity,
                )
                self.send_queue.put(
                    dict(
                        identity=identity,
                        command=job_item["verb"],
                        data=job_item,
                    )
                )

        if self.driver.event.is_set():
            break