def _run(self):
    """
    Thread callback: run forever, executing jobs taken from the job queue.

    The loop ends when a ``None`` sentinel is dequeued, or when
    ``self._started`` is false after a job has been dequeued (that job is
    then discarded without being executed). Exceptions raised by a job are
    logged and do not stop the worker.
    """

    work_queue = self._work_queue
    count_lock = self._count_lock
    while True:
        logger.debug("Going to get job")
        func = work_queue.get()
        if func is None or not self._started:
            break

        logger.debug("Going to exec job")
        with count_lock:
            self._occupied_threads += 1

        try:
            func()
        except Exception:
            logger.error(traceback.format_exc())
        finally:
            # Always give the slot back, even when the job raises something
            # that is not an Exception subclass (e.g. SystemExit); otherwise
            # the occupied-thread counter would stay too high forever.
            with count_lock:
                self._occupied_threads -= 1

        logger.debug("Done with exec job")
        logger.info("Thread work_queue_size=%d", work_queue.qsize())

    # Thread.name replaces the deprecated Thread.getName().
    logger.debug("Worker thread %s stopped.",
                 threading.current_thread().name)
def parse_modinput_configs(config_str):
    """
    @config_str: modinput XML configuration feed by splunkd
    @return: meta_config and stanza_config. meta_config is a dict holding
    the text of the server_host, server_uri, session_key and checkpoint_dir
    elements; stanza_config is whatever _parse_modinput_configs returns
    for the document.
    Raises Exception when one of the meta elements is missing or does not
    start with a text node.
    """

    import xml.dom.minidom as xdm

    meta_configs = {
        "server_host": None,
        "server_uri": None,
        "session_key": None,
        "checkpoint_dir": None,
    }
    root = xdm.parseString(config_str)
    doc = root.documentElement
    # Iterate the dict directly: works on Python 2 and 3 (iterkeys() is
    # Python 2 only). Only existing keys are reassigned in the loop.
    for tag in meta_configs:
        nodes = doc.getElementsByTagName(tag)
        if not nodes:
            logger.error("Invalid config, missing %s section", tag)
            # Interpolate here; Exception() does not format its arguments.
            raise Exception("Invalid config, missing %s section" % tag)

        first_child = nodes[0].firstChild
        if first_child and first_child.nodeType == nodes[0].TEXT_NODE:
            meta_configs[tag] = first_child.data
        else:
            logger.error("Invalid config, expect text ndoe")
            raise Exception("Invalid config, expect text ndoe")

    # An <input> document nests stanzas under <configuration>; any other
    # document is parsed as <items>/<item>.
    if doc.nodeName == "input":
        configs = _parse_modinput_configs(doc, "configuration", "stanza")
    else:
        configs = _parse_modinput_configs(root, "items", "item")
    return meta_configs, configs
def _prepare_events(self, events):
    """
    Serialize events to JSON and group them into newline-joined batches.

    :param events: json dict list; entries whose "event" value is falsy
        are skipped, and entries that cannot be serialized are logged
        and skipped
    :return: a list of strings, each one a batch of serialized events
        joined by newlines. A batch is closed before an event would push
        its length to ``self._max_event_size`` or more (HEC default is
        1000,000 bytes); a single event larger than that still gets a
        batch of its own.
    """

    size = 0
    all_events, batched = [], []
    for evt in events:
        if not evt["event"]:
            continue

        try:
            evt = json.dumps(evt)
        except (TypeError, ValueError):
            # json.dumps raises TypeError for unserializable objects and
            # ValueError for e.g. circular references; drop just this
            # event instead of aborting the whole batch.
            logger.error("Invalid json event=%s", evt)
            continue

        # +1 accounts for the newline separator between events.
        if size + len(evt) + 1 >= self._max_event_size:
            if batched:
                all_events.append("\n".join(batched))
                del batched[:]
            size = 0

        batched.append(evt)
        size = size + len(evt) + 1

    if batched:
        all_events.append("\n".join(batched))
    return all_events
def _check_and_execute(self):
    """
    Timer loop: fire expired timers, reset them, then wait on the wakeup
    queue until the next expiry. A ``None`` item on the wakeup queue
    stops the loop; any other item just triggers another pass.
    """

    wakeup_queue = self._wakeup_queue
    while True:
        next_expired_time, expired_timers = self._get_expired_timers()
        for timer in expired_timers:
            try:
                timer()
            except Exception:
                logger.error(traceback.format_exc())

        self._reset_timers(expired_timers)

        # Calc sleep time: 1s when nothing is scheduled, otherwise the
        # time left until the next expiry (0.1s if it is already due).
        sleep_time = 1
        if next_expired_time:
            remaining = next_expired_time - time()
            sleep_time = remaining if remaining > 0 else 0.1

        try:
            if wakeup_queue.get(timeout=sleep_time) is None:
                break
        except Queue.Empty:
            continue

    logger.info("TimerQueue stopped.")
def content_request(uri, session_key, method, payload, err_msg):
    """
    Issue a splunkd REST request (3 retries) and return its content.

    :return: response content when the status is between 200 and 204;
    None when both response and content are None. Otherwise raise
    ConfNotExistsException for 404, ConfExistsException for 409 or when
    the content contains "already exists", and ConfRequestException for
    everything else.
    """

    resp, content = rest.splunkd_request(uri, session_key, method,
                                         data=payload, retry=3)
    if resp is None and content is None:
        return None

    if 200 <= resp.status <= 204:
        return content

    msg = "{}, status={}, reason={}, detail={}".format(
        err_msg, resp.status, resp.reason, content)

    # 404 is never logged here; it is reported via the exception only.
    if resp.status == 404:
        raise ConfNotExistsException(msg)

    if method != "GET":
        logger.error(msg)

    if resp.status == 409 or (content and "already exists" in content):
        raise ConfExistsException(msg)
    raise ConfRequestException(msg)
def get_modinput_config_str_from_stdin():
    """
    Read the whole of stdin, which is where splunkd feeds the modinput
    configuration, and return it. Read failures are logged with their
    traceback and re-raised.
    """

    try:
        config_str = sys.stdin.read()
    except Exception:
        logger.error(traceback.format_exc())
        raise
    return config_str
def _do_monitor(self):
    """
    Poll ``self._checker.check_orphan()`` about once per second while
    ``self._started`` is true, and stop as soon as it returns a truthy
    value. A failing check is logged and retried on the next poll.
    """

    while self._started:
        try:
            if self._checker.check_orphan():
                break
        except Exception:
            logger.error("Failed to monitor orphan process, reason=%s",
                         traceback.format_exc())

        # Sleep after a failed check as well; otherwise a persistently
        # failing checker would spin in a tight loop and flood the log.
        time.sleep(1)
def delete_http_input(self, name):
    """
    Delete the named http input; a missing input is not an error.

    :name: string, http input name
    Any other failure is logged and re-raised.
    """

    try:
        self._conf_mgr.delete_data_input(self.input_type, name)
    except req.ConfNotExistsException:
        # Nothing to delete.
        return
    except Exception:
        logger.error("Failed to delete httpinput=%s, reason=%s",
                     name, traceback.format_exc())
        raise
def _handle_too_large_error(self, event, status, content):
    """
    React to a 413 "too large" response by doubling the HEC
    max_content_length limit.

    :return: True when the limit was raised, False when the doubled
    limit would reach ``self.max_length`` (the event is logged as
    dropped), None when the response is not a 413 "too large" error.
    """

    if status != 413 or "too large" not in content:
        return None

    hcc = hecc.HECConfig(self._config["server_uri"],
                         self._config["session_key"])
    doubled = int(hcc.get_limits()["max_content_length"]) * 2
    if doubled < self.max_length:
        hcc.set_limits({"max_content_length": doubled})
        return True

    # Each event should not exceed 25 MB
    logger.error("Event is too bigger. Drop event=%s", event)
    return False
def get_http_input(self, name):
    """
    :name: string, http input name
    :return: list of http input config if successful or None when there
    is no such http input or raise exception if other exception happened
    """

    try:
        found = self._conf_mgr.get_data_input(self.input_type, name)
    except req.ConfNotExistsException:
        found = None
    except Exception:
        logger.error("Failed to get httpinput=%s, reason=%s",
                     name, traceback.format_exc())
        raise
    return found
def update_settings(self, settings):
    """
    Update the http input settings stanza, whose name equals the input
    type. Failures are logged and re-raised.

    :settings: dict object
    {
    "enableSSL": 1/0,
    "disabled": 1/0,
    "useDeploymentServer": 1/0,
    "port": 8088,
    "output_mode": "json",
    }
    """

    try:
        update = self._conf_mgr.update_data_input
        stanza_name = self.input_type
        update(stanza_name, stanza_name, settings)
    except Exception:
        logger.error("Failed to update httpinput settings, reason=%s",
                     traceback.format_exc())
        raise
def create_http_input(self, stanza):
    """
    Create an http input from the stanza; an already existing input is
    not an error. Any other failure is logged and re-raised.

    :stanza: dict object
    {
    "name": "akamai",
    "index": "main", (optional)
    "sourcetype": "akamai:cm:json", (optional)
    "description": "xxx", (optional)
    "token": "A0-5800-406B-9224-8E1DC4E720B6", (optional)
    }
    """

    try:
        self._conf_mgr.create_data_input(
            self.input_type, stanza["name"], stanza)
    except req.ConfExistsException:
        # Already there, nothing to do.
        return
    except Exception:
        logger.error("Failed to create httpinput=%s, reason=%s",
                     stanza["name"], traceback.format_exc())
        raise
def get_modinput_configs_from_cli(modinput, modinput_stanza=None):
    """
    Run ``splunk cmd splunkd print-modinput-config`` and parse its output.

    @modinput: modinput name
    @modinput_stanza: modinput stanza name, for multiple instance only
    @return: the result of parse_modinput_configs on the command's stdout,
    or (None, None) when the command wrote anything to stderr
    """

    assert modinput

    cli = [sp.get_splunk_bin(), "cmd", "splunkd", "print-modinput-config",
           modinput]
    if modinput_stanza:
        cli.append(modinput_stanza)

    proc = subprocess.Popen(cli, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if not err:
        return parse_modinput_configs(out)

    logger.error("Failed to get modinput configs with error: %s", err)
    return None, None
def do_splunkd_request(splunkd_uri, session_key, method, headers, data,
                       timeout, retry, http_req):
    """
    Send a REST request to splunkd through ``http_req``, trying up to
    ``retry`` times until a 200/201 response is received.

    :param splunkd_uri: request URI
    :param session_key: session key; prefixed with "Splunk " for the
        Authorization header unless it already starts with "Splunk"
    :param method: HTTP method
    :param headers: dict of extra headers or None; it is copied, never
        modified
    :param data: request body; anything that is not a string is JSON
        encoded when the content type is "application/json" and
        URL-encoded otherwise
    :param timeout: passed through to ``http_req``
    :param retry: maximum number of attempts
    :param http_req: callable(uri, method, headers, data, timeout)
        returning (response, content)
    :return: (response, content) from the most recent attempt in which
        ``http_req`` returned, or (None, None) if it never returned
    """

    # Copy so that default headers are not leaked into the caller's dict
    # (which may be shared between requests).
    headers = dict(headers) if headers is not None else {}
    headers["Connection"] = "keep-alive"
    headers["User-Agent"] = "curl/7.29.0"
    if session_key:
        if not session_key.startswith('Splunk'):
            session_key = "Splunk {0}".format(session_key)
        headers["Authorization"] = session_key

    content_type = (headers.get("Content-Type")
                    or headers.get("content-type"))
    if not content_type:
        content_type = "application/x-www-form-urlencoded"
        headers["Content-Type"] = content_type

    if data is not None and not isinstance(data, string_types):
        if content_type == "application/json":
            data = json.dumps(data)
        else:
            data = urllib.parse.urlencode(data)

    msg_temp = "Failed to send rest request=%s, errcode=%s, reason=%s"
    resp, content = None, None
    for _ in range(retry):
        try:
            resp, content = http_req(splunkd_uri, method, headers, data,
                                     timeout)
        except Exception:
            logger.error(msg_temp, splunkd_uri, "unknown", format_exc())
        else:
            if resp.status not in (200, 201):
                # Failed GETs and 404s are retried without being logged.
                if method != "GET" and resp.status != 404:
                    logger.error(msg_temp, splunkd_uri, resp.status,
                                 code_to_msg(resp, content))
            else:
                break

    return resp, content
def _parse_modinput_configs(root, outer_block, inner_block):
    """
    Collect the stanzas found under the first ``outer_block`` element of
    ``root``. Each ``inner_block`` element becomes a dict holding its
    "name" attribute plus one entry per <param> whose first child is a
    text node (params without text are left out).

    Raises Exception when ``outer_block`` is missing or a stanza has no
    name.

    When splunkd spawns modinput script to do config check or run

    <?xml version="1.0" encoding="UTF-8"?>
    <input>
      <server_host>localhost.localdomain</server_host>
      <server_uri>https://127.0.0.1:8089</server_uri>
      <session_key>xxxyyyzzz</session_key>
      <checkpoint_dir>ckpt_dir</checkpoint_dir>
      <configuration>
        <stanza name="snow://alm_asset">
          <param name="duration">60</param>
          <param name="host">localhost.localdomain</param>
          <param name="index">snow</param>
          <param name="priority">10</param>
        </stanza>
        ...
      </configuration>
    </input>

    When user creates a stanza through data input on WebUI

    <?xml version="1.0" encoding="UTF-8"?>
    <items>
      <server_host>localhost.localdomain</server_host>
      <server_uri>https://127.0.0.1:8089</server_uri>
      <session_key>xxxyyyzzz</session_key>
      <checkpoint_dir>ckpt_dir</checkpoint_dir>
      <item name="abc">
        <param name="duration">60</param>
        <param name="exclude"></param>
        <param name="host">localhost.localdomain</param>
        <param name="index">snow</param>
        <param name="priority">10</param>
      </item>
    </items>
    """

    confs = root.getElementsByTagName(outer_block)
    if not confs:
        logger.error("Invalid config, missing %s section", outer_block)
        # %-interpolation is required here: str.format() ignores "%s".
        raise Exception(
            "Invalid config, missing %s section" % outer_block)

    configs = []
    for stanza in confs[0].getElementsByTagName(inner_block):
        stanza_name = stanza.getAttribute("name")
        if not stanza_name:
            logger.error("Invalid config, missing name")
            raise Exception("Invalid config, missing name")

        config = {"name": stanza_name}
        for param in stanza.getElementsByTagName("param"):
            name = param.getAttribute("name")
            first_child = param.firstChild
            if (name and first_child and
                    first_child.nodeType == first_child.TEXT_NODE):
                config[name] = first_child.data
        configs.append(config)
    return configs
def write_events(self, events, retry=3):
    """
    Post events to HEC in batches built by ``self._prepare_events``.

    :params: events a list of json dict which meets HEC event schema
    {
    "event": xx,
    "index": yy,
    "host": yy,
    "source": yy,
    "sourcetype": yy,
    "time": yy,
    }
    Clients should consider batching, since when batching here, upper
    layer may have data loss
    :params: retry number of attempts per batch

    Every batch is attempted even if an earlier one failed. If the last
    attempt of any batch ended in an exception, the first such exception
    is raised once all batches have been tried.

    A batch is given up without raising when every attempt got no
    response at all (logged only), or when a non-200/201 response is not
    handled by ``self._handle_too_large_error`` (falsy return).
    """

    if not events:
        return

    first_ex = None
    for event in self._prepare_events(events):
        # Track failures per batch so that a later successful batch
        # cannot hide the failure of an earlier one.
        last_ex = None
        for _ in range(retry):
            try:
                response, content = sr.splunkd_request(
                    self._uri, self._config["token"], method="POST",
                    headers=self._headers, data=event, http=self._http)

                if response is None:
                    logger.error(
                        "Failed to write events through HEC, plese make "
                        "sure http event collector is enabled and consult "
                        "util.log for more details")
                    time.sleep(2)
                    continue

                if response.status in (200, 201):
                    last_ex = None
                    break
                else:
                    msg = ("Failed to post events to HEC_URI={}, "
                           "error_code={}, reason={}").format(
                               self._uri, response.status, content)
                    logger.error(msg)
                    handled = self._handle_too_large_error(
                        event, response.status, content)
                    if not handled:
                        last_ex = None
                        break

                    # We raise here to commonly use the below code block
                    raise Exception(msg)
            except Exception as e:
                last_ex = e
                logger.error(
                    "Failed to post events to HEC_URI=%s, error=%s",
                    self._uri, traceback.format_exc())
                time.sleep(2)

        if last_ex is not None and first_ex is None:
            first_ex = last_ex

    if first_ex is not None:
        raise first_ex