class snowIncidentCommand(GeneratingCommand):
    assigned = Option(require=True, validate=validators.List())
    assigned_by = Option(require=False)
    daysAgo = Option(require=False, validate=validators.Integer(0))
    active = Option(require=True, validate=validators.Boolean())
    limit = Option(require=False, validate=validators.Integer(0))
    env = Option(require=False)

    def generate(self):
        self.logger.debug('snowIncidentCommand: %s', self)
        searchinfo = self.metadata.searchinfo
        app = AppConf(searchinfo.splunkd_uri, searchinfo.session_key)
        env = self.env.lower() if self.env else 'production'
        conf = app.get_config('getsnow')[env]
        assigned_by = 'assignment_group' if self.assigned_by == 'group' else 'assigned_to'
        assignment = ({'table': 'sys_user_group', 'field': 'name'}
                      if self.assigned_by == 'group'
                      else {'table': 'sys_user', 'field': 'user_name'})
        limit = self.limit if self.limit else 10000
        snowincident = snow(conf['url'], conf['user'], conf['password'])
        sids = snowincident.getsysid(assignment['table'], assignment['field'], self.assigned)
        filters = snowincident.filterbuilder(assigned_by, sids)
        glide = 'sys_created_on>=javascript:gs.daysAgo({})'.format(self.daysAgo) if self.daysAgo else ''
        url = snowincident.reqencode(filters, table='incident', glide_system=glide,
                                     active=self.active, sysparm_limit=limit)
        for record in snowincident.getrecords(url):
            record = snowincident.updatevalue(record, sourcetype='snow:incident')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
class UDPCommand(GeneratingCommand):
    port = Option(require=True, validate=validators.Integer())
    message = Option(require=True)
    ip = Option(require=True)

    def generate(self):
        IPADDR = self.ip
        PORTNUM = self.port
        PACKETDATA = self.message
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, 0)
        s.settimeout(5)
        try:
            # connect the socket, think of it as connecting the cable to the address location
            s.connect((IPADDR, PORTNUM))
            # send the command (encoded to bytes for Python 3 sockets)
            s.send(PACKETDATA.encode("UTF-8"))
        except socket.error:
            pass
        # receive the UDP response
        data = s.recv(4096).decode("UTF-8")
        yield {'_time': time.time(), '_raw': data}
        # close the socket
        s.close()
class functCommand(StreamingCommand):
    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the transformed value''',
        require=True, validate=validators.Fieldname())

    char_limit = Option(
        doc='''
        **Syntax:** **char_limit=***<positive int>*
        **Description:** Determines how many characters in a field to process. Default is 150''',
        require=False, validate=validators.Integer(maximum=10000), default=150)

    def stream(self, records):
        self.logger.debug('functCommand: %s', self)  # logs command line
        for record in records:
            for fieldname in self.fieldnames:
                char_limit = self.char_limit
                x = record[fieldname][0:char_limit]
                if re.search(r'\W', record[fieldname]):
                    x = re.sub(r'\w', "", x)
                    x = re.sub(r'\s', "_", x)
                    record[self.fieldname] = x
                else:
                    x = re.sub(r'[B-Z]', "A", x)
                    x = re.sub(r'[b-z]', "a", x)
                    x = re.sub(r'[0-8]', "9", x)
                    x = re.sub(r'\s', "w", x)
                    record[self.fieldname] = x
            yield record
class gentestCommand(GeneratingCommand):
    count = Option(require=True, validate=validators.Integer())

    def generate(self):
        # yield a fresh dict per event so consumers do not share a mutated reference
        for i in range(self.count):
            yield {'_time': time.time(), '_raw': "HELLO WORLD!"}
class GenerateTextCommand(GeneratingCommand):
    count = Option(require=True, validate=validators.Integer(0))
    text = Option(require=True)

    def generate(self):
        text = self.text
        self.logger.debug("Generating %d events with text %s" % (self.count, self.text))
        for i in range(1, self.count + 1):
            yield {'_serial': i, '_time': time.time(), '_raw': six.text_type(i) + '. ' + text}
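# Editorial note (assumption, not part of the original snippets): each class in this
# collection is an excerpt that omits its module scaffolding. Commands built with
# splunklib.searchcommands are normally declared with a @Configuration() decorator
# and wired up through a module-level dispatch() call. A minimal self-contained
# sketch follows; the command name "generatehello" and its option are illustrative only.
import sys
import time

from splunklib.searchcommands import dispatch, GeneratingCommand, Configuration, Option, validators


@Configuration()
class GenerateHelloCommand(GeneratingCommand):
    # number of events to emit
    count = Option(require=True, validate=validators.Integer(minimum=1))

    def generate(self):
        for i in range(1, self.count + 1):
            yield {'_serial': i, '_time': time.time(), '_raw': 'hello %d' % i}


# Route the search-command protocol (getinfo/execute phases) through this module.
dispatch(GenerateHelloCommand, sys.argv, sys.stdin, sys.stdout, __name__)
# Usage from SPL, assuming the command is registered in commands.conf: | generatehello count=3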
class StubbedReportingCommand(ReportingCommand):
    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=False, validate=validators.Boolean())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File(mode='r'))

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A value drawn from the set {foo, bar, test}''',
        validate=validators.Set("foo", "bar", "test"))

    @Configuration()
    def map(self, records):
        pass

    def reduce(self, records):
        pass
def __init__(self, **kwargs):
    super(ValidateTestSize, self).__init__()
    int_args = {
        'minimum': kwargs.get('int_minimum'),
        'maximum': kwargs.get('int_maximum'),
    }
    float_args = {
        'minimum': kwargs.get('float_minimum'),
        'maximum': kwargs.get('float_maximum'),
    }
    self.validate_int = validators.Integer(**int_args)
    self.validate_float = ValidateFloat(**float_args)
class SleepCommand(StreamingCommand):
    time = Option(
        doc='''
        **Syntax:** **time=***<seconds>*
        **Description:** Time to sleep, in seconds''',
        require=True, validate=validators.Integer())

    def stream(self, records):
        self.logging_level = "INFO"
        # fall back to a one-second sleep if no value was supplied
        sleeptime = self.time if self.time is not None else 1
        time.sleep(sleeptime)
        for record in records:
            yield record
class snowUserCommand(GeneratingCommand):
    user_name = Option(require=True, validate=validators.List())
    daysAgo = Option(require=False, validate=validators.Integer(0))
    env = Option(require=False)

    def generate(self):
        self.logger.debug('snowuser: %s', self)
        searchinfo = self.metadata.searchinfo
        app = AppConf(searchinfo.splunkd_uri, searchinfo.session_key)
        env = self.env.lower() if self.env else 'production'
        conf = app.get_config('getsnow')[env]
        snowuser = snow(conf['url'], conf['user'], conf['password'])

        filters = snowuser.filterbuilder('user_name', self.user_name)
        query_string = snowuser.reqencode(filters, 'sys_user')
        user_sid = []
        for record in snowuser.getrecords(query_string):
            user_sid.append(record['sys_id'])
            record = snowuser.updatevalue(record, sourcetype='snow:user')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record

        filters = snowuser.filterbuilder('assigned_to', user_sid)
        url = snowuser.reqencode(filters, table='alm_asset')
        for record in snowuser.getrecords(url):
            record = snowuser.updatevalue(record, sourcetype='snow:asset')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record

        filters = snowuser.filterbuilder('opened_by', user_sid)
        glide = 'sys_created_on>=javascript:gs.daysAgo({})'.format(self.daysAgo) if self.daysAgo else ''
        url = snowuser.reqencode(filters, table='incident', glide_system=glide)
        for record in snowuser.getrecords(url):
            record = snowuser.updatevalue(record, sourcetype='snow:incident')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
class DatabricksJobCommand(GeneratingCommand):
    """Custom Command of databricksjob."""

    # Take input from user using parameters
    job_id = Option(require=True, validate=validators.Integer(0))
    notebook_params = Option(require=False)

    def generate(self):
        """Generating custom command."""
        _LOGGER.info("Initiating databricksjob command")
        kv_log_info = {
            "user": self._metadata.searchinfo.username,
            "created_time": time.time(),
            "param": self._metadata.searchinfo.args,
            "run_id": "-",
            "output_url": "-",
            "result_url": "-",
            "command_status": "Failed",
            "error": "-",
        }
        session_key = self._metadata.searchinfo.session_key

        try:
            # Get job details
            client = com.DatabricksClient(session_key)
            payload = {
                "job_id": self.job_id,
            }
            _LOGGER.info("Fetching job details before submitting the execution.")
            response = client.databricks_api("get", const.GET_JOB_ENDPOINT, args=payload)

            job_settings = response["settings"]
            tasks_list = list(set(job_settings.keys()))

            if "notebook_task" not in tasks_list:
                raise Exception(
                    "Given job does not contain a notebook task. Hence terminating the execution."
                )

            if ("spark_jar_task" in tasks_list
                    or "spark_python_task" in tasks_list
                    or "spark_submit_task" in tasks_list):
                raise Exception(
                    "Given job contains one of the following tasks in addition to the notebook task "
                    "(spark_jar_task, spark_python_task, spark_submit_task). "
                    "Hence terminating the execution.")

            # Request for executing the job
            _LOGGER.info("Preparing request body for execution.")
            payload["notebook_params"] = utils.format_to_json_parameters(self.notebook_params)

            _LOGGER.info("Submitting job for execution.")
            response = client.databricks_api("post", const.EXECUTE_JOB_ENDPOINT, data=payload)

            kv_log_info.update(response)
            run_id = response["run_id"]
            _LOGGER.info("Successfully executed the job with ID: {}.".format(self.job_id))

            # Request to get the run_id details
            _LOGGER.info("Fetching details for run ID: {}.".format(run_id))
            args = {"run_id": run_id}
            response = client.databricks_api("get", const.GET_RUN_ENDPOINT, args=args)

            output_url = response.get("run_page_url")
            if output_url:
                result_url = output_url.rstrip("/") + "/resultsOnly"
                kv_log_info["output_url"] = output_url
                kv_log_info["result_url"] = result_url
                kv_log_info["command_status"] = "Success"
                _LOGGER.info("Output url returned: {}".format(output_url))

        except Exception as e:
            _LOGGER.error(e)
            _LOGGER.error(traceback.format_exc())
            kv_log_info["error"] = str(e)
            self.write_error(str(e))
            exit(1)
        finally:
            updated_kv_info = utils.update_kv_store_collection(
                self._metadata.searchinfo.splunkd_uri,
                const.KV_COLLECTION_NAME_EXECUTE_JOB,
                session_key,
                kv_log_info,
            )
            yield updated_kv_info
class GenerateTextCommand(GeneratingCommand):
    account = Option(require=True)
    opt = Option(require=True, validate=validators.Integer(0))

    def jira_url(self, url, endpoint):
        # For Splunk Cloud vetting, the URL must start with https://
        if not url.startswith("https://"):
            return 'https://%s/rest/api/latest/%s' % (url, endpoint)
        else:
            return '%s/rest/api/latest/%s' % (url, endpoint)

    def get_jira_info(self, jira_headers, url, ssl_verify, proxy_dict, endpoint):
        response = requests.get(
            url=self.jira_url(url, endpoint),
            headers=jira_headers,
            verify=ssl_verify,
            proxies=proxy_dict)
        return response.json()

    def generate(self):
        storage_passwords = self.service.storage_passwords

        # global configuration
        conf_file = "ta_service_desk_simple_addon_settings"
        confs = self.service.confs[str(conf_file)]
        jira_passthrough_mode = None
        proxy_enabled = "0"
        proxy_url = None
        proxy_dict = None
        proxy_username = None
        for stanza in confs:
            if stanza.name == "advanced_configuration":
                for key, value in stanza.content.items():
                    if key == "jira_passthrough_mode":
                        jira_passthrough_mode = value
            if stanza.name == "proxy":
                for key, value in stanza.content.items():
                    if key == "proxy_enabled":
                        proxy_enabled = value
                    if key == "proxy_port":
                        proxy_port = value
                    if key == "proxy_rdns":
                        proxy_rdns = value
                    if key == "proxy_type":
                        proxy_type = value
                    if key == "proxy_url":
                        proxy_url = value
                    if key == "proxy_username":
                        proxy_username = value

        if proxy_enabled == "1":
            if proxy_username:
                proxy_password = None
                # get proxy password, if any
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_settings'
                for credential in storage_passwords:
                    if credential.content.get('realm') == str(credential_realm) \
                            and credential.content.get('clear_password').find('proxy_password') > 0:
                        proxy_password = json.loads(
                            credential.content.get('clear_password')).get('proxy_password')
                        break
                if proxy_type == 'http':
                    proxy_dict = {
                        "http": "http://" + proxy_username + ":" + proxy_password + "@" + proxy_url + ":" + proxy_port,
                        "https": "https://" + proxy_username + ":" + proxy_password + "@" + proxy_url + ":" + proxy_port
                    }
                else:
                    proxy_dict = {
                        "http": str(proxy_type) + "://" + proxy_username + ":" + proxy_password + "@" + proxy_url + ":" + proxy_port,
                        "https": str(proxy_type) + "://" + proxy_username + ":" + proxy_password + "@" + proxy_url + ":" + proxy_port
                    }
            else:
                proxy_dict = {
                    "http": proxy_url + ":" + proxy_port,
                    "https": proxy_url + ":" + proxy_port
                }

        # get all accounts
        accounts = []
        conf_file = "ta_service_desk_simple_addon_account"
        confs = self.service.confs[str(conf_file)]
        for stanza in confs:
            accounts.append(stanza.name)

        # run
        if self.account == '_all':
            for account in accounts:
                # account configuration
                jira_ssl_certificate_validation = None
                jira_ssl_certificate_path = None
                username = None
                password = None
                conf_file = "ta_service_desk_simple_addon_account"
                confs = self.service.confs[str(conf_file)]
                for stanza in confs:
                    if stanza.name == str(account):
                        for key, value in stanza.content.items():
                            if key == "jira_url":
                                jira_url = value
                            if key == "jira_ssl_certificate_validation":
                                jira_ssl_certificate_validation = value
                            if key == "jira_ssl_certificate_path":
                                jira_ssl_certificate_path = value
                            if key == 'auth_type':
                                auth_type = value
                            if key == 'jira_auth_mode':
                                jira_auth_mode = value
                            if key == 'username':
                                username = value
                # end of get configuration

                credential_username = str(account) + '``splunk_cred_sep``1'
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_account'
                for credential in storage_passwords:
                    if credential.content.get('username') == str(credential_username) \
                            and credential.content.get('realm') == str(credential_realm) \
                            and credential.content.get('clear_password').find('password') > 0:
                        password = json.loads(
                            credential.content.get('clear_password')).get('password')
                        break

                # Build the authentication header for JIRA
                if str(jira_auth_mode) == 'basic':
                    authorization = username + ':' + password
                    b64_auth = base64.b64encode(authorization.encode()).decode()
                    jira_headers = {
                        'Authorization': 'Basic %s' % b64_auth,
                        'Content-Type': 'application/json',
                    }
                elif str(jira_auth_mode) == 'pat':
                    jira_headers = {
                        'Authorization': 'Bearer %s' % str(password),
                        'Content-Type': 'application/json',
                    }

                if jira_ssl_certificate_validation:
                    if jira_ssl_certificate_validation == '0':
                        ssl_verify = False
                    elif jira_ssl_certificate_validation == '1' and jira_ssl_certificate_path \
                            and os.path.isfile(jira_ssl_certificate_path):
                        ssl_verify = str(jira_ssl_certificate_path)
                    elif jira_ssl_certificate_validation == '1':
                        ssl_verify = True

                if self.opt == 1:
                    for project in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'project'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(account),
                            'key': project.get('key'),
                            'key_projects': project.get('key') + " - " + project.get('name')
                        }
                        yield usercreds

                if self.opt == 2:
                    for issue in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'issuetype'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(account),
                            'issues': issue.get('name')
                        }
                        yield usercreds

                if self.opt == 3:
                    for priority in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'priority'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(account),
                            'priorities': priority.get('name')
                        }
                        yield usercreds

                if self.opt == 4:
                    for status in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'status'):
                        result = {
                            '_time': time.time(),
                            'account': str(account),
                            'status': status.get('name'),
                            'statusCategory': status.get('statusCategory').get('name')
                        }
                        yield result

        else:
            # account configuration
            isfound = False
            jira_ssl_certificate_validation = None
            jira_ssl_certificate_path = None
            username = None
            password = None
            conf_file = "ta_service_desk_simple_addon_account"
            confs = self.service.confs[str(conf_file)]
            for stanza in confs:
                if stanza.name == str(self.account):
                    isfound = True
                    for key, value in stanza.content.items():
                        if key == "jira_url":
                            jira_url = value
                        if key == "jira_ssl_certificate_validation":
                            jira_ssl_certificate_validation = value
                        if key == "jira_ssl_certificate_path":
                            jira_ssl_certificate_path = value
                        if key == 'auth_type':
                            auth_type = value
                        if key == 'jira_auth_mode':
                            jira_auth_mode = value
                        if key == 'username':
                            username = value
            # end of get configuration

            # Stop here if we cannot find the submitted account
            if not isfound:
                self.logger.fatal(
                    'This account has not been configured on this instance, cannot proceed!: %s', self)
            # else get the password
            else:
                credential_username = str(self.account) + '``splunk_cred_sep``1'
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_account'
                for credential in storage_passwords:
                    if credential.content.get('username') == str(credential_username) \
                            and credential.content.get('realm') == str(credential_realm) \
                            and credential.content.get('clear_password').find('password') > 0:
                        password = json.loads(
                            credential.content.get('clear_password')).get('password')
                        break

                # Build the authentication header for JIRA
                if str(jira_auth_mode) == 'basic':
                    authorization = username + ':' + password
                    b64_auth = base64.b64encode(authorization.encode()).decode()
                    jira_headers = {
                        'Authorization': 'Basic %s' % b64_auth,
                        'Content-Type': 'application/json',
                    }
                elif str(jira_auth_mode) == 'pat':
                    jira_headers = {
                        'Authorization': 'Bearer %s' % str(password),
                        'Content-Type': 'application/json',
                    }

                if jira_ssl_certificate_validation:
                    if jira_ssl_certificate_validation == '0':
                        ssl_verify = False
                    elif jira_ssl_certificate_validation == '1' and jira_ssl_certificate_path \
                            and os.path.isfile(jira_ssl_certificate_path):
                        ssl_verify = str(jira_ssl_certificate_path)
                    elif jira_ssl_certificate_validation == '1':
                        ssl_verify = True

                if self.opt == 1:
                    for project in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'project'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(self.account),
                            'key': project.get('key'),
                            'key_projects': project.get('key') + " - " + project.get('name')
                        }
                        yield usercreds

                if self.opt == 2:
                    for issue in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'issuetype'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(self.account),
                            'issues': issue.get('name')
                        }
                        yield usercreds

                if self.opt == 3:
                    for priority in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'priority'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(self.account),
                            'priorities': priority.get('name')
                        }
                        yield usercreds

                if self.opt == 4:
                    for status in self.get_jira_info(jira_headers, jira_url, ssl_verify, proxy_dict, 'status'):
                        result = {
                            '_time': time.time(),
                            'account': str(self.account),
                            'status': status.get('name'),
                            'statusCategory': status.get('statusCategory').get('name')
                        }
                        yield result
class DatabricksQueryCommand(GeneratingCommand):
    """Custom Command of databricksquery."""

    # Take input from user using parameters
    cluster = Option(require=False)
    query = Option(require=True)
    command_timeout = Option(require=False, validate=validators.Integer(minimum=1))

    def generate(self):
        """Generating custom command."""
        _LOGGER.info("Initiating databricksquery command")
        command_timeout_in_seconds = self.command_timeout or const.COMMAND_TIMEOUT_IN_SECONDS
        _LOGGER.info("Setting command timeout to {} seconds.".format(command_timeout_in_seconds))

        # Get session key
        session_key = self._metadata.searchinfo.session_key

        try:
            # Fetching cluster name
            self.cluster = self.cluster or utils.get_databricks_configs().get("cluster_name")
            if not self.cluster:
                raise Exception(
                    "Databricks cluster is required to execute this custom command. "
                    "Provide a cluster parameter or configure the cluster in the TA's configuration page."
                )

            # Request to get cluster ID
            _LOGGER.info("Requesting cluster ID for cluster: {}.".format(self.cluster))
            cluster_id = com.get_cluster_id(session_key, self.cluster)
            _LOGGER.info("Cluster ID received: {}.".format(cluster_id))

            # Request to create context
            _LOGGER.info("Creating Context in cluster.")
            payload = {"language": "sql", "clusterId": cluster_id}
            response = com.databricks_api(
                "post", const.CONTEXT_ENDPOINT, session_key, data=payload
            )
            context_id = response.get("id")
            _LOGGER.info("Context created: {}.".format(context_id))

            # Request to execute command
            _LOGGER.info("Submitting SQL query for execution.")
            payload["contextId"] = context_id
            payload["command"] = self.query
            response = com.databricks_api(
                "post", const.COMMAND_ENDPOINT, session_key, data=payload
            )
            command_id = response.get("id")
            _LOGGER.info("Query submitted, command id: {}.".format(command_id))

            # Polling mechanism
            _LOGGER.info("Fetching query execution status.")
            status = None
            args = {
                "clusterId": cluster_id,
                "contextId": context_id,
                "commandId": command_id,
            }
            total_wait_time = 0
            while total_wait_time <= command_timeout_in_seconds:
                response = com.databricks_api(
                    "get", const.STATUS_ENDPOINT, session_key, args=args
                )
                status = response.get("status")
                _LOGGER.info("Query execution status: {}.".format(status))

                if status in ("Cancelled", "Error"):
                    raise Exception(
                        "Could not complete the query execution. Status: {}.".format(status)
                    )
                elif status == "Finished":
                    if response["results"]["resultType"] == "error":
                        msg = response["results"].get(
                            "summary", "Error encountered while executing query.")
                        raise Exception(str(msg))

                    if response["results"]["resultType"] != "table":
                        raise Exception("Encountered unknown result type, terminating the execution.")

                    if response["results"].get("truncated", True):
                        self.write_warning("Results are truncated due to Databricks API limitations.")

                    _LOGGER.info("Query execution successful. Preparing data.")

                    # Prepare list of headers
                    headers = response["results"]["schema"]
                    schema = []
                    for header in headers:
                        field = header.get("name")
                        schema.append(field)

                    # Fetch data
                    data = response["results"]["data"]
                    for d in data:
                        yield dict(zip(schema, d))
                    _LOGGER.info("Data parsed successfully.")
                    break

                seconds_to_timeout = command_timeout_in_seconds - total_wait_time
                if seconds_to_timeout < const.COMMAND_SLEEP_INTERVAL_IN_SECONDS:
                    if not seconds_to_timeout:
                        total_wait_time += 1
                        continue
                    _LOGGER.info(
                        "Query execution in progress, will retry after {} seconds.".format(
                            str(seconds_to_timeout)))
                    time.sleep(seconds_to_timeout)
                    total_wait_time += seconds_to_timeout
                    continue

                _LOGGER.info(
                    "Query execution in progress, will retry after {} seconds.".format(
                        str(const.COMMAND_SLEEP_INTERVAL_IN_SECONDS)))
                time.sleep(const.COMMAND_SLEEP_INTERVAL_IN_SECONDS)
                total_wait_time += const.COMMAND_SLEEP_INTERVAL_IN_SECONDS
            else:
                # Timeout scenario
                msg = "Command execution timed out. Last status: {}.".format(status)
                _LOGGER.info(msg)
                self.write_error(msg)

            # Destroy the context to free up space in Databricks
            if context_id:
                _LOGGER.info("Deleting context.")
                payload = {"contextId": context_id, "clusterId": cluster_id}
                _ = com.databricks_api(
                    "post", const.CONTEXT_DESTROY_ENDPOINT, session_key, data=payload
                )
                _LOGGER.info("Context deleted successfully.")
        except Exception as e:
            _LOGGER.error(e)
            _LOGGER.error(traceback.format_exc())
            self.write_error(str(e))
class MongoConnectCommand(GeneratingCommand):
    """ %(synopsis)

    ##Syntax

    %(syntax)

    ##Description

    %(description)

    ##TODO:

    """
    s = Option(require=False)
    db = Option(require=False, default='test')
    col = Option(require=False, default='tweets')
    earliest = Option(require=False, default=(datetime.now() - timedelta(hours=4)).strftime("%x %X"))
    latest = Option(require=False, default=datetime.now().strftime("%x %X"))
    limit = Option(require=False, default=10, validate=validators.Integer())

    _mongo_conf = configparser.ConfigParser()
    _mongo_conf.read(os.path.dirname(__file__) + '/../default/mongo.conf')
    _props_conf = configparser.ConfigParser()
    _props_conf.read(os.path.dirname(__file__) + '/../default/props.conf')
    _transforms_conf = configparser.ConfigParser()
    _transforms_conf.read(os.path.dirname(__file__) + '/../default/transforms.conf')

    _client = MongoClient(host='127.0.0.1', port=27017,
                          username='******', password='******',
                          authSource='admin')

    kv = re.compile(r"\b(\w+)\s*?=\s*([^=]*)(?=\s+\w+\s*=|$)")
    re_alias = re.compile(r"(\w+) as (\w+)")

    # Add more strings that confuse the parser in the list
    UNINTERESTING = set(
        chain(dateutil.parser.parserinfo.JUMP,
              dateutil.parser.parserinfo.PERTAIN,
              ['a']))

    _extracts = {}
    _transforms = {}
    _aliases = {}

    def _get_date(self, tokens):
        for end in range(len(tokens), 0, -1):
            region = tokens[:end]
            if all(token.isspace() or token in self.UNINTERESTING for token in region):
                continue
            text = ''.join(region)
            try:
                date = dateutil.parser.parse(text)
                return end, date
            except ValueError:
                pass

    def find_dates(self, text, max_tokens=50, allow_overlapping=False):
        # materialize the token list so it can be sliced (filter() returns an iterator on Python 3)
        tokens = [token for token in re.split(r'(\S+|\W+)', text) if token]
        skip_dates_ending_before = 0
        for start in range(len(tokens)):
            region = tokens[start:start + max_tokens]
            result = self._get_date(region)
            if result is not None:
                end, date = result
                if allow_overlapping or end > skip_dates_ending_before:
                    skip_dates_ending_before = end
                    yield date

    def init(self):
        # Initialize sourcetypes, props, aliases and transforms
        for sourcetype in self._props_conf:
            for key, value in self._props_conf[sourcetype].items():
                if key.startswith('extract-'):
                    if sourcetype not in self._extracts:
                        self._extracts[sourcetype] = []
                    self._extracts[sourcetype].append(
                        re.compile(value.replace('?<', '?P<')))
                if key.startswith('report-'):
                    if sourcetype not in self._transforms:
                        self._transforms[sourcetype] = []
                    if value in self._transforms_conf:
                        delim = self._transforms_conf[value]['DELIMS'].replace('"', '')
                        fields = self._transforms_conf[value]['FIELDS'].replace('"', '').split(',')
                        transform = {}
                        transform['delim'] = delim
                        transform['fields'] = fields
                        self._transforms[sourcetype].append(transform)
                if key.startswith('fieldalias-'):
                    if sourcetype not in self._aliases:
                        self._aliases[sourcetype] = {}
                    match = self.re_alias.match(value)
                    if match:
                        field, alias = match.groups()
                        self._aliases[sourcetype][field] = alias
        # Initialize database
        self.database = self._client[self.db]
        #self.collection = self.database[self.col]

    def flatten(self, _dict, key=""):
        if key != "":
            key = key + '_'
        for k, v in _dict.items():
            if isinstance(v, list):
                for elt in v:
                    if isinstance(elt, dict):
                        for k2, v2 in self.flatten(elt, key + k):
                            yield k2, v2
                    else:
                        yield key + k, elt
            elif isinstance(v, dict):
                for k2, v2 in self.flatten(v, key + k):
                    yield k2, v2
            else:
                if k != 'id' and k != 'id_str':
                    yield key + k, v

    def generate(self):
        self.init()
        rets = []
        fields = {}
        q = {}
        if self.s:
            q = {'$text': {'$search': self.s}}
        q['_time'] = {}
        q['_time']['$gte'] = dateutil.parser.parse(self.earliest)
        q['_time']['$lte'] = dateutil.parser.parse(self.latest)
        s = [('_time', -1)]
        collections = self.col.split(',')
        for collection in collections:
            for doc in self.database[collection].find(q).sort(s).limit(self.limit):
                ret = {}
                try:
                    try:
                        if '_time' in doc:
                            ret['_time'] = doc['_time'].strftime("%s.%f")
                            del doc['_time']
                        else:
                            for datefield in self._mongo_conf['fields']['DateFields'].split(','):
                                if datefield in doc:
                                    for date in self.find_dates(doc[datefield], allow_overlapping=False):
                                        ret['_time'] = date.strftime("%s.%f")
                                        break
                                if '_time' in ret:
                                    break
                    except Exception as e:
                        ret['_raw'] = "Error: %s." % str(e)
                    if '_time' not in ret:
                        ret['_time'] = time.time()
                    ret['_raw'] = str(doc['message']) if 'message' in doc else dumps(doc)
                    if 'source' in doc:
                        del doc['source']
                    ret['source'] = doc['_source'] if '_source' in doc else self.db
                    if 'sourcetype' in doc:
                        del doc['sourcetype']
                    ret['sourcetype'] = doc['_sourcetype'] if '_sourcetype' in doc else self.col
                    sourcetype = ret['sourcetype']
                    for field, value in self.flatten(doc):
                        ret[field] = value
                    for (field, value) in self.kv.findall(ret['_raw']):
                        ret[field] = value.replace('"', '')
                    if sourcetype in self._extracts:
                        for extract in self._extracts[sourcetype]:
                            match = extract.search(ret['_raw'])
                            if match:
                                for field, value in match.groupdict().items():
                                    ret[field] = value
                    if sourcetype in self._transforms:
                        for transform in self._transforms[sourcetype]:
                            f = 0
                            for value in list(reader([ret['_raw']], delimiter=str(transform['delim'])))[0]:
                                if f >= len(transform['fields']):
                                    break
                                if transform['fields'][f] != '':
                                    ret[transform['fields'][f]] = value
                                f = f + 1
                    if sourcetype in self._aliases:
                        for field, value in ret.items():
                            if field in self._aliases[sourcetype]:
                                ret[self._aliases[sourcetype][field]] = ret[field]
                    for field in ret:
                        if field not in fields:
                            fields[field] = 1
                except Exception as e:
                    ret['_raw'] = "Error: %s." % str(e)
                rets.append(ret)

        # Pad every result with the full field set so all events share the same schema
        for ret in rets:
            for field in fields:
                if field not in ret:
                    ret[field] = ''
            yield ret
class BlaggertCommand(StreamingCommand):
    opt_token = Option(
        doc='''
        **Syntax:** **token=***<fieldname>*
        **Description:** HEC token to use.
        **Default:** None''',
        name='token', require=True, validate=validators.Fieldname())

    opt_server = Option(
        doc='''
        **Syntax:** **server=***<fieldname>*
        **Description:** Server to send the payload to.
        **Default:** localhost''',
        name='server', require=False, default='localhost', validate=validators.Fieldname())

    opt_port = Option(
        doc='''
        **Syntax:** **port=***<fieldname>*
        **Description:** HEC port, not fortified red wine.
        **Default:** 8088''',
        name='port', require=False, default=8088, validate=validators.Integer())

    def __init__(self):
        super(BlaggertCommand, self).__init__()

    def prepare(self):
        return

    def stream(self, records):
        # Put your event transformation code here
        url = "https://{}:{}/services/collector/event".format(self.opt_server, self.opt_port)
        headers = {"Authorization": "Splunk {}".format(self.opt_token)}
        for record in records:
            self.logger.info('Record {0}'.format(record))
            t2 = time.time()
            payload = {"event": {"event_id": str(uuid.uuid4())}}
            for k, v in record.items():
                payload["event"][k] = v
            payload_str = json.dumps(payload)
            self.logger.info('send to HEC url={} - payload={}'.format(url, payload_str))
            try:
                res = requests.post(url, data=payload_str, headers=headers, verify=False)
                res.raise_for_status()
                self.logger.info("Sweet as {} {}".format(res.status_code, res.text))
                record["blaggert_says"] = "Done it"
            except Exception as e:
                self.logger.error('Send HEC Caught exception: {}'.format(e))
                record["blaggert_says"] = "Buggered it {}".format(e)
            yield record
class Outliers(OptionRemoteStreamingCommand):
    threshold = Option(require=False, default=0.01, validate=FloatValidator(minimum=0, maximum=1))

    # One-Class SVM arguments
    kernel = Option(require=False, default='rbf')
    degree = Option(require=False, default=3, validate=validators.Integer(minimum=1))
    gamma = Option(require=False, default=0.1, validate=FloatValidator(minimum=0, maximum=1))
    coef0 = Option(require=False, default=0.0, validate=FloatValidator())

    # Covariance Estimator arguments
    support_fraction = Option(require=False, validate=FloatValidator(minimum=0, maximum=1))
    showmah = Option(require=False, default=False, validate=validators.Boolean())

    classifier = Option(require=False, default='one_class_svm')

    code = """
import os, sys, numbers, math
import numpy as np
import scipy.sparse as sp
from scipy import stats
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.feature_extraction.text import HashingVectorizer

if __name__ == '__channelexec__':
    args = channel.receive()
    fraction = 1 - args['threshold']
    fields = args.get('fieldnames') or ['_raw']
    by_fields = None
    try:
        by_index = fields.index("by")
        by_fields = fields[(by_index+1):]
        fields = fields[:by_index]
    except:
        pass
    classifier = args['classifier']
    svm_args = {
        'nu': 0.95 * fraction + 0.05,
        'kernel': args['kernel'],
        'degree': args['degree'],
        'gamma': args['gamma'],
        'coef0': args['coef0']
    }
    rc_args = {
        'contamination': args['threshold'],
        'support_fraction': args['support_fraction']
    }
    classifiers = {
        'one_class_svm': svm.OneClassSVM(**svm_args),
        'covariance_estimator': EllipticEnvelope(**rc_args)
    }
    records = []
    for record in channel:
        if not record:
            break
        records.append(record)
    if records:
        vectorizer = HashingVectorizer(ngram_range=(1,3), n_features=int(math.sqrt(len(records))))
        X = sp.lil_matrix((len(records), vectorizer.n_features))
        for i, record in enumerate(records):
            nums = []
            strs = []
            for field in fields:
                if isinstance(record.get(field), numbers.Number):
                    nums.append(record[field])
                else:
                    strs.append(str(record.get(field) or ""))
            if nums:
                X[i] = np.array(nums, dtype=np.float64)
            elif strs:
                X[i] = vectorizer.transform([" ".join(strs)])
        X = X.toarray()
        y_pred = None
        mah = None
        clf = classifiers.get(classifier)
        if clf:
            try:
                clf.fit(X)
                y = clf.decision_function(X).ravel()
                threshold = stats.scoreatpercentile(y, 100 * fraction)
                y_pred = y > threshold
                if classifier == 'covariance_estimator' and args['showmah']:
                    mah = clf.mahalanobis(X)
            except ValueError:
                y_pred = np.zeros((X.shape[0]))
            for i, y in enumerate(y_pred):
                if y:
                    record = records[i]
                    if mah is not None:
                        record['mahalanobis'] = mah[i].item()
                    channel.send(record)
        else:
            channel.send({"error": "Incorrect classifier specified %s" % classifier})
"""

    def __dir__(self):
        return ['threshold', 'kernel', 'degree', 'gamma', 'coef0',
                'support_fraction', 'showmah', 'classifier']
class EsCommand(GeneratingCommand):
    """ Generates events that are the result of a query against Elasticsearch

    ##Syntax

    .. code-block::
        es index=<string> | q=<string> | fields=<string> | oldest=<string> | earl=<string> | limit=<int>

    ##Description

    The :code:`es` command issues a query to Elasticsearch, where the query is
    specified in :code:`q`.

    ##Example

    .. code-block::
        | es oldest=now-100d earl=now q="some text" index=nagios* limit=1000 fields=message

    This example generates events drawn from the result of the query

    """
    index = Option(doc='', require=False, default="*")
    q = Option(doc='', require=True)
    fields = Option(doc='', require=False, default="message")
    oldest = Option(doc='', require=False, default="now")
    earl = Option(doc='', require=False, default="now-1d")
    limit = Option(doc='', require=False, validate=validators.Integer(), default=100)

    def generate(self):
        config = self.get_configuration()

        self.logger.debug('Setup ES')
        es = Elasticsearch()

        body = {
            "size": self.limit,
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {
                            "query": self.q
                        }
                    }
                }
            }
        }

        # the result size is taken from the request body above
        res = es.search(index=self.index, body=body)

        for hit in res['hits']['hits']:
            yield self.getEvent(hit)

    def getEvent(self, result):
        event = {
            '_time': time.time(),
            '_index': result['_index'],
            '_type': result['_type'],
            '_id': result['_id'],
            '_score': result['_score']
        }
        event["_raw"] = json.dumps(result)
        return event

    def get_configuration(self):
        sourcePath = os.path.dirname(os.path.abspath(__file__))
        config_file = open(sourcePath + '/config.json')
        return json.load(config_file)

    def __init__(self):
        super(EsCommand, self).__init__()