# Shared imports for the snippets below; the elastalert helpers they call
# (ElastAlerter, ts_to_dt, dt_to_ts, ts_now, load_options, load_modules,
# lookup_es_key, print_terms, elasticsearch_client, and the mock_* test
# doubles) are assumed to be imported from the elastalert package and its
# test helpers.
import argparse
import datetime
import json
import random
import re
import string
import sys

import mock  # or: from unittest import mock
import yaml
import elasticsearch.client
from elasticsearch import Elasticsearch


def ea():
    rules = [{'es_host': '',
              'es_port': '',
              'name': 'anytest',
              'index': 'idx',
              'filter': [],
              'include': ['@timestamp'],
              'aggregation': datetime.timedelta(0),
              'realert': datetime.timedelta(0),
              'processed_hits': {},
              'timestamp_field': '@timestamp',
              'match_enhancements': []}]
    conf = {'rules_folder': 'rules',
            'run_every': datetime.timedelta(minutes=10),
            'buffer_time': datetime.timedelta(minutes=5),
            'alert_time_limit': datetime.timedelta(hours=24),
            'es_host': 'es',
            'es_port': 14900,
            'writeback_index': 'wb',
            'rules': rules,
            'max_query_size': 100000,
            'old_query_limit': datetime.timedelta(weeks=1)}
    elasticsearch.client.Elasticsearch = mock_es_client
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            ea = ElastAlerter(['--pin_rules'])
    ea.rules[0]['type'] = mock_ruletype()
    ea.rules[0]['alert'] = [mock_alert()]
    ea.writeback_es = mock_es_client()
    ea.writeback_es.search.return_value = {'hits': {'hits': []}}
    ea.writeback_es.create.return_value = {'_id': 'ABCD'}
    ea.current_es = mock_es_client('', '')
    return ea
def mock_hits(self, rule, start, end, index):
    """ Mocks the effects of get_hits using global data instead of Elasticsearch. """
    docs = []
    for doc in self.data:
        if start <= ts_to_dt(doc[rule['timestamp_field']]) < end:
            docs.append(doc)

    # Remove all fields which don't match 'include'
    for doc in docs:
        fields_to_remove = []
        for field in doc:
            if field != '_id':
                if not any([re.match(incl.replace('*', '.*'), field) for incl in rule['include']]):
                    fields_to_remove.append(field)
        # A plain loop rather than map(): under Python 3, map() is lazy and
        # would never actually pop the fields.
        for field in fields_to_remove:
            doc.pop(field)

    # Separate _source and _id, convert timestamps
    resp = [{'_source': doc, '_id': doc['_id']} for doc in docs]
    for doc in resp:
        doc['_source'].pop('_id')
    ElastAlerter.process_hits(rule, resp)
    return resp
def check_files():
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--schema-only', action='store_true', help='Show only schema errors; do not run query')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()

    for filename in args.files:
        with open(filename) as fh:
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            continue

        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)

            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                exit(1)

            num_hits = len(res['hits']['hits'])
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days, 's' if days > 1 else ''))

            if num_hits:
                print("\nAvailable terms in first hit:")
                terms = res['hits']['hits'][0]['_source']
                print_terms(terms, '')

                pk = conf.get('primary_key')
                ck = conf.get('compare_key')
                if pk and not lookup_es_key(terms, pk):
                    print("Warning: primary key %s is either missing or null!" % (pk))
                if ck and not lookup_es_key(terms, ck):
                    print("Warning: compare key %s is either missing or null!" % (ck))

                include = conf.get('include')
                if include:
                    for term in include:
                        if not lookup_es_key(terms, term) and '*' not in term:
                            print("Included term %s may be missing or null" % (term))

                for term in conf.get('top_count_keys', []):
                    # If the index starts with 'logstash', fields with .raw will be available but won't in _source
                    if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                        print("top_count_key %s may be missing" % (term))
            print('')
def check_files():
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()

    for filename in args.files:
        with open(filename) as fh:
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))

        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)

            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                exit(1)

            num_hits = len(res['hits']['hits'])
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days, 's' if days > 1 else ''))

            if num_hits:
                print("\nAvailable terms in first hit:")
                terms = res['hits']['hits'][0]['_source']
                print_terms(terms, '')

                pk = conf.get('primary_key')
                ck = conf.get('compare_key')
                if pk and not lookup_es_key(terms, pk):
                    print("Warning: primary key %s is either missing or null!" % (pk))
                if ck and not lookup_es_key(terms, ck):
                    print("Warning: compare key %s is either missing or null!" % (ck))

                include = conf.get('include')
                if include:
                    for term in include:
                        if not lookup_es_key(terms, term) and '*' not in term:
                            print("Included term %s may be missing or null" % (term))

                for term in conf.get('top_count_keys', []):
                    if term not in terms:
                        print("top_count_key %s may be missing" % (term))
            print('')
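# Both check_files variants above take their input from sys.argv via argparse,
# so they can be exercised from a test or script by patching sys.argv first.
# A minimal sketch; the rule file path below is a hypothetical placeholder.
def demo_check_files():
    argv = ['elastalert-check-rule', 'examples/example_rule.yaml', '--days', '1']
    with mock.patch.object(sys, 'argv', argv):
        check_files()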
def ea(): rules = [ { "es_host": "", "es_port": "", "name": "anytest", "index": "idx", "filter": [], "not_filter": [], "include": ["@timestamp"], "aggregation": datetime.timedelta(0), "realert": datetime.timedelta(0), "processed_hits": {}, "timestamp_field": "@timestamp", "match_enhancements": [], "rule_file": "blah.yaml", "ts_to_dt": ts_to_dt, "dt_to_ts": dt_to_ts, "_source_enabled": True, } ] conf = { "rules_folder": "rules", "run_every": datetime.timedelta(minutes=10), "buffer_time": datetime.timedelta(minutes=5), "alert_time_limit": datetime.timedelta(hours=24), "es_host": "es", "es_port": 14900, "writeback_index": "wb", "rules": rules, "max_query_size": 100000, "old_query_limit": datetime.timedelta(weeks=1), "disable_rules_on_error": False, } elasticsearch.client.Elasticsearch = mock_es_client with mock.patch("elastalert.elastalert.get_rule_hashes"): with mock.patch("elastalert.elastalert.load_rules") as load_conf: load_conf.return_value = conf ea = ElastAlerter(["--pin_rules"]) ea.rules[0]["type"] = mock_ruletype() ea.rules[0]["alert"] = [mock_alert()] ea.writeback_es = mock_es_client() ea.writeback_es.search.return_value = {"hits": {"hits": []}} ea.writeback_es.create.return_value = {"_id": "ABCD"} ea.current_es = mock_es_client("", "") return ea
def mock_hits(self, rule, start, end, index):
    """ Mocks the effects of get_hits using global data instead of Elasticsearch. """
    docs = []
    for doc in self.data:
        if start <= ts_to_dt(doc[rule['timestamp_field']]) < end:
            docs.append(doc)

    # Remove all fields which don't match 'include'
    for doc in docs:
        # Collect the fields first: popping from a dict while iterating over
        # it raises a RuntimeError.
        fields_to_remove = [
            field for field in doc
            if field != '_id' and not any(re.match(incl.replace('*', '.*'), field) for incl in rule['include'])
        ]
        for field in fields_to_remove:
            doc.pop(field)

    # Separate _source and _id, convert timestamps
    resp = [{'_source': doc, '_id': doc['_id']} for doc in docs]
    for doc in resp:
        doc['_source'].pop('_id')
    ElastAlerter.process_hits(rule, resp)
    return resp
def mock_hits(self, rule, start, end, index):
    """ Mocks the effects of get_hits using global data instead of Elasticsearch. """
    docs = []
    for doc in self.data:
        if start <= ts_to_dt(doc[rule["timestamp_field"]]) < end:
            docs.append(doc)

    # Remove all fields which don't match 'include'
    for doc in docs:
        fields_to_remove = []
        for field in doc:
            if field != "_id":
                if not any([re.match(incl.replace("*", ".*"), field) for incl in rule["include"]]):
                    fields_to_remove.append(field)
        # A plain loop rather than map(): under Python 3, map() is lazy and
        # would never actually pop the fields.
        for field in fields_to_remove:
            doc.pop(field)

    # Separate _source and _id, convert timestamps
    resp = [{"_source": doc, "_id": doc["_id"]} for doc in docs]
    for doc in resp:
        doc["_source"].pop("_id")
    ElastAlerter.process_hits(rule, resp)
    return resp
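# The mock_hits variants above all return documents shaped like raw
# Elasticsearch hits ({'_source': ..., '_id': ...}), restricted to the
# [start, end) window and the rule's 'include' patterns. A minimal sketch of
# calling one directly; the holder class and rule dict are illustrative only,
# and depending on the ElastAlert version process_hits may require further
# rule keys (e.g. 'ts_to_dt').
class _MockDataSource(object):
    def __init__(self, data):
        self.data = data

_source = _MockDataSource([
    {'_id': 'doc1', '@timestamp': '2015-01-01T00:00:00Z', 'username': 'alice'},
])
_rule = {'timestamp_field': '@timestamp', 'include': ['*']}
_hits = mock_hits(_source, _rule,
                  ts_to_dt('2014-12-31T00:00:00Z'),
                  ts_to_dt('2015-01-02T00:00:00Z'), 'idx')
# _hits -> [{'_source': {'@timestamp': ..., 'username': 'alice'}, '_id': 'doc1'}]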
def ea():
    rules = [{
        'es_host': '',
        'es_port': 14900,
        'name': 'anytest',
        'index': 'idx',
        'filter': [],
        'include': ['@timestamp'],
        'aggregation': datetime.timedelta(0),
        'realert': datetime.timedelta(0),
        'processed_hits': {},
        'timestamp_field': '@timestamp',
        'match_enhancements': [],
        'rule_file': 'blah.yaml',
        'max_query_size': 10000,
        'ts_to_dt': ts_to_dt,
        'dt_to_ts': dt_to_ts,
        '_source_enabled': True
    }]
    conf = {
        'rules_folder': 'rules',
        'run_every': datetime.timedelta(minutes=10),
        'buffer_time': datetime.timedelta(minutes=5),
        'alert_time_limit': datetime.timedelta(hours=24),
        'es_host': 'es',
        'es_port': 14900,
        'writeback_index': 'wb',
        'rules': rules,
        'max_query_size': 10000,
        'old_query_limit': datetime.timedelta(weeks=1),
        'disable_rules_on_error': False,
        'scroll_keepalive': '30s'
    }
    elasticsearch.client.Elasticsearch = mock_es_client
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            ea = ElastAlerter(['--pin_rules'])
    ea.rules[0]['type'] = mock_ruletype()
    ea.rules[0]['alert'] = [mock_alert()]
    ea.writeback_es = mock_es_client()
    ea.writeback_es.search.return_value = {'hits': {'hits': []}}
    ea.writeback_es.create.return_value = {'_id': 'ABCD'}
    ea.current_es = mock_es_client('', '')
    return ea
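# All three ea() variants above hand back a fully mocked ElastAlerter; in the
# real test suite the function is registered as a pytest fixture. A minimal
# usage sketch, assuming an @pytest.fixture decoration in conftest.py; the
# assertions only check values the fixture itself sets.
def test_ea_fixture(ea):
    assert ea.rules[0]['name'] == 'anytest'
    assert ea.rules[0]['index'] == 'idx'
    # writeback is mocked, so searches come back empty
    assert ea.writeback_es.search()['hits']['hits'] == []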
def test_file(self, conf, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results. """
    load_options(conf, {})
    print("Successfully loaded %s\n" % (conf['name']))

    if args.schema_only:
        return []

    # Set up elasticsearch client and query
    es_config = ElastAlerter.build_es_conn_config(conf)
    es_client = ElastAlerter.new_elasticsearch(es_config)
    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = len(res['hits']['hits'])
    if not num_hits:
        return []

    terms = res['hits']['hits'][0]['_source']
    doc_type = res['hits']['hits'][0]['_type']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
    count_query = {'query': {'filtered': count_query}}
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = res['count']
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
    print("\nAvailable terms in first hit:")
    print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print('')  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
    return None
def run_elastalert(self, rule, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Mock configuration. Nothing here is used except run_every
    conf = {'rules_folder': 'rules',
            'run_every': datetime.timedelta(minutes=5),
            'buffer_time': datetime.timedelta(minutes=45),
            'alert_time_limit': datetime.timedelta(hours=24),
            'es_host': 'es',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 100000,
            'old_query_limit': datetime.timedelta(weeks=1),
            'disable_rules_on_error': False}

    # Load and instantiate rule
    load_options(rule, conf)
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            return

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback for both real data and json data
    client.writeback_es = None
    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            print("\nWould have written the following documents to elastalert_status:\n")
            for call in mock_writeback.call_args_list:
                print("%s - %s\n" % (call[0][0], call[0][1]))
def run_elastalert(self, rule, conf):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Load and instantiate rule
    # Pass an args containing the context of whether we're alerting or not
    # It is needed to prevent unnecessary initialization of unused alerters
    load_modules_args = argparse.Namespace()
    load_modules_args.debug = not self.args.alert
    conf['rules_loader'].load_modules(rule, load_modules_args)

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if self.args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            self.starttime = self.str_to_ts(self.data[0][timestamp_field])
            self.endtime = self.str_to_ts(self.data[-1][timestamp_field]) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            if self.args.stop_error:
                exit(4)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        # Updating starttime based on timeframe rule
        if "timeframe" in rule:
            self.starttime = self.parse_starttime(timeframe=rule["timeframe"])

    # Set run_every to cover the entire time range unless count query, terms query or agg query used
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
        conf['run_every'] = self.endtime - self.starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch.object(conf['rules_loader'], 'get_hashes'):
        with mock.patch.object(conf['rules_loader'], 'load') as load_rules:
            load_rules.return_value = [rule]
            with mock.patch('elastalert.elastalert.load_conf') as load_conf:
                load_conf.return_value = conf
                if self.args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if self.args.json:
        self.mock_elastalert(client)

    # Mock writeback to return empty results
    client.writeback_es = mock.MagicMock()
    client.writeback_es.search.return_value = {"hits": {"hits": []}}

    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, self.endtime, self.starttime)

        if mock_writeback.call_count:
            if self.args.formatted_output:
                self.formatted_output['writeback'] = {}
            else:
                print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")

            errors = False
            for call in mock_writeback.call_args_list:
                if self.args.formatted_output:
                    self.formatted_output['writeback'][call[0][0]] = json.loads(json.dumps(call[0][1], default=str))
                else:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
                if call[0][0] == 'elastalert_error':
                    errors = True
            if errors and self.args.stop_error:
                exit(2)
def test_file(self, conf):
    """Loads a rule config file, performs a query over the last day (self.args.days),
    lists available keys and prints the number of results."""
    if self.args.schema_only:
        return []

    # Set up Elasticsearch client and query
    es_client = elasticsearch_client(conf)
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(
        conf['filter'],
        starttime=self.starttime,
        endtime=self.endtime,
        timestamp_field=ts,
        to_ts_func=conf['dt_to_ts']
    )
    index = ElastAlerter.get_index(conf, self.starttime, self.endtime)

    # Get one document for schema
    try:
        res = es_client.search(index=index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if self.args.stop_error:
            exit(3)
        return None

    num_hits = len(res['hits']['hits'])
    if not num_hits:
        print("Didn't get any results.")
        return []

    terms = res['hits']['hits'][0]['_source']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(
        conf['filter'],
        starttime=self.starttime,
        endtime=self.endtime,
        timestamp_field=ts,
        to_ts_func=conf['dt_to_ts'],
        sort=False
    )
    try:
        res = es_client.count(index=index, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if self.args.stop_error:
            exit(2)
        return None

    num_hits = res['count']

    if self.args.formatted_output:
        self.formatted_output['hits'] = num_hits
        self.formatted_output['days'] = self.args.days
        self.formatted_output['terms'] = list(terms.keys())
        self.formatted_output['result'] = terms
    else:
        print("Got %s hits from the last %s day%s" % (num_hits, self.args.days, "s" if self.args.days > 1 else ""))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    if not self.args.formatted_output:
        print('')  # Newline

    # Download up to max_query_size (defaults to 10,000) documents to save
    if (self.args.save or self.args.formatted_output) and not self.args.count:
        try:
            res = es_client.search(index=index, size=self.args.max_query_size, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if self.args.stop_error:
                exit(2)
            return None
        num_hits = len(res['hits']['hits'])
        if self.args.save:
            print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
def run_elastalert(self, rule, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Mock configuration. Nothing here is used except run_every
    conf = {'rules_folder': 'rules',
            'run_every': datetime.timedelta(minutes=5),
            'buffer_time': datetime.timedelta(minutes=45),
            'alert_time_limit': datetime.timedelta(hours=24),
            'es_host': 'es',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 10000,
            'old_query_limit': datetime.timedelta(weeks=1),
            'disable_rules_on_error': False}

    # Load and instantiate rule
    load_options(rule, conf)
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback for both real data and json data
    client.writeback_es = None
    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            print("\nWould have written the following documents to elastalert_status:\n")
            for call in mock_writeback.call_args_list:
                print("%s - %s\n" % (call[0][0], call[0][1]))
def run_elastalert(self, rule, conf, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Load and instantiate rule
    # Pass an args containing the context of whether we're alerting or not
    # It is needed to prevent unnecessary initialization of unused alerters
    load_modules_args = argparse.Namespace()
    load_modules_args.debug = not args.alert
    load_modules(rule, load_modules_args)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        if args.end:
            if args.end == 'NOW':
                endtime = ts_now()
            else:
                try:
                    endtime = ts_to_dt(args.end)
                except (TypeError, ValueError):
                    self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.end))
                    exit(1)
        else:
            endtime = ts_now()
        if args.start:
            try:
                starttime = ts_to_dt(args.start)
            except (TypeError, ValueError):
                self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.start))
                exit(1)
        else:
            # if days given as command line argument
            if args.days > 0:
                starttime = endtime - datetime.timedelta(days=args.days)
            else:
                # if timeframe is given in rule
                if 'timeframe' in rule:
                    starttime = endtime - datetime.timedelta(seconds=rule['timeframe'].total_seconds() * 1.01)
                # default is 1 day / 24 hours
                else:
                    starttime = endtime - datetime.timedelta(days=1)

    # Set run_every to cover the entire time range unless count query, terms query or agg query used
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback to return empty results
    client.writeback_es = mock.MagicMock()
    client.writeback_es.search.return_value = {"hits": {"hits": []}}

    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            if args.formatted_output:
                self.formatted_output['writeback'] = {}
            else:
                print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")

            errors = False
            for call in mock_writeback.call_args_list:
                if args.formatted_output:
                    self.formatted_output['writeback'][call[0][0]] = json.loads(json.dumps(call[0][1], default=str))
                else:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
                if call[0][0] == 'elastalert_error':
                    errors = True
            if errors and args.stop_error:
                exit(1)
def test_file(self, conf, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results. """
    if args.schema_only:
        return []

    # Set up Elasticsearch client and query
    es_client = elasticsearch_client(conf)
    try:
        ElastAlerter.modify_rule_for_ES5(conf)
    except Exception as e:
        print("Error connecting to ElasticSearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None

    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(
        conf['filter'],
        starttime=start_time,
        endtime=end_time,
        timestamp_field=ts,
        to_ts_func=conf['dt_to_ts'],
        five=conf['five']
    )
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None

    num_hits = len(res['hits']['hits'])
    if not num_hits:
        return []

    terms = res['hits']['hits'][0]['_source']
    doc_type = res['hits']['hits'][0]['_type']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(
        conf['filter'],
        starttime=start_time,
        endtime=end_time,
        timestamp_field=ts,
        to_ts_func=conf['dt_to_ts'],
        sort=False,
        five=conf['five']
    )
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None

    num_hits = res['count']

    if args.formatted_output:
        self.formatted_output['hits'] = num_hits
        self.formatted_output['days'] = args.days
        self.formatted_output['terms'] = terms.keys()
        self.formatted_output['result'] = terms
    else:
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    if not args.formatted_output:
        print('')  # Newline

    # Download up to max_query_size (defaults to 10,000) documents to save
    if (args.save or args.formatted_output) and not args.count:
        try:
            res = es_client.search(index, size=args.max_query_size, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None
        num_hits = len(res['hits']['hits'])
        if args.save:
            print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
def test_file(self, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results. """
    filename = args.file
    with open(filename) as fh:
        conf = yaml.load(fh)
    load_options(conf)
    print("Successfully loaded %s\n" % (conf["name"]))

    if args.schema_only:
        return []

    # Set up elasticsearch client and query
    es_client = Elasticsearch(host=conf["es_host"], port=conf["es_port"])
    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get("timestamp_field", "@timestamp")
    query = ElastAlerter.get_query(conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts)
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = len(res["hits"]["hits"])
    if not num_hits:
        return []

    terms = res["hits"]["hits"][0]["_source"]
    doc_type = res["hits"]["hits"][0]["_type"]

    # Get a count of all docs
    count_query = ElastAlerter.get_query(
        conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False
    )
    count_query = {"query": {"filtered": count_query}}
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = res["count"]
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, "s" if args.days > 1 else ""))
    print("\nAvailable terms in first hit:")
    print_terms(terms, "")

    # Check for missing keys
    pk = conf.get("primary_key")
    ck = conf.get("compare_key")
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get("include")
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and "*" not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get("top_count_keys", []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (
            term.endswith(".raw") and term[:-4] in terms and index.startswith("logstash")
        ):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print("")  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res["hits"]["hits"])
        print("Downloaded %s documents to save" % (num_hits))
        return res["hits"]["hits"]
    return None
def run_elastalert(self, rule, conf, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Load and instantiate rule
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless count query, terms query or agg query used
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback to return empty results
    client.writeback_es = mock.MagicMock()
    client.writeback_es.search.return_value = {"hits": {"hits": []}}

    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")
            errors = False
            for call in mock_writeback.call_args_list:
                print("%s - %s\n" % (call[0][0], call[0][1]))
                if call[0][0] == 'elastalert_error':
                    errors = True
            if errors and args.stop_error:
                exit(1)
def run_elastalert(self, rule, conf, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Load and instantiate rule
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless count query, terms query or agg query used
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback to return empty results
    client.writeback_es = mock.MagicMock()
    client.writeback_es.search.return_value = {"hits": {"hits": []}}

    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")
            for call in mock_writeback.call_args_list:
                print("%s - %s\n" % (call[0][0], call[0][1]))
def test_file(self, conf, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results. """
    if args.schema_only:
        return []

    # Set up Elasticsearch client and query
    es_client = elasticsearch_client(conf)
    try:
        is_five = es_client.info()['version']['number'].startswith('5')
    except Exception as e:
        print("Error connecting to ElasticSearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None
    if is_five:
        ElastAlerter.modify_rule_for_ES5(conf)

    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, five=is_five)
    print('test query: ' + str(query))
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        print('test res: ' + str(res))
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None

    num_hits = len(res['hits']['hits'])
    if not num_hits:
        return []

    terms = res['hits']['hits'][0]['_source']
    doc_type = res['hits']['hits'][0]['_type']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False, five=is_five)
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        if args.stop_error:
            exit(1)
        return None

    num_hits = res['count']
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
    print("\nAvailable terms in first hit:")
    print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print('')  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None
        num_hits = len(res['hits']['hits'])
        print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
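# Each test_file variant above reads a handful of attributes off its args
# object rather than calling argparse itself. A sketch of the minimal
# namespace they expect (values are arbitrary; max_query_size is only read by
# the variants that support --save/--formatted-output, and the variant that
# opens the rule file itself additionally reads args.file):
_test_file_args = argparse.Namespace(
    days=1,                   # query window in days
    schema_only=False,        # when True, skip querying entirely
    count=False,              # count-only mode: skip the document download
    save=False,               # when True, download matching documents
    stop_error=False,         # exit on the first Elasticsearch error
    formatted_output=False,   # collect results instead of printing them
    max_query_size=10000,     # download cap used by the newer variants
)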
def run_elastalert(self, rule, conf, args):
    """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data. """
    # Load and instantiate rule
    # Pass an args containing the context of whether we're alerting or not
    # It is needed to prevent unnecessary initialization of unused alerters
    load_modules_args = argparse.Namespace()
    load_modules_args.debug = not args.alert
    load_modules(rule, load_modules_args)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            # In-place sort: a bare sorted() call would discard its result
            # and leave self.data unsorted.
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None

        # Create mock _id for documents if it's missing
        used_ids = []

        def get_id():
            # string.ascii_letters rather than string.letters, which is gone in Python 3
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.append(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        if args.end:
            if args.end == 'NOW':
                endtime = ts_now()
            else:
                try:
                    endtime = ts_to_dt(args.end)
                except (TypeError, ValueError):
                    self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.end))
                    exit(1)
        else:
            endtime = ts_now()
        if args.start:
            try:
                starttime = ts_to_dt(args.start)
            except (TypeError, ValueError):
                self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.start))
                exit(1)
        else:
            # if days given as command line argument
            if args.days > 0:
                starttime = endtime - datetime.timedelta(days=args.days)
            else:
                # if timeframe is given in rule
                if 'timeframe' in rule:
                    starttime = endtime - datetime.timedelta(seconds=rule['timeframe'].total_seconds() * 1.01)
                # default is 1 day / 24 hours
                else:
                    starttime = endtime - datetime.timedelta(days=1)

    # Set run_every to cover the entire time range unless count query, terms query or agg query used
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback to return empty results
    client.writeback_es = mock.MagicMock()
    client.writeback_es.search.return_value = {"hits": {"hits": []}}

    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

        if mock_writeback.call_count:
            if args.formatted_output:
                self.formatted_output['writeback'] = {}
            else:
                print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")

            errors = False
            for call in mock_writeback.call_args_list:
                if args.formatted_output:
                    self.formatted_output['writeback'][call[0][0]] = json.loads(json.dumps(call[0][1], default=str))
                else:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
                if call[0][0] == 'elastalert_error':
                    errors = True
            if errors and args.stop_error:
                exit(1)
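# The run_elastalert variants likewise pull everything from an args namespace
# (or self.args). A sketch of the attributes they touch, with placeholder
# values; in the real tool args.json is the path of a mock-data JSON file.
_run_args = argparse.Namespace(
    json=None,                # truthy when mock JSON data is in use
    alert=False,              # real alerts (--verbose) vs. debug-only (--debug)
    days=1,                   # fallback query window
    start=None,               # optional ISO8601 start timestamp
    end=None,                 # optional ISO8601 end timestamp, or 'NOW'
    stop_error=False,         # exit nonzero on elastalert_error documents
    formatted_output=False,   # collect writeback docs instead of printing
)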