def check_files():
    """ Command-line entry point: validate one or more rule configuration files.

    For each file, loads and schema-checks the rule, then (unless --schema-only
    is given) runs the rule's filter against Elasticsearch for each window in
    --days and reports hit counts plus warnings about keys missing from the
    first hit. Exits with status 1 if a query fails. """
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--schema-only', action='store_true', help='Show only schema errors; do not run query')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()
    for filename in args.files:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit Loader is unsafe if rule
            # files can come from untrusted sources; consider yaml.safe_load.
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))
        if args.schema_only:
            continue
        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)
            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                exit(1)
            num_hits = len(res['hits']['hits'])
            # size=1000 caps the response, so exactly 1000 hits means "at least 1000"
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days, 's' if days > 1 else ''))
            if num_hits:
                print("\nAvailable terms in first hit:")
                terms = res['hits']['hits'][0]['_source']
                print_terms(terms, '')
                pk = conf.get('primary_key')
                ck = conf.get('compare_key')
                # BUG FIX: these warnings previously printed a literal "%s" because
                # the format operand was missing; interpolate the key names.
                if pk and not lookup_es_key(terms, pk):
                    print("Warning: primary key %s is either missing or null!" % (pk))
                if ck and not lookup_es_key(terms, ck):
                    print("Warning: compare key %s is either missing or null!" % (ck))
                include = conf.get('include')
                if include:
                    for term in include:
                        if not lookup_es_key(terms, term) and '*' not in term:
                            print("Included term %s may be missing or null" % (term))
                for term in conf.get('top_count_keys', []):
                    # If the index starts with 'logstash', fields with .raw will be available but won't in _source
                    if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                        print("top_count_key %s may be missing" % (term))
                print('')
def test_compound_query_key():
    """ A list-valued query_key must be flattened into a comma-joined string,
    with the original field names kept in compound_query_key and include. """
    rule = copy.deepcopy(test_rule)
    rule.pop('use_count_query')
    rule['query_key'] = ['field1', 'field2']
    load_options(rule, test_config, 'filename.yaml')
    assert rule['query_key'] == 'field1,field2'
    assert rule['compound_query_key'] == ['field1', 'field2']
    assert 'field1' in rule['include']
    assert 'field2' in rule['include']
def check_files():
    """ Command-line entry point: validate one or more rule configuration files.

    For each file, loads and schema-checks the rule, then runs the rule's filter
    against Elasticsearch for each window in --days and reports hit counts plus
    warnings about keys missing from the first hit. Exits with status 1 if a
    query fails. """
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()
    for filename in args.files:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit Loader is unsafe if rule
            # files can come from untrusted sources; consider yaml.safe_load.
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))
        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)
            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                exit(1)
            num_hits = len(res['hits']['hits'])
            # size=1000 caps the response, so exactly 1000 hits means "at least 1000"
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days, 's' if days > 1 else ''))
            if num_hits:
                print("\nAvailable terms in first hit:")
                terms = res['hits']['hits'][0]['_source']
                print_terms(terms, '')
                pk = conf.get('primary_key')
                ck = conf.get('compare_key')
                # BUG FIX: these warnings previously printed a literal "%s" because
                # the format operand was missing; interpolate the key names.
                if pk and not lookup_es_key(terms, pk):
                    print("Warning: primary key %s is either missing or null!" % (pk))
                if ck and not lookup_es_key(terms, ck):
                    print("Warning: compare key %s is either missing or null!" % (ck))
                include = conf.get('include')
                if include:
                    for term in include:
                        if not lookup_es_key(terms, term) and '*' not in term:
                            print("Included term %s may be missing or null" % (term))
                for term in conf.get('top_count_keys', []):
                    if term not in terms:
                        print("top_count_key %s may be missing" % (term))
                print('')
def load_conf(self, rules, args):
    """ Loads a default conf dictionary (from global config file, if provided, or hard-coded
    mocked data), for initializing rules. Also initializes rules.

    :return: the default rule configuration, a dictionary """
    conf = {}
    if args.config is not None:
        with open(args.config) as config_file:
            conf = yaml.load(config_file)
    elif os.path.isfile('config.yaml'):
        with open('config.yaml') as config_file:
            conf = yaml.load(config_file)

    # These parameters arrive as {'minutes': 5}-style dicts and must become timedeltas
    for duration_key in ('buffer_time', 'run_every', 'alert_time_limit', 'old_query_limit'):
        if duration_key in conf:
            conf[duration_key] = datetime.timedelta(**conf[duration_key])

    # Mock configuration. This specifies the base values for attributes, unless supplied otherwise.
    conf_default = {
        'rules_folder': 'rules',
        'es_host': 'localhost',
        'es_port': 14900,
        'writeback_index': 'wb',
        'max_query_size': 10000,
        'alert_time_limit': datetime.timedelta(hours=24),
        'old_query_limit': datetime.timedelta(weeks=1),
        'run_every': datetime.timedelta(minutes=5),
        'disable_rules_on_error': False,
        'buffer_time': datetime.timedelta(minutes=45),
        'scroll_keepalive': '30s'
    }
    for default_key, default_value in conf_default.items():
        conf.setdefault(default_key, default_value)

    elastalert.config.base_config = copy.deepcopy(conf)
    load_options(rules, conf, args.file)

    if args.formatted_output:
        self.formatted_output['success'] = True
        self.formatted_output['name'] = rules['name']
    else:
        print("Successfully loaded %s\n" % (rules['name']))

    return conf
def load_conf(self, rules, args):
    """ Loads a default conf dictionary (from global config file, if provided, or hard-coded
    mocked data), for initializing rules. Also initializes rules.

    :return: the default rule configuration, a dictionary """
    if args.config is not None:
        with open(args.config) as stream:
            conf = yaml.load(stream)
    else:
        # No explicit config: fall back to ./config.yaml, or an empty dict
        if os.path.isfile('config.yaml'):
            with open('config.yaml') as stream:
                conf = yaml.load(stream)
        else:
            conf = {}

    # Need to convert these parameters to datetime objects
    duration_keys = ['buffer_time', 'run_every', 'alert_time_limit', 'old_query_limit']
    for duration_key in duration_keys:
        if duration_key in conf:
            conf[duration_key] = datetime.timedelta(**conf[duration_key])

    # Mock configuration. This specifies the base values for attributes, unless supplied otherwise.
    defaults = {
        'rules_folder': 'rules',
        'es_host': 'localhost',
        'es_port': 14900,
        'writeback_index': 'wb',
        'max_query_size': 10000,
        'alert_time_limit': datetime.timedelta(hours=24),
        'old_query_limit': datetime.timedelta(weeks=1),
        'run_every': datetime.timedelta(minutes=5),
        'disable_rules_on_error': False,
        'buffer_time': datetime.timedelta(minutes=45),
        'scroll_keepalive': '30s',
    }
    for name, value in defaults.items():
        if name not in conf:
            conf[name] = value

    elastalert.config.base_config = copy.deepcopy(conf)
    load_options(rules, conf, args.file)

    if args.formatted_output:
        self.formatted_output['success'] = True
        self.formatted_output['name'] = rules['name']
    else:
        print("Successfully loaded %s\n" % (rules['name']))
    return conf
def test_name_inference():
    """ A rule with no explicit name gets one inferred from its filename
    (the '.yaml' extension is stripped). """
    rule = copy.deepcopy(test_rule)
    rule.pop('name')
    load_options(rule, test_config, 'msmerc woz ere.yaml')
    assert rule['name'] == 'msmerc woz ere'
def test_file(self, conf, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results.

    :return: the downloaded hits (list) when --save is set, [] when there is
             nothing to query or --schema-only is set, or None on query error. """
    load_options(conf, {})
    print("Successfully loaded %s\n" % (conf['name']))

    if args.schema_only:
        return []

    # Set up elasticsearch client and query
    es_config = ElastAlerter.build_es_conn_config(conf)
    es_client = ElastAlerter.new_elasticsearch(es_config)
    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None
    num_hits = len(res['hits']['hits'])
    if not num_hits:
        return []

    terms = res['hits']['hits'][0]['_source']
    doc_type = res['hits']['hits'][0]['_type']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
    count_query = {'query': {'filtered': count_query}}
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = res['count']
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
    print("\nAvailable terms in first hit:")
    print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    # BUG FIX: these warnings previously printed a literal "%s" because the
    # format operand was missing; interpolate the key names.
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print('')  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
    return None
def run_elastalert(self, rule, args):
    """ Creates an ElastAlert instance and run's over for a specific rule using
    either real or mock data.

    With --json, self.data supplies the documents (sorted and given mock _ids);
    otherwise the time range is the last args.days days against real data.
    Writeback is mocked and any would-be writes are printed. """
    # Mock configuration. Nothing here is used except run_every
    conf = {'rules_folder': 'rules',
            'run_every': datetime.timedelta(minutes=5),
            'buffer_time': datetime.timedelta(minutes=45),
            'alert_time_limit': datetime.timedelta(hours=24),
            'es_host': 'es',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 100000,
            'old_query_limit': datetime.timedelta(weeks=1),
            'disable_rules_on_error': False}

    # Load and instantiate rule
    load_options(rule, conf)
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            # Explicit None for consistency with the other early returns
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            return None

        # Create mock _id for documents if it's missing
        # BUG FIX: string.letters is Python-2 only; string.ascii_letters works on 2 and 3.
        # used_ids is a set for O(1) membership tests instead of a list's O(n).
        used_ids = set()

        def get_id():
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.add(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback for both real data and json data
    client.writeback_es = None
    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

    if mock_writeback.call_count:
        print("\nWould have written the following documents to elastalert_status:\n")
        for call in mock_writeback.call_args_list:
            print("%s - %s\n" % (call[0][0], call[0][1]))
def test_file(self, conf, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results.

    :return: the downloaded hits (list) when --save is set, [] when there is
             nothing to query or --schema-only is set, or None on query error. """
    load_options(conf, {})
    print("Successfully loaded %s\n" % (conf['name']))

    if args.schema_only:
        return []

    # Set up elasticsearch client and query
    es_config = ElastAlerter.build_es_conn_config(conf)
    es_client = ElastAlerter.new_elasticsearch(es_config)
    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get('timestamp_field', '@timestamp')
    query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None
    num_hits = len(res['hits']['hits'])
    if not num_hits:
        return []

    terms = res['hits']['hits'][0]['_source']
    doc_type = res['hits']['hits'][0]['_type']

    # Get a count of all docs
    count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
    count_query = {'query': {'filtered': count_query}}
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = res['count']
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
    print("\nAvailable terms in first hit:")
    print_terms(terms, '')

    # Check for missing keys
    pk = conf.get('primary_key')
    ck = conf.get('compare_key')
    # BUG FIX: these warnings previously printed a literal "%s" because the
    # format operand was missing; interpolate the key names.
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get('include')
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and '*' not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get('top_count_keys', []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print('')  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        print("Downloaded %s documents to save" % (num_hits))
        return res['hits']['hits']
    # Explicit None (was an implicit fall-through); behavior unchanged
    return None
def run_elastalert(self, rule, args):
    """ Creates an ElastAlert instance and run's over for a specific rule using
    either real or mock data.

    With --json, self.data supplies the documents (sorted and given mock _ids);
    otherwise the time range is the last args.days days against real data.
    Writeback is mocked and any would-be writes are printed. """
    # Mock configuration. Nothing here is used except run_every
    conf = {'rules_folder': 'rules',
            'run_every': datetime.timedelta(minutes=5),
            'buffer_time': datetime.timedelta(minutes=45),
            'alert_time_limit': datetime.timedelta(hours=24),
            'es_host': 'es',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 10000,
            'old_query_limit': datetime.timedelta(weeks=1),
            'disable_rules_on_error': False}

    # Load and instantiate rule
    load_options(rule, conf)
    load_modules(rule)
    conf['rules'] = [rule]

    # If using mock data, make sure it's sorted and find appropriate time range
    timestamp_field = rule.get('timestamp_field', '@timestamp')
    if args.json:
        if not self.data:
            return None
        try:
            self.data.sort(key=lambda x: x[timestamp_field])
            starttime = ts_to_dt(self.data[0][timestamp_field])
            endtime = self.data[-1][timestamp_field]
            endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
        except KeyError as e:
            print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
            return None

        # Create mock _id for documents if it's missing
        # BUG FIX: string.letters is Python-2 only; string.ascii_letters works on 2 and 3.
        # used_ids is a set for O(1) membership tests instead of a list's O(n).
        used_ids = set()

        def get_id():
            _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
            if _id in used_ids:
                return get_id()
            used_ids.add(_id)
            return _id

        for doc in self.data:
            doc.update({'_id': doc.get('_id', get_id())})
    else:
        endtime = ts_now()
        starttime = endtime - datetime.timedelta(days=args.days)

    # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
    # This is to prevent query segmenting which unnecessarily slows down tests
    if not rule.get('use_terms_query') and not rule.get('use_count_query'):
        conf['run_every'] = endtime - starttime

    # Instantiate ElastAlert to use mock config and special rule
    with mock.patch('elastalert.elastalert.get_rule_hashes'):
        with mock.patch('elastalert.elastalert.load_rules') as load_conf:
            load_conf.return_value = conf
            if args.alert:
                client = ElastAlerter(['--verbose'])
            else:
                client = ElastAlerter(['--debug'])

    # Replace get_hits_* functions to use mock data
    if args.json:
        self.mock_elastalert(client)

    # Mock writeback for both real data and json data
    client.writeback_es = None
    with mock.patch.object(client, 'writeback') as mock_writeback:
        client.run_rule(rule, endtime, starttime)

    if mock_writeback.call_count:
        print("\nWould have written the following documents to elastalert_status:\n")
        for call in mock_writeback.call_args_list:
            print("%s - %s\n" % (call[0][0], call[0][1]))
def test_file(self, args):
    """ Loads a rule config file, performs a query over the last day (args.days),
    lists available keys and prints the number of results.

    :return: the downloaded hits (list) when --save is set, [] when there is
             nothing to query or --schema-only is set, or None on query error. """
    filename = args.file
    with open(filename) as fh:
        # NOTE(review): yaml.load without an explicit Loader is unsafe if rule
        # files can come from untrusted sources; consider yaml.safe_load.
        conf = yaml.load(fh)
    load_options(conf)
    print("Successfully loaded %s\n" % (conf["name"]))

    if args.schema_only:
        return []

    # Set up elasticsearch client and query
    es_client = Elasticsearch(host=conf["es_host"], port=conf["es_port"])
    start_time = ts_now() - datetime.timedelta(days=args.days)
    end_time = ts_now()
    ts = conf.get("timestamp_field", "@timestamp")
    query = ElastAlerter.get_query(conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts)
    index = ElastAlerter.get_index(conf, start_time, end_time)

    # Get one document for schema
    try:
        res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
    except Exception as e:
        print("Error running your filter:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None
    num_hits = len(res["hits"]["hits"])
    if not num_hits:
        return []

    terms = res["hits"]["hits"][0]["_source"]
    doc_type = res["hits"]["hits"][0]["_type"]

    # Get a count of all docs
    count_query = ElastAlerter.get_query(
        conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False
    )
    count_query = {"query": {"filtered": count_query}}
    try:
        res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
    except Exception as e:
        print("Error querying Elasticsearch:", file=sys.stderr)
        print(repr(e)[:2048], file=sys.stderr)
        return None

    num_hits = res["count"]
    print("Got %s hits from the last %s day%s" % (num_hits, args.days, "s" if args.days > 1 else ""))
    print("\nAvailable terms in first hit:")
    print_terms(terms, "")

    # Check for missing keys
    pk = conf.get("primary_key")
    ck = conf.get("compare_key")
    # BUG FIX: these warnings previously printed a literal "%s" because the
    # format operand was missing; interpolate the key names.
    if pk and not lookup_es_key(terms, pk):
        print("Warning: primary key %s is either missing or null!" % (pk), file=sys.stderr)
    if ck and not lookup_es_key(terms, ck):
        print("Warning: compare key %s is either missing or null!" % (ck), file=sys.stderr)

    include = conf.get("include")
    if include:
        for term in include:
            if not lookup_es_key(terms, term) and "*" not in term:
                print("Included term %s may be missing or null" % (term), file=sys.stderr)

    for term in conf.get("top_count_keys", []):
        # If the index starts with 'logstash', fields with .raw will be available but won't in _source
        if term not in terms and not (
            term.endswith(".raw") and term[:-4] in terms and index.startswith("logstash")
        ):
            print("top_count_key %s may be missing" % (term), file=sys.stderr)
    print("")  # Newline

    # Download up to 10,000 documents to save
    if args.save and not args.count:
        try:
            res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res["hits"]["hits"])
        print("Downloaded %s documents to save" % (num_hits))
        return res["hits"]["hits"]
    return None