示例#1
0
def check_files():
    """ Validate rule configuration files and, optionally, try out their filters.

    Command line arguments are parsed with argparse: one or more rule files, ``--schema-only``
    to stop after loading the rule, and ``--days`` for the query windows (default 1 and 7).

    For every file the rule is loaded through ``load_options``. Unless ``--schema-only`` is set,
    the rule's filter is run against Elasticsearch once per ``--days`` window, and the first hit
    of the last window queried is compared with the keys the rule refers to.

    Exits the process with status 1 if a query raises. """
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--schema-only', action='store_true', help='Show only schema errors; do not run query')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()

    for filename in args.files:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit safe loader can construct arbitrary
            # objects; consider yaml.safe_load if rule files may come from untrusted sources.
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            continue

        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)
            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                # SystemExit is what exit() raises, without relying on the site module
                raise SystemExit(1)

            num_hits = len(res['hits']['hits'])
            # The search is capped at 1000 documents, so a full page is reported as '1000+'
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days,
                                                          's' if days > 1 else ''))

        # Only the result of the last --days window is inspected below
        if num_hits:
            print("\nAvailable terms in first hit:")
            terms = res['hits']['hits'][0]['_source']
            print_terms(terms, '')

            pk = conf.get('primary_key')
            ck = conf.get('compare_key')
            if pk and not lookup_es_key(terms, pk):
                print("Warning: primary key %s is either missing or null!" % (pk,))
            if ck and not lookup_es_key(terms, ck):
                print("Warning: compare key %s is either missing or null!" % (ck,))

            include = conf.get('include')
            if include:
                for term in include:
                    # Wildcard terms cannot be looked up literally, so they are not reported
                    if not lookup_es_key(terms, term) and '*' not in term:
                        print("Included term %s may be missing or null" % (term))

            for term in conf.get('top_count_keys', []):
                # If the index starts with 'logstash', fields with .raw will be available but won't in _source
                if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                    print("top_count_key %s may be missing" % (term))
        print('')
示例#2
0
def test_compound_query_key():
    """ A list-valued query_key is joined into one string and kept as compound_query_key. """
    rule = copy.deepcopy(test_rule)
    rule.pop('use_count_query')
    fields = ['field1', 'field2']
    rule['query_key'] = fields
    load_options(rule, test_config, 'filename.yaml')

    # Each component field has to end up in the include list
    for field in ('field1', 'field2'):
        assert field in rule['include']
    assert rule['query_key'] == 'field1,field2'
    assert rule['compound_query_key'] == ['field1', 'field2']
示例#3
0
def test_compound_query_key():
    """ A list-valued query_key is joined into one string and kept as compound_query_key. """
    rule = copy.deepcopy(test_rule)
    rule.pop('use_count_query')
    fields = ['field1', 'field2']
    rule['query_key'] = fields
    load_options(rule, test_config, 'filename.yaml')

    # Each component field has to end up in the include list
    for field in ('field1', 'field2'):
        assert field in rule['include']
    assert rule['query_key'] == 'field1,field2'
    assert rule['compound_query_key'] == ['field1', 'field2']
示例#4
0
def check_files():
    """ Validate rule configuration files and try out their filters.

    Command line arguments are parsed with argparse: one or more rule files and ``--days``
    for the query windows (default 1 and 7).

    For every file the rule is loaded through ``load_options``, its filter is run against
    Elasticsearch once per ``--days`` window, and the first hit of the last window queried
    is compared with the keys the rule refers to.

    Exits the process with status 1 if a query raises. """
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()

    for filename in args.files:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit safe loader can construct arbitrary
            # objects; consider yaml.safe_load if rule files may come from untrusted sources.
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))

        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)
            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                # SystemExit is what exit() raises, without relying on the site module
                raise SystemExit(1)

            num_hits = len(res['hits']['hits'])
            # The search is capped at 1000 documents, so a full page is reported as '1000+'
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days,
                                                          's' if days > 1 else ''))

        # Only the result of the last --days window is inspected below
        if num_hits:
            print("\nAvailable terms in first hit:")
            terms = res['hits']['hits'][0]['_source']
            print_terms(terms, '')

            pk = conf.get('primary_key')
            ck = conf.get('compare_key')
            if pk and not lookup_es_key(terms, pk):
                print("Warning: primary key %s is either missing or null!" % (pk,))
            if ck and not lookup_es_key(terms, ck):
                print("Warning: compare key %s is either missing or null!" % (ck,))

            include = conf.get('include')
            if include:
                for term in include:
                    # Wildcard terms cannot be looked up literally, so they are not reported
                    if not lookup_es_key(terms, term) and '*' not in term:
                        print("Included term %s may be missing or null" % (term))

            for term in conf.get('top_count_keys', []):
                if term not in terms:
                    print("top_count_key %s may be missing" % (term))
        print('')
示例#5
0
    def load_conf(self, rules, args):
        """ Loads a default conf dictionary (from global config file, if provided, or hard-coded mocked data),
            for initializing rules. Also initializes rules.

            The config is read from args.config when given, otherwise from 'config.yaml' in the
            current directory if that file exists, otherwise it starts out empty. Missing keys are
            filled in from the mock defaults, a deep copy is stored in elastalert.config.base_config
            and the rule is passed through load_options.

            :param rules: the rule dictionary to initialize
            :param args: parsed arguments; args.config, args.file and args.formatted_output are read
            :return: the default rule configuration, a dictionary """
        config_path = args.config
        if config_path is None and os.path.isfile('config.yaml'):
            config_path = 'config.yaml'

        if config_path is None:
            conf = {}
        else:
            with open(config_path) as fh:
                # NOTE(review): yaml.load without an explicit safe loader can construct arbitrary
                # objects; consider yaml.safe_load if the config may come from untrusted sources.
                # An empty YAML document loads as None, so fall back to an empty dictionary.
                conf = yaml.load(fh) or {}

        # Need to convert these parameters to datetime objects
        for key in ('buffer_time', 'run_every', 'alert_time_limit', 'old_query_limit'):
            if key in conf:
                conf[key] = datetime.timedelta(**conf[key])

        # Mock configuration. This specifies the base values for attributes, unless supplied otherwise.
        conf_default = {
            'rules_folder': 'rules',
            'es_host': 'localhost',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 10000,
            'alert_time_limit': datetime.timedelta(hours=24),
            'old_query_limit': datetime.timedelta(weeks=1),
            'run_every': datetime.timedelta(minutes=5),
            'disable_rules_on_error': False,
            'buffer_time': datetime.timedelta(minutes=45),
            'scroll_keepalive': '30s'
        }

        # Values present in the loaded config win over the defaults
        for key, value in conf_default.items():
            conf.setdefault(key, value)
        elastalert.config.base_config = copy.deepcopy(conf)
        load_options(rules, conf, args.file)

        if args.formatted_output:
            self.formatted_output['success'] = True
            self.formatted_output['name'] = rules['name']
        else:
            print("Successfully loaded %s\n" % (rules['name']))

        return conf
示例#6
0
    def load_conf(self, rules, args):
        """ Loads a default conf dictionary (from global config file, if provided, or hard-coded mocked data),
            for initializing rules. Also initializes rules.

            The config is read from args.config when given, otherwise from 'config.yaml' in the
            current directory if that file exists, otherwise it starts out empty. Missing keys are
            filled in from the mock defaults, a deep copy is stored in elastalert.config.base_config
            and the rule is passed through load_options.

            :param rules: the rule dictionary to initialize
            :param args: parsed arguments; args.config, args.file and args.formatted_output are read
            :return: the default rule configuration, a dictionary """
        config_path = args.config
        if config_path is None and os.path.isfile('config.yaml'):
            config_path = 'config.yaml'

        if config_path is None:
            conf = {}
        else:
            with open(config_path) as fh:
                # NOTE(review): yaml.load without an explicit safe loader can construct arbitrary
                # objects; consider yaml.safe_load if the config may come from untrusted sources.
                # An empty YAML document loads as None, so fall back to an empty dictionary.
                conf = yaml.load(fh) or {}

        # Need to convert these parameters to datetime objects
        for key in ('buffer_time', 'run_every', 'alert_time_limit', 'old_query_limit'):
            if key in conf:
                conf[key] = datetime.timedelta(**conf[key])

        # Mock configuration. This specifies the base values for attributes, unless supplied otherwise.
        conf_default = {
            'rules_folder': 'rules',
            'es_host': 'localhost',
            'es_port': 14900,
            'writeback_index': 'wb',
            'max_query_size': 10000,
            'alert_time_limit': datetime.timedelta(hours=24),
            'old_query_limit': datetime.timedelta(weeks=1),
            'run_every': datetime.timedelta(minutes=5),
            'disable_rules_on_error': False,
            'buffer_time': datetime.timedelta(minutes=45),
            'scroll_keepalive': '30s'
        }

        # Values present in the loaded config win over the defaults
        for key, value in conf_default.items():
            conf.setdefault(key, value)
        elastalert.config.base_config = copy.deepcopy(conf)
        load_options(rules, conf, args.file)

        if args.formatted_output:
            self.formatted_output['success'] = True
            self.formatted_output['name'] = rules['name']
        else:
            print("Successfully loaded %s\n" % (rules['name']))

        return conf
示例#7
0
def test_name_inference():
    """ A rule without a name gets one derived from the filename given to load_options. """
    unnamed_rule = copy.deepcopy(test_rule)
    unnamed_rule.pop('name')
    rule_filename = 'msmerc woz ere.yaml'
    load_options(unnamed_rule, test_config, rule_filename)
    # The expected name is the filename without its .yaml extension
    assert unnamed_rule['name'] == 'msmerc woz ere'
示例#8
0
    def test_file(self, conf, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results.

        :param conf: the rule dictionary; initialized in place by load_options
        :param args: parsed arguments; args.schema_only, args.days, args.save and args.count are read
        :return: an empty list if args.schema_only is set or the query matched nothing, the list of
                 downloaded hits if args.save is set without args.count, otherwise None (including
                 when an Elasticsearch request fails) """
        load_options(conf, {})
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            return []

        # Set up elasticsearch client and query
        es_config = ElastAlerter.build_es_conn_config(conf)
        es_client = ElastAlerter.new_elasticsearch(es_config)
        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get('timestamp_field', '@timestamp')
        query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        if not num_hits:
            return []

        terms = res['hits']['hits'][0]['_source']
        doc_type = res['hits']['hits'][0]['_type']

        # Get a count of all docs
        count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
        count_query = {'query': {'filtered': count_query}}
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res['count']
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

        # Check for missing keys
        pk = conf.get('primary_key')
        ck = conf.get('compare_key')
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!" % (pk,), file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!" % (ck,), file=sys.stderr)

        include = conf.get('include')
        if include:
            for term in include:
                # Wildcard terms cannot be looked up literally, so they are not reported
                if not lookup_es_key(terms, term) and '*' not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get('top_count_keys', []):
            # If the index starts with 'logstash', fields with .raw will be available but won't in _source
            if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print('')  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res['hits']['hits'])
            print("Downloaded %s documents to save" % (num_hits))
            return res['hits']['hits']

        return None
示例#9
0
    def run_elastalert(self, rule, args):
        """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data.

        :param rule: the rule dictionary; initialized in place by load_options and load_modules
        :param args: parsed arguments; args.json, args.days and args.alert are read
        :return: None. Returns early when args.json is set and self.data is empty or a document
                 lacks the timestamp field. """
        # Mock configuration. Nothing here is used except run_every
        conf = {'rules_folder': 'rules',
                'run_every': datetime.timedelta(minutes=5),
                'buffer_time': datetime.timedelta(minutes=45),
                'alert_time_limit': datetime.timedelta(hours=24),
                'es_host': 'es',
                'es_port': 14900,
                'writeback_index': 'wb',
                'max_query_size': 100000,
                'old_query_limit': datetime.timedelta(weeks=1),
                'disable_rules_on_error': False}

        # Load and instantiate rule
        load_options(rule, conf)
        load_modules(rule)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                # Pad by one second past the last document's timestamp
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                return

            # Create mock _id for documents if it's missing
            used_ids = set()

            def get_id():
                # Draw 16 random ASCII letters until the result has not been handed out before.
                # string.ascii_letters exists on both Python 2 and 3, unlike string.letters.
                while True:
                    _id = ''.join(random.choice(string.ascii_letters) for _ in range(16))
                    if _id not in used_ids:
                        used_ids.add(_id)
                        return _id

            for doc in self.data:
                # Documents that already carry an _id key are left untouched
                if '_id' not in doc:
                    doc['_id'] = get_id()
        else:
            endtime = ts_now()
            starttime = endtime - datetime.timedelta(days=args.days)

        # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback for both real data and json data
        client.writeback_es = None
        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:
                print("\nWould have written the following documents to elastalert_status:\n")
                for call in mock_writeback.call_args_list:
                    # call[0] holds the positional arguments of each recorded writeback call
                    print("%s - %s\n" % (call[0][0], call[0][1]))
示例#10
0
def test_name_inference():
    """ A rule without a name gets one derived from the filename given to load_options. """
    unnamed_rule = copy.deepcopy(test_rule)
    unnamed_rule.pop('name')
    rule_filename = 'msmerc woz ere.yaml'
    load_options(unnamed_rule, test_config, rule_filename)
    # The expected name is the filename without its .yaml extension
    assert unnamed_rule['name'] == 'msmerc woz ere'
    def test_file(self, conf, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results.

        :param conf: the rule dictionary; initialized in place by load_options
        :param args: parsed arguments; args.schema_only, args.days, args.save and args.count are read
        :return: an empty list if args.schema_only is set or the query matched nothing, the list of
                 downloaded hits if args.save is set without args.count, otherwise None (including
                 when an Elasticsearch request fails) """
        load_options(conf, {})
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            return []

        # Set up elasticsearch client and query
        es_config = ElastAlerter.build_es_conn_config(conf)
        es_client = ElastAlerter.new_elasticsearch(es_config)
        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get('timestamp_field', '@timestamp')
        query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        if not num_hits:
            return []

        terms = res['hits']['hits'][0]['_source']
        doc_type = res['hits']['hits'][0]['_type']

        # Get a count of all docs
        count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
        count_query = {'query': {'filtered': count_query}}
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res['count']
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

        # Check for missing keys
        pk = conf.get('primary_key')
        ck = conf.get('compare_key')
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!" % (pk,), file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!" % (ck,), file=sys.stderr)

        include = conf.get('include')
        if include:
            for term in include:
                # Wildcard terms cannot be looked up literally, so they are not reported
                if not lookup_es_key(terms, term) and '*' not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get('top_count_keys', []):
            # If the index starts with 'logstash', fields with .raw will be available but won't in _source
            if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print('')  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res['hits']['hits'])
            print("Downloaded %s documents to save" % (num_hits))
            return res['hits']['hits']

        # Nothing was downloaded
        return None
    def run_elastalert(self, rule, args):
        """ Creates an ElastAlert instance and runs it over a specific rule using either real or mock data.

        :param rule: the rule dictionary; initialized in place by load_options and load_modules
        :param args: parsed arguments; args.json, args.days and args.alert are read
        :return: None. Returns early when args.json is set and self.data is empty or a document
                 lacks the timestamp field. """
        # Mock configuration. Nothing here is used except run_every
        conf = {'rules_folder': 'rules',
                'run_every': datetime.timedelta(minutes=5),
                'buffer_time': datetime.timedelta(minutes=45),
                'alert_time_limit': datetime.timedelta(hours=24),
                'es_host': 'es',
                'es_port': 14900,
                'writeback_index': 'wb',
                'max_query_size': 10000,
                'old_query_limit': datetime.timedelta(weeks=1),
                'disable_rules_on_error': False}

        # Load and instantiate rule
        load_options(rule, conf)
        load_modules(rule)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return None
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                # Pad by one second past the last document's timestamp
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                return None

            # Create mock _id for documents if it's missing
            used_ids = set()

            def get_id():
                # Draw 16 random ASCII letters until the result has not been handed out before.
                # string.ascii_letters exists on both Python 2 and 3, unlike string.letters.
                while True:
                    _id = ''.join(random.choice(string.ascii_letters) for _ in range(16))
                    if _id not in used_ids:
                        used_ids.add(_id)
                        return _id

            for doc in self.data:
                # Documents that already carry an _id key are left untouched
                if '_id' not in doc:
                    doc['_id'] = get_id()
        else:
            endtime = ts_now()
            starttime = endtime - datetime.timedelta(days=args.days)

        # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback for both real data and json data
        client.writeback_es = None
        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:
                print("\nWould have written the following documents to elastalert_status:\n")
                for call in mock_writeback.call_args_list:
                    # call[0] holds the positional arguments of each recorded writeback call
                    print("%s - %s\n" % (call[0][0], call[0][1]))
示例#13
0
    def test_file(self, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results.

        :param args: parsed arguments; args.file, args.schema_only, args.days, args.save and
                     args.count are read
        :return: an empty list if args.schema_only is set or the query matched nothing, the list of
                 downloaded hits if args.save is set without args.count, otherwise None (including
                 when an Elasticsearch request fails) """
        filename = args.file
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit safe loader can construct arbitrary
            # objects; consider yaml.safe_load if rule files may come from untrusted sources.
            conf = yaml.load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf["name"]))

        if args.schema_only:
            return []

        # Set up elasticsearch client and query
        es_client = Elasticsearch(host=conf["es_host"], port=conf["es_port"])
        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get("timestamp_field", "@timestamp")
        query = ElastAlerter.get_query(conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res["hits"]["hits"])
        if not num_hits:
            return []

        terms = res["hits"]["hits"][0]["_source"]
        doc_type = res["hits"]["hits"][0]["_type"]

        # Get a count of all docs
        count_query = ElastAlerter.get_query(
            conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False
        )
        count_query = {"query": {"filtered": count_query}}
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res["count"]
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, "s" if args.days > 1 else ""))
        print("\nAvailable terms in first hit:")
        print_terms(terms, "")

        # Check for missing keys
        pk = conf.get("primary_key")
        ck = conf.get("compare_key")
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!" % (pk,), file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!" % (ck,), file=sys.stderr)

        include = conf.get("include")
        if include:
            for term in include:
                # Wildcard terms cannot be looked up literally, so they are not reported
                if not lookup_es_key(terms, term) and "*" not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get("top_count_keys", []):
            # If the index starts with 'logstash', fields with .raw will be available but won't in _source
            if term not in terms and not (
                term.endswith(".raw") and term[:-4] in terms and index.startswith("logstash")
            ):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print("")  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res["hits"]["hits"])
            print("Downloaded %s documents to save" % (num_hits))
            return res["hits"]["hits"]

        return None