Exemplo n.º 1
0
    def add_aggregated_alert(self, match, rule):
        """ Save a match as a pending aggregate alert to elasticsearch. """
        if not rule['current_aggregate_id'] or rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]):
            # First match, set alert_time
            match_time = ts_to_dt(match[rule['timestamp_field']])
            alert_time = match_time + rule['aggregation']
            rule['aggregate_alert_time'] = alert_time
            agg_id = None
        else:
            # Already pending aggregation, use existing alert_time
            alert_time = rule['aggregate_alert_time']
            agg_id = rule['current_aggregate_id']
            logging.info('Adding alert for %s to aggregation, next alert at %s' % (rule['name'], alert_time))

        alert_body = self.get_alert_body(match, rule, False, alert_time)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)

        # If new aggregation, save _id
        if res and not agg_id:
            rule['current_aggregate_id'] = res['_id']

        # Couldn't write the match to ES, save it in memory for now
        if not res:
            rule['agg_matches'].append(match)

        return res
Exemplo n.º 2
0
    def is_silenced(self, rule_name):
        """ Checks if rule_name is currently silenced. Returns false on exception. """
        if rule_name in self.silence_cache:
            if ts_now() < self.silence_cache[rule_name][0]:
                return True
            else:
                return False

        query = {'filter': {'term': {'rule_name': rule_name}},
                 'sort': {'until': {'order': 'desc'}}}

        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index, doc_type='silence',
                                               size=1, body=query, _source_include=['until', 'exponent'])
            except ElasticsearchException as e:
                self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name})

                return False

            if res['hits']['hits']:
                until_ts = res['hits']['hits'][0]['_source']['until']
                exponent = res['hits']['hits'][0]['_source'].get('exponent', 0)
                self.silence_cache[rule_name] = (ts_to_dt(until_ts), exponent)
                if ts_now() < ts_to_dt(until_ts):
                    return True
        return False
Exemplo n.º 3
0
    def start(self):
        """ Periodically go through each rule and run it """
        starttime = self.args.start
        if starttime:
            try:
                starttime = ts_to_dt(starttime)
            except (TypeError, ValueError):
                self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
                exit(1)
        while True:
            # If writeback_es errored, it's disabled until the next query cycle
            if not self.writeback_es:
                self.writeback_es = self.new_elasticsearch(self.es_conn_config)

            self.send_pending_alerts()

            next_run = datetime.datetime.utcnow() + self.run_every

            for rule in self.rules:
                # Set endtime based on the rule's delay
                delay = rule.get('query_delay')
                if hasattr(self.args, 'end') and self.args.end:
                    endtime = ts_to_dt(self.args.end)
                elif delay:
                    endtime = ts_now() - delay
                else:
                    endtime = ts_now()

                try:
                    num_matches = self.run_rule(rule, endtime, starttime)
                except EAException as e:
                    self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
                else:
                    old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                    logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                                 " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')),
                                                      self.num_hits, num_matches, self.alerts_sent))
                    self.alerts_sent = 0

                self.remove_old_events(rule)

            if next_run < datetime.datetime.utcnow():
                # We were processing for longer than our refresh interval
                # This can happen if --start was specified with a large time period
                # or if we are running too slow to process events in real time.
                logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
                continue

            # Only force starttime once
            starttime = None

            if not self.args.pin_rules:
                self.load_rule_changes()

            # Wait before querying again
            sleep_for = (next_run - datetime.datetime.utcnow()).seconds
            logging.info("Sleeping for %s seconds" % (sleep_for))
            time.sleep(sleep_for)
Exemplo n.º 4
0
    def add_aggregated_alert(self, match, rule):
        """ Save a match as a pending aggregate alert to elasticsearch. """
        if (not rule['current_aggregate_id'] or
                ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]))):

            # Elastalert may have restarted while pending alerts exist
            pending_alert = self.find_pending_aggregate_alert(rule)
            if pending_alert:
                alert_time = rule['aggregate_alert_time'] = ts_to_dt(pending_alert['_source']['alert_time'])
                agg_id = rule['current_aggregate_id'] = pending_alert['_id']
                elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
            else:
                # First match, set alert_time
                match_time = ts_to_dt(match[rule['timestamp_field']])
                alert_time = ''
                if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'):
                    croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                    try:
                        iter = croniter(rule['aggregation']['schedule'], ts_now())
                        alert_time = unix_to_dt(iter.get_next())
                    except Exception as e:
                        self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule'])
                else:
                    alert_time = match_time + rule['aggregation']

                rule['aggregate_alert_time'] = alert_time
                agg_id = None
                elastalert_logger.info('New aggregation for %s. next alert at %s.' % (rule['name'], alert_time))
        else:
            # Already pending aggregation, use existing alert_time
            alert_time = rule['aggregate_alert_time']
            agg_id = rule['current_aggregate_id']
            elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))

        alert_body = self.get_alert_body(match, rule, False, alert_time)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)

        # If new aggregation, save _id
        if res and not agg_id:
            rule['current_aggregate_id'] = res['_id']

        # Couldn't write the match to ES, save it in memory for now
        if not res:
            rule['agg_matches'].append(match)

        return res
Exemplo n.º 5
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
        window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get("use_strftime_index"):
            index = format_index(self.rules["index"], start, end)
        else:
            index = self.rules["index"]
        time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
        query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
        query = {"aggs": {"filtered": query_template}}

        for field in self.fields:
            field_name["field"] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if "aggregations" in res:
                buckets = res["aggregations"]["filtered"]["values"]["buckets"]
                keys = [bucket["key"] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info("Found no values for %s" % (field))
Exemplo n.º 6
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 7
0
    def get_starttime(self, rule):
        """ Query ES for the last time we ran this rule.

        :param rule: The rule configuration.
        :return: A timestamp or None.
        """
        query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}},
                 'sort': {'@timestamp': {'order': 'desc'}}}
        try:
            if self.writeback_es:
                res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert_status',
                                               size=1, body=query, _source_include=['endtime', 'rule_name'])
                if res['hits']['hits']:
                    endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime'])

                    if ts_now() - endtime < self.old_query_limit:
                        return endtime
                    else:
                        logging.info("Found expired previous run for %s at %s" % (rule['name'], endtime))
                        return None
        except (ElasticsearchException, KeyError) as e:
            self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']})
            self.writeback_es = None

        return None
Exemplo n.º 8
0
    def silence(self):
        """ Silence an alert for a period of time. --silence and --rule must be passed as args. """
        if self.debug:
            logging.error('--silence not compatible with --debug')
            exit(1)

        if not self.args.rule:
            logging.error('--silence must be used with --rule')
            exit(1)

        # With --rule, self.rules will only contain that specific rule
        rule_name = self.rules[0]['name']

        try:
            unit, num = self.args.silence.split('=')
            silence_time = datetime.timedelta(**{unit: int(num)})
            # Double conversion to add tzinfo
            silence_ts = ts_to_dt(dt_to_ts(silence_time + datetime.datetime.utcnow()))
        except (ValueError, TypeError):
            logging.error('%s is not a valid time period' % (self.args.silence))
            exit(1)

        if not self.set_realert(rule_name, silence_ts, 0):
            logging.error('Failed to save silence command to elasticsearch')
            exit(1)

        logging.info('Success. %s will be silenced until %s' % (rule_name, silence_ts))
Exemplo n.º 9
0
 def get_match_str(self, match):
     lt = self.rules.get('use_local_time')
     starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules['timeframe']), lt)
     endtime = pretty_ts(match[self.ts_field], lt)
     message = 'At least %d events occurred between %s and %s\n\n' % (self.rules['num_events'],
                                                                      starttime,
                                                                      endtime)
     return message
Exemplo n.º 10
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 11
0
 def get_match_str(self, match):
     lt = self.rules.get('use_local_time')
     starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules['timeframe']), lt)
     endtime = pretty_ts(match[self.ts_field], lt)
     message = ('A maximum of %d unique %s(s) occurred since last alert or '
                'between %s and %s\n\n' % (self.rules['max_cardinality'],
                                           self.rules['cardinality_field'],
                                           starttime, endtime))
     return message
Exemplo n.º 12
0
 def get_match_str(self, match):
     lt = self.rules.get('use_local_time')
     match_ts = lookup_es_key(match, self.ts_field)
     starttime = pretty_ts(dt_to_ts(ts_to_dt(match_ts) - self.rules['timeframe']), lt)
     message = 'At least %d(%d) events occurred between %s and %s\n\n' % (self.rules['num_events'],
                                                                      match['count'],
                                                                      starttime,
                                                                      endtime)
     return message
Exemplo n.º 13
0
 def get_match_str(self, match):
     ts = match[self.rules['timestamp_field']]
     lt = self.rules.get('use_local_time')
     message = 'An abnormally low number of events occurred around %s.\n' % (pretty_ts(ts, lt))
     message += 'Between %s and %s, there were less than %s events.\n\n' % (
         pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt),
         pretty_ts(ts, lt),
         self.rules['threshold'])
     return message
Exemplo n.º 14
0
 def get_match_str(self, match):
     lt = self.rules.get("use_local_time")
     starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules["timeframe"]), lt)
     endtime = pretty_ts(match[self.ts_field], lt)
     message = "A maximum of %d unique %s(s) occurred since last alert or " "between %s and %s\n\n" % (
         self.rules["max_cardinality"],
         self.rules["cardinality_field"],
         starttime,
         endtime,
     )
     return message
Exemplo n.º 15
0
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, keep alert for later if rule reappears
                continue

            # Set current_es for top_count_keys query
            rule_es_conn_config = self.build_es_conn_config(rule)
            self.current_es = self.new_elasticsearch(rule_es_conn_config)
            self.current_es_addr = (rule['es_host'], rule['es_port'])

            # Send the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                    self.alert(matches, rule, alert_time=alert_time)
                    if rule['current_aggregate_id'] == _id:
                        rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:  # TODO: Give this a more relevant exception, try:except: is evil.
                    self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Exemplo n.º 16
0
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            agg_id = alert.get('aggregate_id', None)
            if agg_id:
                # Aggregated alerts will be taken care of by get_aggregated_matches
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, drop alert
                continue

            # Retry the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                    self.alert(matches, rule, alert_time=alert_time)
                    rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:
                    self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Exemplo n.º 17
0
 def get_match_str(self, match):
     lt = self.rules.get('use_local_time')
     starttime = (ts_to_dt(match[self.ts_field]) - self.rules['timeframe']).isoformat()
     endtime = match[self.ts_field]
     if 'max_cardinality' in self.rules:
         message = ('A maximum of %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (
             self.rules['max_cardinality'],
             self.rules['query_key'],
             starttime, endtime))
     else:
         message = ('Less than %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (
             self.rules['min_cardinality'],
             self.rules['query_key'],
             starttime, endtime))
     return message
Exemplo n.º 18
0
    def get_match_str(self, match):
        ts = match[self.rules['timestamp_field']]
        lt = self.rules.get('use_local_time')

        try:
            match_value = self.match_value[-1][:5]
        except:
            match_value = []

        message =  "Between %s and %s\n" % (pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt), pretty_ts(ts, lt))
        message += "%s(%s) %s %s\nmatch value:\n\t%s...\n\n" % (
                self.rules['stat'],
                self.rules['stat_field'],
                self.rules['stat_type'],
                self.rules['threshold'],
                '\n\t'.join(match_value)
                ) 
        return message
Exemplo n.º 19
0
    def run_all_rules(self):
        """ Run each rule one time """
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)

        self.send_pending_alerts()

        next_run = datetime.datetime.utcnow() + self.run_every

        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()

            try:
                num_matches = self.run_rule(rule, endtime, self.starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0

            self.remove_old_events(rule)

        if next_run < datetime.datetime.utcnow():
            # We were processing for longer than our refresh interval
            # This can happen if --start was specified with a large time period
            # or if we are running too slow to process events in real time.
            logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))

        # Only force starttime once
        self.starttime = None

        if not self.args.pin_rules:
            self.load_rule_changes()
Exemplo n.º 20
0
    def start(self):
        """ Periodically go through each rule and run it """
        if self.starttime:
            try:
                self.starttime = ts_to_dt(self.starttime)
            except (TypeError, ValueError):
                self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (self.starttime))
                exit(1)
        self.running = True
        while self.running:
            next_run = datetime.datetime.utcnow() + self.run_every
            self.run_all_rules()

            if next_run < datetime.datetime.utcnow():
                continue

            # Wait before querying again
            sleep_duration = (next_run - datetime.datetime.utcnow()).seconds
            self.sleep_for(sleep_duration)
Exemplo n.º 21
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'],
                                port=self.rules['es_port'],
                                timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(
            **self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {
            self.rules['timestamp_field']: {
                'lte': dt_to_ts(end),
                'gte': dt_to_ts(start)
            }
        }
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query,
                                 index=index,
                                 ignore_unavailable=True,
                                 timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' %
                                       (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 22
0
    def create_default_title(self, matches):
        subject = '%s: %d matches found - %s' % \
                      (self.rule['name'], len(matches),
                      pretty_ts(ts_to_dt(self.pipeline['alert_time'])))

        return subject
Exemplo n.º 23
0
    def alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
                match.update(counts)

        # Generate a kibana dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        for enhancement in rule['match_enhancements']:
            for match in matches:
                try:
                    enhancement.process(match)
                except EAException as e:
                    self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return

        # Run the alerts
        alert_sent = False
        alert_exception = None
        for alert in rule['alert']:
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True

        # Write the alert(s) to ES
        agg_id = None
        for match in matches:
            alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
            # Set all matches to aggregate together
            if agg_id:
                alert_body['aggregate_id'] = agg_id
            res = self.writeback('elastalert', alert_body)
            if res and not agg_id:
                agg_id = res['_id']
Exemplo n.º 24
0
    def send_alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, qk=qk)
                match.update(counts)

        # Generate a kibana3 dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        if rule.get('use_kibana4_dashboard'):
            kb_link = self.generate_kibana4_db(rule, matches[0])
            if kb_link:
                matches[0]['kibana_link'] = kb_link

        #if not rule.get('run_enhancements_first'):
        #    for enhancement in rule.get('match_enhancements'):
        #        valid_matches = []
        #        for match in matches:
        #            try:
        #                enhancement.process(match)
        #                valid_matches.append(match)
        #            except DropMatchException as e:
        #                pass
        #            except EAException as e:
        #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
        #        matches = valid_matches
        #        if not matches:
        #            return None

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return None

        # Run the alerts
        alert_sent = False
        alert_exception = None
        # Alert.pipeline is a single object shared between every alerter
        # This allows alerters to pass objects and data between themselves
        alert_pipeline = {"alert_time": alert_time}
        for alert in rule.get("actions"):
            alert.pipeline = alert_pipeline
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True
Exemplo n.º 25
0
 def replace_ts(self, hits, rule):
     for hit in hits:
         hit['_source'][rule['timestamp_field']] = ts_to_dt(hit['_source'][rule['timestamp_field']])
Exemplo n.º 26
0
 def unwrap_interval_buckets(self, timestamp, query_key, interval_buckets):
     for interval_data in interval_buckets:
         # Use bucket key here instead of start_time for more accurate match timestamp
         self.check_matches(ts_to_dt(interval_data['key_as_string']),
                            query_key, interval_data)
Exemplo n.º 27
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = elasticsearch_client(self.rules)
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        step = datetime.timedelta(**self.rules.get('window_step_size', {'days': 1}))

        for field in self.fields:
            tmp_start = start
            tmp_end = min(start + step, end)

            time_filter = {self.rules['timestamp_field']: {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}}
            query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
            query = {'aggs': {'filtered': query_template}}
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                self.seen_values.setdefault(tuple(field), [])
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = add_raw_postfix(sub_field)
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                self.seen_values.setdefault(field, [])
                # For non-composite keys, only a single agg is needed
                field_name['field'] = add_raw_postfix(field)

            # Query the entire time range in small chunks
            while tmp_start < end:
                if self.rules.get('use_strftime_index'):
                    index = format_index(self.rules['index'], tmp_start, tmp_end)
                else:
                    index = self.rules['index']
                res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
                if 'aggregations' in res:
                    buckets = res['aggregations']['filtered']['values']['buckets']
                    if type(field) == list:
                        # For composite keys, make the lookup based on all fields
                        # Make it a tuple since it can be hashed and used in dictionary lookups
                        for bucket in buckets:
                            # We need to walk down the hierarchy and obtain the value at each level
                            self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    else:
                        keys = [bucket['key'] for bucket in buckets]
                        self.seen_values[field] += keys
                else:
                    self.seen_values.setdefault(field, [])
                if tmp_start == tmp_end:
                    break
                tmp_start = tmp_end
                tmp_end = min(tmp_start + step, end)
                time_filter[self.rules['timestamp_field']] = {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}

            for key, values in self.seen_values.iteritems():
                if not values:
                    if type(key) == tuple:
                        # If we don't have any results, it could either be because of the absence of any baseline data
                        # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                        # end-users a heads up to help them debug what might be going on.
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                    else:
                        elastalert_logger.info('Found no values for %s' % (field))
                    continue
                self.seen_values[key] = list(set(values))
                elastalert_logger.info('Found %s unique values for %s' % (len(values), key))
Exemplo n.º 28
0
 def get_match_str(self, match):
     ts = match[self.rules['timestamp_field']]
     lt = self.rules.get('use_local_time')
     message = 'An abnormally low number of events occurred around %s.\n' % (pretty_ts(ts, lt))
     message += 'Between %s and %s, there were less than %s events.\n\n' % (pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt),
                                                                            pretty_ts(ts, lt),
                                                                            self.rules['threshold'])
     return message
Exemplo n.º 29
0
    def alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get(
                    'timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(
                    match[rule['timestamp_field']]) + datetime.timedelta(
                        minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys,
                                             rule.get('top_count_number'), qk)
                match.update(counts)

        # Generate a kibana dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get(
                'use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error(
                    "Could not generate kibana dash for %s match: %s" %
                    (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        for enhancement in rule['match_enhancements']:
            for match in matches:
                try:
                    enhancement.process(match)
                except EAException as e:
                    self.handle_error(
                        "Error running match enhancement: %s" % (e),
                        {'rule': rule['name']})

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return

        # Run the alerts
        alert_sent = False
        alert_exception = None
        for alert in rule['alert']:
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error(
                    'Error while running alert %s: %s' %
                    (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True

        # Write the alert(s) to ES
        agg_id = None
        for match in matches:
            alert_body = self.get_alert_body(match, rule, alert_sent,
                                             alert_time, alert_exception)
            # Set all matches to aggregate together
            if agg_id:
                alert_body['aggregate_id'] = agg_id
            res = self.writeback('elastalert', alert_body)
            if res and not agg_id:
                agg_id = res['_id']
Exemplo n.º 30
0
    def start(self):
        """ Periodically go through each rule and run it """
        starttime = self.args.start
        if starttime:
            try:
                starttime = ts_to_dt(starttime)
            except (TypeError, ValueError):
                self.handle_error(
                    "%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)"
                    % (starttime))
                exit(1)
        while True:
            # If writeback_es errored, it's disabled until the next query cycle
            if not self.writeback_es:
                self.writeback_es = Elasticsearch(host=self.es_host,
                                                  port=self.es_port)

            self.send_pending_alerts()

            next_run = datetime.datetime.utcnow() + self.run_every

            for rule in self.rules:
                # Set endtime based on the rule's delay
                delay = rule.get('query_delay')
                if hasattr(self.args, 'end') and self.args.end:
                    endtime = ts_to_dt(self.args.end)
                elif delay:
                    endtime = ts_now() - delay
                else:
                    endtime = ts_now()

                try:
                    num_matches = self.run_rule(rule, endtime, starttime)
                except EAException as e:
                    self.handle_error(
                        "Error running rule %s: %s" % (rule['name'], e),
                        {'rule': rule['name']})
                else:
                    old_starttime = pretty_ts(rule.get('original_starttime'),
                                              rule.get('use_local_time'))
                    logging.info(
                        "Ran %s from %s to %s: %s query hits, %s matches,"
                        " %s alerts sent" %
                        (rule['name'], old_starttime,
                         pretty_ts(endtime, rule.get('use_local_time')),
                         self.num_hits, num_matches, self.alerts_sent))
                    self.alerts_sent = 0

                self.remove_old_events(rule)

            if next_run < datetime.datetime.utcnow():
                # We were processing for longer than our refresh interval
                # This can happen if --start was specified with a large time period
                # or if we are running too slow to process events in real time.
                logging.warning("Querying from %s to %s took longer than %s!" %
                                (old_starttime, endtime, self.run_every))
                continue

            # Only force starttime once
            starttime = None

            if not self.args.pin_rules:
                self.load_rule_changes()

            # Wait before querying again
            sleep_for = (next_run - datetime.datetime.utcnow()).seconds
            logging.info("Sleeping for %s seconds" % (sleep_for))
            time.sleep(sleep_for)
Exemplo n.º 31
0
    def send_alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get(
                    'timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(
                    match[rule['timestamp_field']]) + datetime.timedelta(
                        minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, qk=qk)
                match.update(counts)

        # Generate a kibana3 dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get(
                'use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error(
                    "Could not generate kibana dash for %s match: %s" %
                    (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        if rule.get('use_kibana4_dashboard'):
            kb_link = self.generate_kibana4_db(rule, matches[0])
            if kb_link:
                matches[0]['kibana_link'] = kb_link

        #if not rule.get('run_enhancements_first'):
        #    for enhancement in rule.get('match_enhancements'):
        #        valid_matches = []
        #        for match in matches:
        #            try:
        #                enhancement.process(match)
        #                valid_matches.append(match)
        #            except DropMatchException as e:
        #                pass
        #            except EAException as e:
        #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
        #        matches = valid_matches
        #        if not matches:
        #            return None

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return None

        # Run the alerts
        alert_sent = False
        alert_exception = None
        # Alert.pipeline is a single object shared between every alerter
        # This allows alerters to pass objects and data between themselves
        alert_pipeline = {"alert_time": alert_time}
        for alert in rule.get("actions"):
            alert.pipeline = alert_pipeline
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error(
                    'Error while running alert %s: %s' %
                    (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True
Exemplo n.º 32
0
 def replace_ts(self, hits, rule):
     for hit in hits:
         hit['_source'][rule['timestamp_field']] = ts_to_dt(
             hit['_source'][rule['timestamp_field']])
Exemplo n.º 33
0
 def unwrap_interval_buckets(self, timestamp, query_key, interval_buckets):
     for interval_data in interval_buckets:
         # Use bucket key here instead of start_time for more accurate match timestamp
         self.check_matches(ts_to_dt(interval_data['key_as_string']), query_key, interval_data)