Exemplo n.º 1
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
        window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get("use_strftime_index"):
            index = format_index(self.rules["index"], start, end)
        else:
            index = self.rules["index"]
        time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
        query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
        query = {"aggs": {"filtered": query_template}}

        for field in self.fields:
            field_name["field"] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if "aggregations" in res:
                buckets = res["aggregations"]["filtered"]["values"]["buckets"]
                keys = [bucket["key"] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info("Found no values for %s" % (field))
Exemplo n.º 2
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 3
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 4
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 5
0
 def get_index(rule, starttime=None, endtime=None):
     """ Gets the index for a rule. If strftime is set and starttime and endtime
     are provided, it will return a comma seperated list of indices. If strftime
     is set but starttime and endtime are not provided, it will replace all format
     tokens with a wildcard. """
     index = rule['index']
     if rule.get('use_strftime_index'):
         if starttime and endtime:
             return format_index(index, starttime, endtime)
         else:
             # Replace the substring containing format characters with a *
             format_start = index.find('%')
             format_end = index.rfind('%') + 2
             return index[:format_start] + '*' + index[format_end:]
     else:
         return index
Exemplo n.º 6
0
 def get_index(rule, starttime=None, endtime=None):
     """ Gets the index for a rule. If strftime is set and starttime and endtime
     are provided, it will return a comma seperated list of indices. If strftime
     is set but starttime and endtime are not provided, it will replace all format
     tokens with a wildcard. """
     index = rule['index']
     if rule.get('use_strftime_index'):
         if starttime and endtime:
             return format_index(index, starttime, endtime)
         else:
             # Replace the substring containing format characters with a *
             format_start = index.find('%')
             format_end = index.rfind('%') + 2
             return index[:format_start] + '*' + index[format_end:]
     else:
         return index
Exemplo n.º 7
0
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'],
                                port=self.rules['es_port'],
                                use_ssl=self.rule['use_ssl'],
                                timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(
            **self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {
            self.rules['timestamp_field']: {
                'lte': dt_to_ts(end),
                'gte': dt_to_ts(start)
            }
        }
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query,
                                 index=index,
                                 ignore_unavailable=True,
                                 timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' %
                                       (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Exemplo n.º 8
0
    def get_all_terms(self):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if self.rules.get('use_strftime_index'):
            end = ts_now()
            start = end - window_size
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query_template, index=index, ignore_unavailable=True, timeout=50)
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys