def get_all_terms(self, args): """ Performs a terms aggregation for each field to get every existing term. """ self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50)) window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30})) field_name = {"field": "", "size": 2147483647} # Integer.MAX_VALUE query_template = {"aggs": {"values": {"terms": field_name}}} if args and args.start: end = ts_to_dt(args.start) else: end = ts_now() start = end - window_size if self.rules.get('use_strftime_index'): index = format_index(self.rules['index'], start, end) else: index = self.rules['index'] time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}} query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}} query = {'aggs': {'filtered': query_template}} for field in self.fields: # For composite keys, we will need to perform sub-aggregations if type(field) == list: level = query_template['aggs'] # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query for i, sub_field in enumerate(field): level['values']['terms']['field'] = sub_field if i < len(field) - 1: # If we have more fields after the current one, then set up the next nested structure level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}} level = level['values']['aggs'] else: # For non-composite keys, only a single agg is needed field_name['field'] = field res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s') if 'aggregations' in res: buckets = res['aggregations']['filtered']['values']['buckets'] if type(field) == list: # For composite keys, make the lookup based on all fields # Make it a tuple since it can be hashed and used in dictionary lookups self.seen_values[tuple(field)] = [] for bucket in buckets: # We need to walk down the hierarchy and obtain the value at each level self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket) # If we don't have any results, it could either be because of the absence of any baseline data # OR it may be because the composite key contained a non-primitive type. Either way, give the # end-users a heads up to help them debug what might be going on. if not self.seen_values[tuple(field)]: elastalert_logger.warning(( 'No results were found from all sub-aggregations. This can either indicate that there is ' 'no baseline data OR that a non-primitive field was used in a composite key.' )) else: keys = [bucket['key'] for bucket in buckets] self.seen_values[field] = keys elastalert_logger.info('Found %s unique values for %s' % (len(keys), field)) else: self.seen_values[field] = [] elastalert_logger.info('Found no values for %s' % (field))