def test_missing_aggregation(self):
    """Check the JSON produced when a ``MissingAggregation`` is added to a query."""
    expected_json = {
        "query": {
            "filtered": {
                "filter": {"and": [{"match_all": {}}]},
                "query": {"match_all": {}},
            },
        },
        "aggs": {
            "missing_user_id": {"missing": {"field": "user_id"}},
        },
        "size": SIZE_LIMIT,
    }

    case_query = HQESQuery('cases')
    query_with_agg = case_query.aggregation(
        MissingAggregation('missing_user_id', 'user_id')
    )

    self.checkQuery(query_with_agg, expected_json)
def aggregation(self):
    """Return the aggregation object for this column's expanded value.

    When ``expand_value`` is set, a ``FilterAggregation`` built from a term
    filter on ``data_source_field`` is returned; when it is ``None``, a
    ``MissingAggregation`` on that field is returned instead.  Both are named
    after ``es_alias``.
    """
    if self.expand_value is not None:
        value_filter = filters.term(self.data_source_field, self.expand_value)
        return FilterAggregation(self.es_alias, value_filter)

    # ``None`` cannot be expressed as a term filter, so use the dedicated
    # missing aggregation for that case.
    return MissingAggregation(self.es_alias, self.data_source_field)
def get_form_duration_stats_by_user(
        domain,
        app_id,
        xmlns,
        user_ids,
        startdate,
        enddate,
        by_submission_time=True):
    """Gets stats on the duration of a selected form grouped by users

    :param domain: domain passed to the form query's ``domain`` filter
    :param app_id: when truthy, the query is additionally restricted with
        ``.app(app_id)``
    :param xmlns: passed to the query's ``xmlns`` filter
    :param user_ids: collection of user ids to include; if it contains
        ``None``, forms with no ``form.meta.userID`` are aggregated as well
    :param startdate: inclusive lower bound (``gte``) of the date filter
    :param enddate: exclusive upper bound (``lt``) of the date filter
    :param by_submission_time: use ``submitted_filter`` for the date range
        when true, ``completed_filter`` otherwise
    :returns: dict mapping each ``user_id`` bucket key to that bucket's
        ``duration_stats`` result, plus a ``MISSING_KEY`` entry when ``None``
        was among ``user_ids``
    """
    def _duration_stats():
        # Built fresh for every parent aggregation so the terms and missing
        # aggregations never share one sub-aggregation instance.
        return ExtendedStatsAggregation(
            'duration_stats',
            'form.meta.timeStart',
            script="doc['form.meta.timeEnd'].value - doc['form.meta.timeStart'].value",
        )

    date_filter_fn = submitted_filter if by_submission_time else completed_filter
    missing_users = None in user_ids

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .xmlns(xmlns)
        .filter(date_filter_fn(gte=startdate, lt=enddate))
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID').aggregation(
                _duration_stats()
            )
        )
        .size(0)
    )

    if app_id:
        query = query.app(app_id)

    if missing_users:
        query = query.aggregation(
            MissingAggregation('missing_user_id', 'form.meta.userID').aggregation(
                _duration_stats()
            )
        )

    result = {}
    aggregations = query.run().aggregations

    if missing_users:
        result[MISSING_KEY] = aggregations.missing_user_id.bucket.duration_stats.result

    # ``.items()`` rather than the Python 2-only ``.iteritems()`` so this
    # works on both Python 2 and Python 3.
    for user_id, bucket in aggregations.user_id.buckets_dict.items():
        result[user_id] = bucket.duration_stats.result
    return result
def distinct_values(self, column, size):
    """Return the keys of a terms aggregation on ``column``.

    The terms aggregation is requested with the given ``size`` and sorted on
    ``_term``.  A missing aggregation on the same column runs alongside it;
    when it reports at least one document, ``None`` is appended to the
    returned keys.
    """
    # missing aggregation can be removed on upgrade to ES 2.0
    missing_name = column + '_missing'

    terms_query = self.es.terms_aggregation(
        column, column, size=size, sort_field="_term"
    )
    query = terms_query.size(0).aggregation(
        MissingAggregation(missing_name, column)
    )

    aggregations = query.run().aggregations
    missing_stats = getattr(aggregations, missing_name).result
    values = getattr(aggregations, column).keys

    has_missing = missing_stats['doc_count'] > 0
    if has_missing:
        values.append(None)
    return values
def get_form_counts_by_user_xmlns(domain, startdate, enddate, user_ids=None,
                                  xmlnss=None, by_submission_time=True):
    """Count forms per ``(user bucket key, app_id, xmlns)`` in a date range.

    The date range is ``gte=startdate`` / ``lt=enddate`` and is applied with
    ``submitted_filter`` when ``by_submission_time`` is true, otherwise with
    ``completed_filter``.  When ``user_ids`` is truthy the query is restricted
    to those users, and if it contains ``None`` a missing-``userID`` bucket is
    aggregated too.  When ``xmlnss`` is truthy it is passed to the ``xmlns``
    filter.

    Returns a ``defaultdict`` (default ``0``) keyed by
    ``(user_bucket.key, app_bucket.key, xmlns_bucket.key)`` with the xmlns
    bucket's ``doc_count`` as the value.
    """
    def _by_app_then_xmlns():
        # Sub-aggregation shared in shape (not instance) by both the
        # per-user and the missing-user aggregations.
        return TermsAggregation('app_id', 'app_id').aggregation(
            TermsAggregation('xmlns', 'xmlns')
        )

    if by_submission_time:
        date_filter_fn = submitted_filter
    else:
        date_filter_fn = completed_filter

    include_missing = False
    per_user_agg = TermsAggregation('user_id', 'form.meta.userID')
    query = (
        FormES()
        .domain(domain)
        .filter(date_filter_fn(gte=startdate, lt=enddate))
        .aggregation(per_user_agg.aggregation(_by_app_then_xmlns()))
        .size(0)
    )

    if user_ids:
        query = query.user_ids_handle_unknown(user_ids)
        query = query.remove_default_filter('has_user')
        include_missing = None in user_ids
        if include_missing:
            missing_agg = MissingAggregation('missing_user_id', 'form.meta.userID')
            query = query.aggregation(
                missing_agg.aggregation(_by_app_then_xmlns())
            )

    if xmlnss:
        query = query.xmlns(xmlnss)

    counts = defaultdict(lambda: 0)
    aggregations = query.run().aggregations

    user_buckets = aggregations.user_id.buckets_list
    if include_missing:
        # Treat the single missing-user bucket like one more user bucket.
        user_buckets.append(aggregations.missing_user_id.bucket)

    for user_bucket in user_buckets:
        for app_bucket in user_bucket.app_id.buckets_list:
            for xmlns_bucket in app_bucket.xmlns.buckets_list:
                counts[
                    (user_bucket.key, app_bucket.key, xmlns_bucket.key)
                ] = xmlns_bucket.doc_count
    return counts
def get_last_form_submissions_by_user(domain, user_ids, app_id=None, xmlns=None):
    """Collect top-hit form submissions per user, ordered on ``received_on``.

    The query is limited to ``domain`` and ``user_ids`` and, when truthy, to
    ``app_id`` and ``xmlns``.  Hits are requested with ``is_ascending=False``
    (presumably newest first -- confirm against ``TopHitsAggregation``).

    Returns a dict mapping each ``user_id`` bucket key to that bucket's hits.
    If ``None`` is among ``user_ids``, the hits for forms without a
    ``form.meta.userID`` are stored under ``MISSING_KEY``.
    """
    def _latest_hits():
        # A new aggregation object for each parent aggregation.
        return TopHitsAggregation(
            'top_hits_last_form_submissions',
            'received_on',
            is_ascending=False,
        )

    include_missing = None in user_ids

    per_user_agg = TermsAggregation('user_id', 'form.meta.userID')
    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .aggregation(per_user_agg.aggregation(_latest_hits()))
        .size(0)
    )

    if app_id:
        query = query.app(app_id)
    if xmlns:
        query = query.xmlns(xmlns)

    submissions = {}
    if include_missing:
        missing_agg = MissingAggregation('missing_user_id', 'form.meta.userID')
        query = query.aggregation(missing_agg.aggregation(_latest_hits()))

    aggregations = query.run().aggregations

    if include_missing:
        missing_bucket = aggregations.missing_user_id.bucket
        submissions[MISSING_KEY] = missing_bucket.top_hits_last_form_submissions.hits

    submissions.update(
        (user_id, bucket.top_hits_last_form_submissions.hits)
        for user_id, bucket in six.iteritems(aggregations.user_id.buckets_dict)
    )
    return submissions