def _get_creator_counts(query, count, page):
    """Get the list of top contributors with the contribution count.

    Reads the ``creator_id`` facet terms from ``query``, keeps the slice
    belonging to the requested page and attaches the matching user document
    from the user index to every facet entry.

    :arg query: search object whose ``facet_counts()`` contains a
        ``creator_id`` facet.
    :arg count: page size.
    :arg page: page number; page 1 starts at offset 0.

    :returns: ``(items, total)``. ``items`` are the facet dicts of the page,
        each with an added ``'user'`` key; entries for which no user was
        found are left out. ``total`` is the number of facet terms before
        pagination.
    """
    creator_counts = query.facet_counts()['creator_id']['terms']
    total = len(creator_counts)

    # Pagination
    creator_counts = creator_counts[((page - 1) * count):(page * count)]

    # Grab all the users from the user index in ES.
    user_ids = [x['term'] for x in creator_counts]
    results = (
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'twitter_usernames', 'last_contribution_date'))[:count]
    results = UserMappingType.reshape(results)

    # Take one timestamp for the whole page so every user is measured
    # against the same instant.
    now = datetime.now()

    # Calculate days since last activity and
    # create a {<user_id>: <user>,...} dict for convenience.
    user_lookup = {}
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None
        user_lookup[r['id']] = r

    # Add the user to each dict in the creator_counts array.
    for item in creator_counts:
        item['user'] = user_lookup.get(item['term'], None)

    return ([item for item in creator_counts if item['user'] is not None],
            total)
def _filter_by_users(self, users_filter, invert=False):
    """Turn a filter on users into a filter on ``creator_id``.

    Fetches the ids of all users in the user index that match
    ``users_filter`` and returns ``F(creator_id__in=<those ids>)``. When
    ``invert`` is true the negated filter is returned instead.
    """
    user_search = (
        UserMappingType.search()
        # Optimization: Filter out users that have never contributed.
        .filter(~F(last_contribution_date=None))
        .filter(users_filter)
        .values_dict("id")
        .everything()
    )
    matching_ids = [
        user["id"] for user in UserMappingType.reshape(user_search)
    ]

    creator_filter = F(creator_id__in=matching_ids)
    return ~creator_filter if invert else creator_filter
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    Reads the search text from the ``q`` GET parameter. When it is present,
    the lowercased text is matched against the username, display name and
    twitter username fields. At most 30 results are kept and each one gets a
    ``days_since_last_activity`` entry (``None`` when the user has no
    ``last_contribution_date``).

    Renders ``community/search.html`` with ``q``, ``results`` and
    ``search_errored``; the latter is true when the lookup raised one of
    ``ES_EXCEPTIONS``.
    """
    results = []
    search_errored = False
    q = request.GET.get("q")

    if q:
        lowerq = q.lower()
        try:
            found = UserMappingType.search().query(
                iusername__match=lowerq,
                idisplay_name__match_whitespace=lowerq,
                itwitter_usernames__match=lowerq,
                should=True,
            ).values_dict(
                "id",
                "username",
                "display_name",
                "avatar",
                "twitter_usernames",
                "last_contribution_date",
            )
            # Assign to ``results`` only once the whole lookup succeeded, so
            # a failure part-way through leaves the empty list in place
            # instead of a search object that would be evaluated outside
            # this try block.
            results = UserMappingType.reshape(found)
        except ES_EXCEPTIONS:
            search_errored = True
            log.exception("User search failed.")

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity, using one timestamp for all rows.
    now = datetime.now()
    for r in results:
        lcd = r.get("last_contribution_date", None)
        r["days_since_last_activity"] = (now - lcd).days if lcd else None

    data = {
        "q": q,
        "results": results,
        "search_errored": search_errored,
    }

    return render(request, "community/search.html", data)
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    Reads the search text from the ``q`` GET parameter. When it is present,
    the lowercased text is matched against the username, display name and
    twitter username fields, and a statsd counter is incremented for the
    success or error outcome. At most 30 results are kept and each one gets
    a ``days_since_last_activity`` entry (``None`` when the user has no
    ``last_contribution_date``).

    Renders ``community/search.html`` with ``q``, ``results`` and
    ``search_errored``; the latter is true when the lookup raised one of
    ``ES_EXCEPTIONS``.
    """
    results = []
    search_errored = False
    q = request.GET.get('q')

    if q:
        lowerq = q.lower()
        try:
            found = (
                UserMappingType
                .search()
                .query(
                    iusername__match=lowerq,
                    idisplay_name__match_whitespace=lowerq,
                    itwitter_usernames__match=lowerq,
                    should=True)
                .values_dict('id', 'username', 'display_name', 'avatar',
                             'twitter_usernames', 'last_contribution_date'))
            # Assign to ``results`` only once the whole lookup succeeded, so
            # a failure part-way through leaves the empty list in place
            # instead of a search object that would be evaluated outside
            # this try block.
            results = UserMappingType.reshape(found)

            statsd.incr('community.usersearch.success')
        except ES_EXCEPTIONS:
            search_errored = True
            statsd.incr('community.usersearch.error')
            log.exception('User search failed.')

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity, using one timestamp for all rows.
    now = datetime.now()
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None

    data = {
        'q': q,
        'results': results,
        'search_errored': search_errored,
    }

    return render(request, 'community/search.html', data)
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    The ``q`` GET parameter is the search text. If given, it is lowercased
    and matched against the username, display name and twitter username
    fields; a statsd counter records whether the lookup succeeded or
    failed. Only the first 30 results are kept, and each gets a
    ``days_since_last_activity`` value (``None`` if the user has no
    ``last_contribution_date``).

    The response is ``community/search.html`` rendered with ``q``,
    ``results`` and ``search_errored`` (true when the lookup raised one of
    ``ES_EXCEPTIONS``).
    """
    results = []
    search_errored = False
    q = request.GET.get('q')

    if q:
        lowerq = q.lower()
        try:
            user_query = (
                UserMappingType
                .search()
                .query(
                    iusername__match=lowerq,
                    idisplay_name__match_whitespace=lowerq,
                    itwitter_usernames__match=lowerq,
                    should=True)
                .values_dict('id', 'username', 'display_name', 'avatar',
                             'twitter_usernames', 'last_contribution_date'))
            # ``results`` is replaced only after reshaping succeeded. If an
            # ES error interrupts the lookup, the empty list stays in place
            # and no search object is evaluated outside this try block.
            results = UserMappingType.reshape(user_query)

            statsd.incr('community.usersearch.success')
        except ES_EXCEPTIONS:
            search_errored = True
            statsd.incr('community.usersearch.error')
            log.exception('User search failed.')

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity against a single timestamp.
    now = datetime.now()
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None

    data = {
        'q': q,
        'results': results,
        'search_errored': search_errored,
    }

    return render(request, 'community/search.html', data)
def _filter_by_users(self, users_filter, invert=False):
    """Return a ``creator_id`` filter for the users matching ``users_filter``.

    The ids of all matching users are read from the user index and wrapped
    in ``F(creator_id__in=...)``. Pass ``invert=True`` to get the negation
    of that filter.
    """
    # Optimization: Filter out users that have never contributed.
    has_contributed = ~F(last_contribution_date=None)

    id_search = (
        UserMappingType
        .search()
        .filter(has_contributed)
        .filter(users_filter)
        .values_dict('id')
        .everything())

    user_ids = []
    for user in UserMappingType.reshape(id_search):
        user_ids.append(user['id'])

    by_creator = F(creator_id__in=user_ids)
    if invert:
        return ~by_creator
    return by_creator
def filter_username(self, value):
    """Return a ``creator_id`` filter for users whose names start with ``value``.

    ``value`` is lowercased and used as a prefix on the username, display
    name and twitter username fields; a user matching any of the three is
    included. Up to ``BIG_NUMBER`` user ids are collected.
    """
    prefix = value.lower()

    by_username = F(iusername__prefix=prefix)
    by_display_name = F(idisplay_name__prefix=prefix)
    by_twitter = F(itwitter_usernames__prefix=prefix)
    any_name_matches = by_username | by_display_name | by_twitter

    id_search = (
        UserMappingType
        .search()
        .filter(any_name_matches)
        .values_dict('id'))
    matching_users = UserMappingType.reshape(id_search[:BIG_NUMBER])

    creator_ids = [user['id'] for user in matching_users]
    return F(creator_id__in=creator_ids)
def _get_creator_counts(query, count, page):
    """Return one page of contributors with their counts, highest first.

    :arg query: object providing ``count()`` and a sliceable
        ``values("id", "query_count")`` -- presumably a queryset annotated
        with ``query_count``; confirm against the caller.
    :arg count: page size.
    :arg page: page number; page 1 starts at offset 0.

    :returns: ``(results, total)``. ``results`` is a list of
        ``{"count", "term", "user"}`` dicts sorted by ``count`` descending,
        one per user found in the user index; ``total`` is
        ``query.count()``.
    """
    total = query.count()

    first = (page - 1) * count
    last = page * count
    counts_by_user = {}
    for row in query.values("id", "query_count")[first:last]:
        counts_by_user[row["id"]] = row["query_count"]

    user_search = UserMappingType.search().filter(
        id__in=list(counts_by_user)
    ).values_dict(
        "id",
        "username",
        "display_name",
        "avatar",
        "twitter_usernames",
        "last_contribution_date",
    )
    users = UserMappingType.reshape(user_search[:count])

    now = datetime.now()
    results = []
    for user in users:
        uid = user.get("id")
        last_seen = user.get("last_contribution_date", None)
        if last_seen:
            user["days_since_last_activity"] = (now - last_seen).days
        else:
            user["days_since_last_activity"] = None
        results.append({
            "count": counts_by_user.get(uid),
            "term": uid,
            "user": user,
        })

    # The top contributor comes first, so order by count descending.
    results.sort(key=itemgetter("count"), reverse=True)
    return results, total
def _get_creator_counts(query, count, page):
    """Return one page of contributors with their counts, highest first.

    :arg query: object providing ``count()`` and a sliceable
        ``values('id', 'query_count')`` -- presumably a queryset annotated
        with ``query_count``; confirm against the caller.
    :arg count: page size.
    :arg page: page number; page 1 starts at offset 0.

    :returns: ``(results, total)``. ``results`` is a list of
        ``{'count', 'term', 'user'}`` dicts sorted by ``count`` descending,
        one per user found in the user index; ``total`` is
        ``query.count()``.
    """
    total = query.count()

    start = (page - 1) * count
    end = page * count
    query_data = query.values('id', 'query_count')[start:end]
    query_data = {obj['id']: obj['query_count'] for obj in query_data}

    # Hand the filter a real list: on Python 3 ``dict.keys()`` is a view
    # object, not a list.
    users_data = (
        UserMappingType
        .search()
        .filter(id__in=list(query_data))
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'twitter_usernames', 'last_contribution_date')[:count])
    users_data = UserMappingType.reshape(users_data)

    results = []
    now = datetime.now()

    for u_data in users_data:
        user_id = u_data.get('id')
        last_contribution_date = u_data.get('last_contribution_date', None)

        u_data['days_since_last_activity'] = (
            (now - last_contribution_date).days
            if last_contribution_date else None)

        results.append({
            'count': query_data.get(user_id),
            'term': user_id,
            'user': u_data,
        })

    # Descending order by count, so the top contributor comes first.
    results = sorted(results, key=itemgetter('count'), reverse=True)

    return results, total
def _get_creator_counts(query, count, page):
    """Return the requested page of contributors, ordered by count.

    :arg query: object providing ``count()`` and a sliceable
        ``values('id', 'query_count')`` -- presumably a queryset annotated
        with ``query_count``; confirm against the caller.
    :arg count: page size.
    :arg page: page number; page 1 starts at offset 0.

    :returns: ``(results, total)``. Each entry of ``results`` is a dict with
        ``'count'``, ``'term'`` (the user id) and ``'user'`` (the user
        document, extended with ``'days_since_last_activity'``); entries are
        sorted by ``'count'`` descending. ``total`` is ``query.count()``.
    """
    total = query.count()

    start = (page - 1) * count
    end = page * count
    page_rows = query.values('id', 'query_count')[start:end]
    query_data = {obj['id']: obj['query_count'] for obj in page_rows}

    # ``dict.keys()`` is a view object on Python 3; pass an actual list of
    # ids to the filter.
    users_data = (UserMappingType.search().filter(id__in=list(query_data))
                  .values_dict('id', 'username', 'display_name', 'avatar',
                               'twitter_usernames',
                               'last_contribution_date')[:count])
    users_data = UserMappingType.reshape(users_data)

    results = []
    now = datetime.now()

    for u_data in users_data:
        user_id = u_data.get('id')
        last_contribution_date = u_data.get('last_contribution_date', None)

        u_data['days_since_last_activity'] = (
            (now - last_contribution_date).days
            if last_contribution_date else None)

        results.append({
            'count': query_data.get(user_id),
            'term': user_id,
            'user': u_data,
        })

    # Descending order by count, so the top contributor comes first.
    results = sorted(results, key=itemgetter('count'), reverse=True)

    return results, total
def _get_creator_counts(query, count, page):
    """Get the list of top contributors with the contribution count.

    The ``creator_id`` facet terms of ``query`` are paginated, and each
    entry on the page is extended with the matching user document from the
    user index.

    :arg query: search object whose ``facet_counts()`` contains a
        ``creator_id`` facet.
    :arg count: page size.
    :arg page: page number; page 1 starts at offset 0.

    :returns: ``(items, total)``. ``items`` holds the facet dicts of the
        page that have a user (stored under ``'user'``); ``total`` is the
        number of facet terms before pagination.
    """
    creator_counts = query.facet_counts()['creator_id']['terms']
    total = len(creator_counts)

    # Pagination
    creator_counts = creator_counts[((page - 1) * count):(page * count)]

    # Grab all the users from the user index in ES.
    user_ids = [x['term'] for x in creator_counts]
    results = (
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'twitter_usernames', 'last_contribution_date'))[:count]
    results = UserMappingType.reshape(results)

    # One timestamp for the whole page keeps the day counts consistent
    # between users.
    now = datetime.now()

    # Calculate days since last activity and
    # create a {<user_id>: <user>,...} dict for convenience.
    user_lookup = {}
    for r in results:
        lcd = r.get('last_contribution_date', None)
        if lcd:
            r['days_since_last_activity'] = (now - lcd).days
        else:
            r['days_since_last_activity'] = None
        user_lookup[r['id']] = r

    # Add the user to each dict in the creator_counts array.
    for item in creator_counts:
        item['user'] = user_lookup.get(item['term'], None)

    return ([item for item in creator_counts if item['user'] is not None],
            total)
def get_data(self, request):
    """Return one page of top localization contributors with their metrics.

    Collects, per user, the number of revisions created and the number of
    reviews done (both restricted by ``self.get_filters()``), orders the
    users by ``self.query_values["ordering"]`` (a leading ``-`` means
    descending), and loads the user documents for the requested page.

    :returns: dict with ``results`` (ranked contributor records),
        ``count`` (number of contributors before pagination), ``filters``,
        ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsLocalization, self).get_data(request)

    # Every metric branches off from the same search and filters; each one
    # gets its own faceted query since we can't do Aggregates.
    metrics_search = RevisionMetricsMappingType.search()
    filters = self.get_filters()

    # Revisions made by each user.
    by_creator = metrics_search.filter(filters).facet(
        "creator_id", filtered=True, size=BIG_NUMBER)
    # Reviews done by each user.
    by_reviewer = metrics_search.filter(filters).facet(
        "reviewer_id", filtered=True, size=BIG_NUMBER)

    creator_terms = by_creator.facet_counts()["creator_id"]["terms"]
    reviewer_terms = by_reviewer.facet_counts()["reviewer_id"]["terms"]

    # Merge both metrics into one record per user id.
    combined = defaultdict(lambda: {
        "revision_count": 0,
        "review_count": 0,
    })
    metric_sources = (
        ("revision_count", creator_terms),
        ("review_count", reviewer_terms),
    )
    for metric_name, facet_terms in metric_sources:
        for entry in facet_terms:
            record = combined[entry["term"]]
            record["user_id"] = entry["term"]
            record[metric_name] = entry["count"]

    # Work out the sort field and direction from the ordering value.
    ordering = self.query_values["ordering"]
    sort_reverse = ordering[0] == "-"
    sort_key = ordering[1:] if sort_reverse else ordering

    ranked = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [record["user_id"] for record in ranked]
    full_count = len(user_ids)

    # Keep only the ids belonging to the requested page.
    page_size = self.query_values["page_size"]
    page_start = (self.query_values["page"] - 1) * page_size
    user_ids = user_ids[page_start:page_start + page_size]

    # Load the user documents for this page.
    user_search = UserMappingType.search().filter(
        id__in=user_ids
    ).values_dict(
        "id", "username", "display_name", "avatar", "last_contribution_date")
    users = UserMappingType.reshape(user_search[:page_size])

    # Attach each user to its metrics record and move the fields around so
    # the record makes more sense to clients.
    data = []
    for user in users:
        record = combined[user["id"]]
        record["user"] = user
        record["last_contribution_date"] = user.pop(
            "last_contribution_date", None)
        record.pop("user_id", None)
        user.pop("id", None)
        data.append(record)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Ranks continue from where the previous page ended.
    for rank, contributor in enumerate(data, page_start + 1):
        contributor["rank"] = rank

    return {
        "results": data,
        "count": full_count,
        "filters": self.query_values,
        "allowed_orderings": self.get_allowed_orderings(),
        "warnings": self.warnings,
    }
def get_data(self, request):
    """Return one page of top question contributors with their metrics.

    Collects, per user, the number of answers, the number of answers marked
    as solutions and the total of helpful votes (all restricted by
    ``self.get_filters()``), orders the users by
    ``self.query_values["ordering"]`` (a leading ``-`` means descending),
    and loads the user documents for the requested page.

    :returns: dict with ``results`` (ranked contributor records),
        ``count`` (number of contributors before pagination), ``filters``,
        ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsQuestions, self).get_data(request)

    # Every metric branches off from the same search and filters; each one
    # gets its own faceted query since we can't do Aggregates.
    query = AnswerMetricsMappingType.search()
    filters = self.get_filters()

    # Total number of answers for each user.
    answers_search = query.filter(filters).facet(
        "creator_id", filtered=True, size=BIG_NUMBER)

    # Number of answers that are solutions for each user.
    only_solutions = filters & F(is_solution=True)
    solutions_search = query.filter(only_solutions).facet(
        "creator_id", filtered=True, size=BIG_NUMBER)

    # Helpful votes across all answers for each user. It is a raw facet
    # because elasticutils only supports the term facet type in non-raw
    # facets; being raw, the filter has to be put in the facet by hand.
    helpful_facet = {
        "terms_stats": {
            "key_field": "creator_id",
            "value_field": "helpful_count",
        },
        "facet_filter": query._process_filters(filters.filters),
    }
    helpful_search = query.facet_raw(creator_id=helpful_facet)

    answer_terms = answers_search.facet_counts()["creator_id"]["terms"]
    solution_terms = solutions_search.facet_counts()["creator_id"]["terms"]
    helpful_terms = helpful_search.facet_counts()["creator_id"]["terms"]

    # Merge all three metrics into one record per user id.
    combined = defaultdict(lambda: {
        "answer_count": 0,
        "solution_count": 0,
        "helpful_vote_count": 0,
    })
    for entry in answer_terms:
        record = combined[entry["term"]]
        record["user_id"] = entry["term"]
        record["answer_count"] = entry["count"]
    for entry in solution_terms:
        record = combined[entry["term"]]
        record["user_id"] = entry["term"]
        record["solution_count"] = entry["count"]
    for entry in helpful_terms:
        record = combined[entry["term"]]
        record["user_id"] = entry["term"]
        # A terms_stats facet reports the sum under "total", not "count".
        record["helpful_vote_count"] = int(entry["total"])

    # Work out the sort field and direction from the ordering value.
    ordering = self.query_values["ordering"]
    sort_reverse = ordering[0] == "-"
    sort_key = ordering[1:] if sort_reverse else ordering

    ranked = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [record["user_id"] for record in ranked]
    full_count = len(user_ids)

    # Keep only the ids belonging to the requested page.
    page_size = self.query_values["page_size"]
    page_start = (self.query_values["page"] - 1) * page_size
    user_ids = user_ids[page_start:page_start + page_size]

    # Load the user documents for this page.
    user_search = UserMappingType.search().filter(
        id__in=user_ids
    ).values_dict(
        "id", "username", "display_name", "avatar", "last_contribution_date")
    users = UserMappingType.reshape(user_search[:page_size])

    # Attach each user to its metrics record and move the fields around so
    # the record makes more sense to clients.
    data = []
    for user in users:
        record = combined[user["id"]]
        record["user"] = user
        record["last_contribution_date"] = user.pop(
            "last_contribution_date", None)
        record.pop("user_id", None)
        user.pop("id", None)
        data.append(record)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Ranks continue from where the previous page ended.
    for rank, contributor in enumerate(data, page_start + 1):
        contributor["rank"] = rank

    return {
        "results": data,
        "count": full_count,
        "filters": self.query_values,
        "allowed_orderings": self.get_allowed_orderings(),
        "warnings": self.warnings,
    }
def get_data(self, request):
    """Return one page of top localization contributors with their metrics.

    For every user the number of revisions created and the number of
    reviews done are collected (both restricted by ``self.get_filters()``).
    Users are ordered by ``self.query_values['ordering']``, where a leading
    ``-`` means descending, and the user documents of the requested page
    are loaded from the user index.

    :returns: dict with ``results`` (contributor records including a
        ``rank``), ``count`` (number of contributors before pagination),
        ``filters``, ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsLocalization, self).get_data(request)

    # This is the base of all the metrics. Each metric branches off from
    # this to get a particular metric type, since we can't do Aggregates.
    base_query = RevisionMetricsMappingType.search()
    base_filters = self.get_filters()

    # This branch is to get the number of revisions made by each user.
    revision_query = (
        base_query
        .filter(base_filters)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch is to get the number of reviews done by each user.
    reviewer_query = (
        base_query
        .filter(base_filters)
        .facet('reviewer_id', filtered=True, size=BIG_NUMBER))

    # Collect two lists of objects that correlate users and the
    # appropriate metric count.
    revision_creator_counts = (
        revision_query.facet_counts()['creator_id']['terms'])
    revision_reviewer_counts = (
        reviewer_query.facet_counts()['reviewer_id']['terms'])

    # Combine all the metric types into one record per user.
    combined = defaultdict(lambda: {
        'revision_count': 0,
        'review_count': 0,
    })

    for d in revision_creator_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['revision_count'] = d['count']

    for d in revision_reviewer_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['review_count'] = d['count']

    # Split the ordering value into sort field and direction.
    sort_key = self.query_values['ordering']
    if sort_key[0] == '-':
        sort_reverse = True
        sort_key = sort_key[1:]
    else:
        sort_reverse = False

    # ``sorted`` is used because ``dict.values()`` is a view without a
    # ``.sort`` method on Python 3.
    top_contributors = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [c['user_id'] for c in top_contributors]
    full_count = len(user_ids)

    # Paginate those user ids.
    page_start = (
        (self.query_values['page'] - 1) * self.query_values['page_size'])
    page_end = page_start + self.query_values['page_size']
    user_ids = user_ids[page_start:page_end]

    # Get full user objects for every id on this page.
    users = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'last_contribution_date')
        [:self.query_values['page_size']])

    # For every user object found, mix in the metrics counts for that user,
    # and then reshape the data to make more sense to clients.
    data = []
    for u in users:
        d = combined[u['id']]
        d['user'] = u
        d['last_contribution_date'] = d['user'].get(
            'last_contribution_date', None)

        d.pop('user_id', None)
        d['user'].pop('id', None)
        d['user'].pop('last_contribution_date', None)

        data.append(d)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Add ranks to the objects.
    for i, contributor in enumerate(data, 1):
        contributor['rank'] = page_start + i

    return {
        'results': data,
        'count': full_count,
        'filters': self.query_values,
        'allowed_orderings': self.get_allowed_orderings(),
        'warnings': self.warnings,
    }
def get_data(self, request):
    """Return one page of top question contributors with their metrics.

    For every user the number of answers, the number of answers marked as
    solutions and the total of helpful votes are collected (all restricted
    by ``self.get_filters()``). Users are ordered by
    ``self.query_values['ordering']``, where a leading ``-`` means
    descending, and the user documents of the requested page are loaded
    from the user index.

    :returns: dict with ``results`` (contributor records including a
        ``rank``), ``count`` (number of contributors before pagination),
        ``filters``, ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsQuestions, self).get_data(request)

    # This is the base of all the metrics. Each metric branches off from
    # this to get a particular metric type, since we can't do Aggregates.
    query = AnswerMetricsMappingType.search()
    base_filters = self.get_filters()

    # This branch is to get the total number of answers for each user.
    answer_query = (
        query
        .filter(base_filters)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch gets the number of answers that are solutions for each user.
    solutions_filter = base_filters & F(is_solution=True)
    solutions_query = (
        query
        .filter(solutions_filter)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch gets the number of helpful votes across all answers for
    # each user. It is a raw facet because elasticutils only supports the
    # term facet type in non-raw facets. Because it is raw facet, we have
    # to also put the filter in the facet ourselves.
    helpful_query = (
        query
        .facet_raw(
            creator_id={
                'terms_stats': {
                    'key_field': 'creator_id',
                    'value_field': 'helpful_count',
                },
                'facet_filter': query._process_filters(base_filters.filters),
            }))

    # Collect three lists of objects that correlate users and the
    # appropriate metric count.
    creator_answer_counts = (
        answer_query.facet_counts()['creator_id']['terms'])
    creator_solutions_counts = (
        solutions_query.facet_counts()['creator_id']['terms'])
    creator_helpful_counts = (
        helpful_query.facet_counts()['creator_id']['terms'])

    # Combine all the metric types into one record per user.
    combined = defaultdict(lambda: {
        'answer_count': 0,
        'solution_count': 0,
        'helpful_vote_count': 0,
    })

    for d in creator_answer_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['answer_count'] = d['count']

    for d in creator_solutions_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['solution_count'] = d['count']

    for d in creator_helpful_counts:
        combined[d['term']]['user_id'] = d['term']
        # Since this is a term_stats filter, not just a term filter, it is
        # total, not count.
        combined[d['term']]['helpful_vote_count'] = int(d['total'])

    # Split the ordering value into sort field and direction.
    sort_key = self.query_values['ordering']
    if sort_key[0] == '-':
        sort_reverse = True
        sort_key = sort_key[1:]
    else:
        sort_reverse = False

    # ``sorted`` is used because ``dict.values()`` is a view without a
    # ``.sort`` method on Python 3.
    top_contributors = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [c['user_id'] for c in top_contributors]
    full_count = len(user_ids)

    # Paginate those user ids.
    page_start = (
        (self.query_values['page'] - 1) * self.query_values['page_size'])
    page_end = page_start + self.query_values['page_size']
    user_ids = user_ids[page_start:page_end]

    # Get full user objects for every id on this page.
    users = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'last_contribution_date')
        [:self.query_values['page_size']])

    # For every user object found, mix in the metrics counts for that user,
    # and then reshape the data to make more sense to clients.
    data = []
    for u in users:
        d = combined[u['id']]
        d['user'] = u
        d['last_contribution_date'] = d['user'].get(
            'last_contribution_date', None)

        d.pop('user_id', None)
        d['user'].pop('id', None)
        d['user'].pop('last_contribution_date', None)

        data.append(d)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Add ranks to the objects.
    for i, contributor in enumerate(data, 1):
        contributor['rank'] = page_start + i

    return {
        'results': data,
        'count': full_count,
        'filters': self.query_values,
        'allowed_orderings': self.get_allowed_orderings(),
        'warnings': self.warnings,
    }
def get_data(self, request):
    """Return one page of top question contributors with their metrics.

    Three per-user metrics are gathered, all restricted by
    ``self.get_filters()``: answers written, answers marked as solutions,
    and the total of helpful votes. Users are ordered by
    ``self.query_values['ordering']`` (a leading ``-`` means descending)
    and the user documents of the requested page are loaded from the user
    index.

    :returns: dict with ``results`` (contributor records including a
        ``rank``), ``count`` (number of contributors before pagination),
        ``filters``, ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsQuestions, self).get_data(request)

    # This is the base of all the metrics. Each metric branches off from
    # this to get a particular metric type, since we can't do Aggregates.
    query = AnswerMetricsMappingType.search()
    base_filters = self.get_filters()

    # This branch is to get the total number of answers for each user.
    answer_query = (
        query
        .filter(base_filters)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch gets the number of answers that are solutions for each user.
    solutions_filter = base_filters & F(is_solution=True)
    solutions_query = (
        query
        .filter(solutions_filter)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch gets the number of helpful votes across all answers for
    # each user. It is a raw facet because elasticutils only supports the
    # term facet type in non-raw facets. Because it is raw facet, we have
    # to also put the filter in the facet ourselves.
    helpful_query = (
        query
        .facet_raw(
            creator_id={
                'terms_stats': {
                    'key_field': 'creator_id',
                    'value_field': 'helpful_count',
                },
                'facet_filter': query._process_filters(base_filters.filters),
            }))

    # Collect three lists of objects that correlate users and the
    # appropriate metric count.
    creator_answer_counts = (
        answer_query.facet_counts()['creator_id']['terms'])
    creator_solutions_counts = (
        solutions_query.facet_counts()['creator_id']['terms'])
    creator_helpful_counts = (
        helpful_query.facet_counts()['creator_id']['terms'])

    # Combine all the metric types into one record per user.
    combined = defaultdict(lambda: {
        'answer_count': 0,
        'solution_count': 0,
        'helpful_vote_count': 0,
    })

    for d in creator_answer_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['answer_count'] = d['count']

    for d in creator_solutions_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['solution_count'] = d['count']

    for d in creator_helpful_counts:
        combined[d['term']]['user_id'] = d['term']
        # Since this is a term_stats filter, not just a term filter, it is
        # total, not count.
        combined[d['term']]['helpful_vote_count'] = int(d['total'])

    # Split the ordering value into sort field and direction.
    sort_key = self.query_values['ordering']
    if sort_key[0] == '-':
        sort_reverse = True
        sort_key = sort_key[1:]
    else:
        sort_reverse = False

    # Build a sorted list directly: on Python 3 ``dict.values()`` returns a
    # view, which cannot be sorted in place.
    top_contributors = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [c['user_id'] for c in top_contributors]
    full_count = len(user_ids)

    # Paginate those user ids.
    page_start = (
        (self.query_values['page'] - 1) * self.query_values['page_size'])
    page_end = page_start + self.query_values['page_size']
    user_ids = user_ids[page_start:page_end]

    # Get full user objects for every id on this page.
    users = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'last_contribution_date')
        [:self.query_values['page_size']])

    # For every user object found, mix in the metrics counts for that user,
    # and then reshape the data to make more sense to clients.
    data = []
    for u in users:
        d = combined[u['id']]
        d['user'] = u
        d['last_contribution_date'] = d['user'].get(
            'last_contribution_date', None)

        d.pop('user_id', None)
        d['user'].pop('id', None)
        d['user'].pop('last_contribution_date', None)

        data.append(d)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Add ranks to the objects.
    for i, contributor in enumerate(data, 1):
        contributor['rank'] = page_start + i

    return {
        'results': data,
        'count': full_count,
        'filters': self.query_values,
        'allowed_orderings': self.get_allowed_orderings(),
        'warnings': self.warnings,
    }
def get_data(self, request):
    """Return one page of top localization contributors with their metrics.

    Two per-user metrics are gathered, both restricted by
    ``self.get_filters()``: revisions created and reviews done. Users are
    ordered by ``self.query_values['ordering']`` (a leading ``-`` means
    descending) and the user documents of the requested page are loaded
    from the user index.

    :returns: dict with ``results`` (contributor records including a
        ``rank``), ``count`` (number of contributors before pagination),
        ``filters``, ``allowed_orderings`` and ``warnings``.
    """
    super(TopContributorsLocalization, self).get_data(request)

    # This is the base of all the metrics. Each metric branches off from
    # this to get a particular metric type, since we can't do Aggregates.
    base_query = RevisionMetricsMappingType.search()
    base_filters = self.get_filters()

    # This branch is to get the number of revisions made by each user.
    revision_query = (
        base_query
        .filter(base_filters)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch is to get the number of reviews done by each user.
    reviewer_query = (
        base_query
        .filter(base_filters)
        .facet('reviewer_id', filtered=True, size=BIG_NUMBER))

    # Collect two lists of objects that correlate users and the
    # appropriate metric count.
    revision_creator_counts = (
        revision_query.facet_counts()['creator_id']['terms'])
    revision_reviewer_counts = (
        reviewer_query.facet_counts()['reviewer_id']['terms'])

    # Combine all the metric types into one record per user.
    combined = defaultdict(lambda: {
        'revision_count': 0,
        'review_count': 0,
    })

    for d in revision_creator_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['revision_count'] = d['count']

    for d in revision_reviewer_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['review_count'] = d['count']

    # Split the ordering value into sort field and direction.
    sort_key = self.query_values['ordering']
    if sort_key[0] == '-':
        sort_reverse = True
        sort_key = sort_key[1:]
    else:
        sort_reverse = False

    # Build a sorted list directly: on Python 3 ``dict.values()`` returns a
    # view, which cannot be sorted in place.
    top_contributors = sorted(
        combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [c['user_id'] for c in top_contributors]
    full_count = len(user_ids)

    # Paginate those user ids.
    page_start = (
        (self.query_values['page'] - 1) * self.query_values['page_size'])
    page_end = page_start + self.query_values['page_size']
    user_ids = user_ids[page_start:page_end]

    # Get full user objects for every id on this page.
    users = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'last_contribution_date')
        [:self.query_values['page_size']])

    # For every user object found, mix in the metrics counts for that user,
    # and then reshape the data to make more sense to clients.
    data = []
    for u in users:
        d = combined[u['id']]
        d['user'] = u
        d['last_contribution_date'] = d['user'].get(
            'last_contribution_date', None)

        d.pop('user_id', None)
        d['user'].pop('id', None)
        d['user'].pop('last_contribution_date', None)

        data.append(d)

    # One last sort, since ES didn't return the users in any particular order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Add ranks to the objects.
    for i, contributor in enumerate(data, 1):
        contributor['rank'] = page_start + i

    return {
        'results': data,
        'count': full_count,
        'filters': self.query_values,
        'allowed_orderings': self.get_allowed_orderings(),
        'warnings': self.warnings,
    }