Пример #1
0
def _get_creator_counts(query, count, page):
    """Get one page of top contributors with their contribution counts.

    Args:
        query: Search object whose
            ``facet_counts()['creator_id']['terms']`` list is read.
        count: Page size.
        page: Page number; page 1 starts at offset 0.

    Returns:
        A ``(items, total)`` tuple. ``items`` are the facet term dicts of
        the requested page, each extended in place with a ``'user'`` dict;
        terms for which no user was found are left out. ``total`` is the
        number of facet terms before pagination.
    """
    creator_counts = query.facet_counts()['creator_id']['terms']
    total = len(creator_counts)

    # Pagination
    start = (page - 1) * count
    creator_counts = creator_counts[start:start + count]

    # Grab all the users of this page from the user index in ES.
    user_ids = [x['term'] for x in creator_counts]
    results = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'twitter_usernames', 'last_contribution_date')[:count])

    # Take the reference time once so every row is measured against the
    # same instant instead of a fresh clock reading per user.
    now = datetime.now()

    # Calculate days since last activity and
    # create a {<user_id>: <user>,...} dict for convenience.
    user_lookup = {}
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None
        user_lookup[r['id']] = r

    # Add the user to each dict in the creator_counts array.
    for item in creator_counts:
        item['user'] = user_lookup.get(item['term'], None)

    return ([item for item in creator_counts if item['user'] is not None],
            total)
Пример #2
0
    def _filter_by_users(self, users_filter, invert=False):
        """Build a ``creator_id`` filter from the users matching a filter.

        Args:
            users_filter: Filter applied to the user search.
            invert: When true, the resulting filter is negated.

        Returns:
            ``F(creator_id__in=<ids of matching users>)``, negated when
            ``invert`` is set.
        """
        search = UserMappingType.search()
        # Optimization: Filter out users that have never contributed.
        search = search.filter(~F(last_contribution_date=None))
        search = search.filter(users_filter)
        search = search.values_dict("id").everything()

        matched = UserMappingType.reshape(search)
        creator_filter = F(creator_id__in=[user["id"] for user in matched])

        return ~creator_filter if invert else creator_filter
Пример #3
0
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    The ``q`` GET parameter is lower-cased and matched against the
    ``iusername``, ``idisplay_name`` and ``itwitter_usernames`` fields.
    Renders ``community/search.html`` with ``q``, at most 30 ``results``
    (each with a ``days_since_last_activity`` entry) and ``search_errored``.
    """
    results = []
    search_errored = False
    q = request.GET.get("q")

    if q:
        lowerq = q.lower()
        try:
            results = (UserMappingType.search().query(
                iusername__match=lowerq,
                idisplay_name__match_whitespace=lowerq,
                itwitter_usernames__match=lowerq,
                should=True,
            ).values_dict(
                "id",
                "username",
                "display_name",
                "avatar",
                "twitter_usernames",
                "last_contribution_date",
            ))
            results = UserMappingType.reshape(results)

        except ES_EXCEPTIONS:
            search_errored = True
            # If reshape() failed, ``results`` still holds the search object
            # assigned above; reset it so the code below only ever touches
            # a plain (empty) list after an error.
            results = []
            log.exception("User search failed.")

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity, against a single reference time.
    now = datetime.now()
    for r in results:
        lcd = r.get("last_contribution_date", None)
        r["days_since_last_activity"] = (now - lcd).days if lcd else None

    data = {
        "q": q,
        "results": results,
        "search_errored": search_errored,
    }

    return render(request, "community/search.html", data)
Пример #4
0
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    The ``q`` GET parameter is lower-cased and matched against the
    ``iusername``, ``idisplay_name`` and ``itwitter_usernames`` fields.
    A statsd counter is incremented for success or error. Renders
    ``community/search.html`` with ``q``, at most 30 ``results`` (each with
    a ``days_since_last_activity`` entry) and ``search_errored``.
    """
    results = []
    search_errored = False
    q = request.GET.get('q')

    if q:
        lowerq = q.lower()
        try:
            results = (
                UserMappingType
                .search()
                .query(
                    iusername__match=lowerq,
                    idisplay_name__match_whitespace=lowerq,
                    itwitter_usernames__match=lowerq,
                    should=True)
                .values_dict('id', 'username', 'display_name', 'avatar',
                             'twitter_usernames', 'last_contribution_date'))
            results = UserMappingType.reshape(results)

            statsd.incr('community.usersearch.success')
        except ES_EXCEPTIONS:
            search_errored = True
            # If reshape() failed, ``results`` still holds the search object
            # assigned above; reset it so the code below only ever touches
            # a plain (empty) list after an error.
            results = []
            statsd.incr('community.usersearch.error')
            log.exception('User search failed.')

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity, against a single reference time.
    now = datetime.now()
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None

    data = {
        'q': q,
        'results': results,
        'search_errored': search_errored,
    }

    return render(request, 'community/search.html', data)
Пример #5
0
def search(request):
    """Find users by username and displayname.

    Uses the ES user's index.

    The lower-cased ``q`` GET parameter is matched against the
    ``iusername``, ``idisplay_name`` and ``itwitter_usernames`` fields, and
    the outcome is counted in statsd. The ``community/search.html`` template
    receives ``q``, up to 30 ``results`` (each carrying
    ``days_since_last_activity``) and the ``search_errored`` flag.
    """
    results = []
    search_errored = False
    q = request.GET.get('q')

    if q:
        lowerq = q.lower()
        try:
            results = (
                UserMappingType
                .search()
                .query(
                    iusername__match=lowerq,
                    idisplay_name__match_whitespace=lowerq,
                    itwitter_usernames__match=lowerq,
                    should=True)
                .values_dict('id', 'username', 'display_name', 'avatar',
                             'twitter_usernames', 'last_contribution_date'))
            results = UserMappingType.reshape(results)

            statsd.incr('community.usersearch.success')
        except ES_EXCEPTIONS:
            search_errored = True
            # A failure inside reshape() leaves ``results`` bound to the
            # search object from the previous statement. Fall back to an
            # empty list so nothing below re-touches that object.
            results = []
            statsd.incr('community.usersearch.error')
            log.exception('User search failed.')

    # For now, we're just truncating results at 30 and not doing any
    # pagination. If somebody complains, we can add pagination or something.
    results = list(results[:30])

    # Calculate days since last activity; read the clock once for all rows.
    now = datetime.now()
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None

    data = {
        'q': q,
        'results': results,
        'search_errored': search_errored,
    }

    return render(request, 'community/search.html', data)
Пример #6
0
    def _filter_by_users(self, users_filter, invert=False):
        """Turn a filter over users into a filter over ``creator_id``.

        Args:
            users_filter: Filter applied to the user search.
            invert: When true, the resulting filter is negated.

        Returns:
            ``F(creator_id__in=<ids of matching users>)``, negated when
            ``invert`` is set.
        """
        user_search = UserMappingType.search()
        # Optimization: Filter out users that have never contributed.
        user_search = user_search.filter(~F(last_contribution_date=None))
        user_search = user_search.filter(users_filter)
        user_search = user_search.values_dict('id').everything()

        found = UserMappingType.reshape(user_search)
        by_creator = F(creator_id__in=[entry['id'] for entry in found])

        if not invert:
            return by_creator
        return ~by_creator
Пример #7
0
    def filter_username(self, value):
        """Build a ``creator_id`` filter for users whose name starts with ``value``.

        The lower-cased value is used as a prefix against the ``iusername``,
        ``idisplay_name`` and ``itwitter_usernames`` fields; a user matching
        any of them is included.
        """
        prefix = value.lower()

        by_username = F(iusername__prefix=prefix)
        by_display_name = F(idisplay_name__prefix=prefix)
        by_twitter = F(itwitter_usernames__prefix=prefix)
        name_filter = by_username | by_display_name | by_twitter

        user_search = (UserMappingType.search()
                       .filter(name_filter)
                       .values_dict('id'))
        matched = UserMappingType.reshape(user_search[:BIG_NUMBER])

        creator_ids = [user['id'] for user in matched]
        return F(creator_id__in=creator_ids)
Пример #8
0
def _get_creator_counts(query, count, page):
    """Return one page of contributors with their counts, highest first.

    Args:
        query: Object providing ``count()`` and ``values(...)``; the rows
            read from it carry ``id`` and ``query_count``.
        count: Page size.
        page: Page number; page 1 starts at offset 0.

    Returns:
        A ``(results, total)`` tuple. Each result is a dict with ``count``,
        ``term`` (the user id) and ``user`` (the user dict extended with
        ``days_since_last_activity``). ``total`` is ``query.count()``.
    """
    total = query.count()

    offset = (page - 1) * count
    page_rows = query.values("id", "query_count")[offset:page * count]
    counts_by_id = {row["id"]: row["query_count"] for row in page_rows}

    user_search = UserMappingType.search().filter(
        id__in=list(counts_by_id.keys()))
    user_search = user_search.values_dict(
        "id",
        "username",
        "display_name",
        "avatar",
        "twitter_usernames",
        "last_contribution_date",
    )
    users = UserMappingType.reshape(user_search[:count])

    entries = []
    now = datetime.now()

    for user in users:
        uid = user.get("id")
        last_seen = user.get("last_contribution_date", None)

        if last_seen:
            user["days_since_last_activity"] = (now - last_seen).days
        else:
            user["days_since_last_activity"] = None

        entries.append({
            "count": counts_by_id.get(uid),
            "term": uid,
            "user": user,
        })

    # The biggest contributors come first, so order by count descending.
    entries.sort(key=itemgetter("count"), reverse=True)

    return entries, total
Пример #9
0
def _get_creator_counts(query, count, page):
    """Return one page of contributors with their counts, highest first.

    Args:
        query: Object providing ``count()`` and ``values(...)``; the rows
            read from it carry ``id`` and ``query_count``.
        count: Page size.
        page: Page number; page 1 starts at offset 0.

    Returns:
        A ``(results, total)`` tuple. Each result is a dict with ``count``,
        ``term`` (the user id) and ``user`` (the user dict extended with
        ``days_since_last_activity``). ``total`` is ``query.count()``.
    """
    total = query.count()

    start = (page - 1) * count
    end = page * count
    query_data = query.values('id', 'query_count')[start:end]

    query_data = {obj['id']: obj['query_count'] for obj in query_data}

    # Hand the filter a real list: on Python 3 ``dict.keys()`` is a view
    # object rather than a list.
    user_ids = list(query_data)

    users_data = (UserMappingType.search().filter(
        id__in=user_ids).values_dict(
            'id', 'username', 'display_name', 'avatar', 'twitter_usernames',
            'last_contribution_date')[:count])

    users_data = UserMappingType.reshape(users_data)

    results = []
    now = datetime.now()

    for u_data in users_data:
        user_id = u_data.get('id')
        last_contribution_date = u_data.get('last_contribution_date', None)

        u_data['days_since_last_activity'] = (
            (now - last_contribution_date).days
            if last_contribution_date else None)

        results.append({
            'count': query_data.get(user_id),
            'term': user_id,
            'user': u_data,
        })

    # Sort descending by count so the top contributors come first.
    results = sorted(results, key=itemgetter('count'), reverse=True)

    return results, total
Пример #10
0
def _get_creator_counts(query, count, page):
    """Return one page of contributors with their counts, highest first.

    Args:
        query: Object providing ``count()`` and ``values(...)``; the rows
            read from it carry ``id`` and ``query_count``.
        count: Page size.
        page: Page number; page 1 starts at offset 0.

    Returns:
        A ``(results, total)`` tuple. Each result is a dict with ``count``,
        ``term`` (the user id) and ``user`` (the user dict extended with
        ``days_since_last_activity``). ``total`` is ``query.count()``.
    """
    total = query.count()

    start = (page - 1) * count
    end = page * count
    query_data = query.values('id', 'query_count')[start:end]

    query_data = {obj['id']: obj['query_count'] for obj in query_data}

    # ``dict.keys()`` is a view on Python 3, so materialise the ids into a
    # list before handing them to the filter.
    user_ids = list(query_data)

    users_data = (UserMappingType.search().filter(id__in=user_ids)
                                 .values_dict('id', 'username', 'display_name',
                                              'avatar', 'twitter_usernames',
                                              'last_contribution_date')[:count])

    users_data = UserMappingType.reshape(users_data)

    results = []
    now = datetime.now()

    for u_data in users_data:
        user_id = u_data.get('id')
        last_contribution_date = u_data.get('last_contribution_date', None)

        u_data['days_since_last_activity'] = (
            (now - last_contribution_date).days
            if last_contribution_date else None)

        results.append({
            'count': query_data.get(user_id),
            'term': user_id,
            'user': u_data,
        })

    # Sort descending by count so the top contributors come first.
    results = sorted(results, key=itemgetter('count'), reverse=True)

    return results, total
Пример #11
0
def _get_creator_counts(query, count, page):
    """Get the list of top contributors with the contribution count.

    Args:
        query: Search object whose
            ``facet_counts()['creator_id']['terms']`` list is read.
        count: Page size.
        page: Page number; page 1 starts at offset 0.

    Returns:
        A ``(items, total)`` tuple. ``items`` are the facet term dicts of
        the requested page, each extended in place with a ``'user'`` dict;
        terms for which no user was found are left out. ``total`` is the
        number of facet terms before pagination.
    """
    creator_counts = query.facet_counts()['creator_id']['terms']

    total = len(creator_counts)

    # Pagination
    creator_counts = creator_counts[((page - 1) * count):(page * count)]

    # Grab all the users from the user index in ES.
    user_ids = [x['term'] for x in creator_counts]
    results = (
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'twitter_usernames', 'last_contribution_date'))[:count]
    results = UserMappingType.reshape(results)

    # Read the clock once: every user is then compared against the same
    # instant rather than a new ``datetime.now()`` per row.
    now = datetime.now()

    # Calculate days since last activity and
    # create a {<user_id>: <user>,...} dict for convenience.
    user_lookup = {}
    for r in results:
        lcd = r.get('last_contribution_date', None)
        r['days_since_last_activity'] = (now - lcd).days if lcd else None
        user_lookup[r['id']] = r

    # Add the user to each dict in the creator_counts array.
    for item in creator_counts:
        item['user'] = user_lookup.get(item['term'], None)

    return ([item for item in creator_counts if item['user'] is not None],
            total)
Пример #12
0
    def get_data(self, request):
        """Return one ranked page of localization contributors.

        Revision and review counts per user are read from two facets,
        merged, ordered by ``self.query_values["ordering"]`` (a leading
        ``-`` means descending) and paginated with ``page`` / ``page_size``.

        Returns:
            A dict with ``results`` (one entry per user found, holding the
            counts, ``user``, ``last_contribution_date`` and ``rank``),
            ``count`` (number of contributors before pagination),
            ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsLocalization, self).get_data(request)

        # Every metric starts from the same search and filters and then
        # facets on its own field, since we can't do Aggregates.
        base_query = RevisionMetricsMappingType.search()
        base_filters = self.get_filters()

        # Revisions made by each user.
        revision_query = base_query.filter(base_filters).facet(
            "creator_id", filtered=True, size=BIG_NUMBER)

        # Reviews done by each user.
        reviewer_query = base_query.filter(base_filters).facet(
            "reviewer_id", filtered=True, size=BIG_NUMBER)

        # Two lists of {term, count} entries, one per metric.
        revision_terms = revision_query.facet_counts()["creator_id"]["terms"]
        review_terms = reviewer_query.facet_counts()["reviewer_id"]["terms"]

        # Merge both metrics into one row per user id.
        combined = defaultdict(lambda: {
            "revision_count": 0,
            "review_count": 0,
        })

        metric_sources = (
            (revision_terms, "revision_count"),
            (review_terms, "review_count"),
        )
        for terms, metric in metric_sources:
            for entry in terms:
                row = combined[entry["term"]]
                row["user_id"] = entry["term"]
                row[metric] = entry["count"]

        # Work out the sort field and direction from the ordering value.
        sort_key = self.query_values["ordering"]
        sort_reverse = sort_key[0] == "-"
        if sort_reverse:
            sort_key = sort_key[1:]

        def metric_of(row):
            return row[sort_key]

        ranked = sorted(combined.values(), key=metric_of, reverse=sort_reverse)
        user_ids = [row["user_id"] for row in ranked]
        full_count = len(user_ids)

        # Keep only the ids that belong to the requested page.
        page = self.query_values["page"]
        page_size = self.query_values["page_size"]
        page_start = (page - 1) * page_size
        user_ids = user_ids[page_start:page_start + page_size]

        # Fetch the full user objects for the ids on this page.
        user_search = UserMappingType.search().filter(
            id__in=user_ids).values_dict(
                "id", "username", "display_name", "avatar",
                "last_contribution_date")
        users = UserMappingType.reshape(user_search[:page_size])

        # For every user found, attach the user to its metrics row and move
        # the fields around into the shape clients expect.
        data = []
        for user in users:
            row = combined[user["id"]]
            row["user"] = user
            row["last_contribution_date"] = user.get(
                "last_contribution_date", None)
            row.pop("user_id", None)
            user.pop("id", None)
            user.pop("last_contribution_date", None)
            data.append(row)

        # ES returned the users in no particular order, so sort once more.
        data.sort(key=metric_of, reverse=sort_reverse)

        # Ranks continue from where the previous page ended.
        for rank, contributor in enumerate(data, page_start + 1):
            contributor["rank"] = rank

        return {
            "results": data,
            "count": full_count,
            "filters": self.query_values,
            "allowed_orderings": self.get_allowed_orderings(),
            "warnings": self.warnings,
        }
Пример #13
0
    def get_data(self, request):
        """Return one ranked page of question contributors.

        Answer, solution and helpful-vote figures per user are read from
        three facets, merged, ordered by ``self.query_values["ordering"]``
        (a leading ``-`` means descending) and paginated with ``page`` /
        ``page_size``.

        Returns:
            A dict with ``results`` (one entry per user found, holding the
            counts, ``user``, ``last_contribution_date`` and ``rank``),
            ``count`` (number of contributors before pagination),
            ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsQuestions, self).get_data(request)

        # Every metric starts from the same search and filters and then
        # builds its own facet, since we can't do Aggregates.
        query = AnswerMetricsMappingType.search()
        base_filters = self.get_filters()

        # Total number of answers for each user.
        answer_query = query.filter(base_filters).facet(
            "creator_id", filtered=True, size=BIG_NUMBER)

        # Number of answers that are solutions for each user.
        solutions_filter = base_filters & F(is_solution=True)
        solutions_query = query.filter(solutions_filter).facet(
            "creator_id", filtered=True, size=BIG_NUMBER)

        # Helpful votes across all answers for each user. It is a raw facet
        # because elasticutils only supports the term facet type in non-raw
        # facets; being raw, the filter has to be put into the facet by hand.
        helpful_facet = {
            "terms_stats": {
                "key_field": "creator_id",
                "value_field": "helpful_count",
            },
            "facet_filter": query._process_filters(base_filters.filters),
        }
        helpful_query = query.facet_raw(creator_id=helpful_facet)

        # Three lists of per-user entries, one per metric.
        answer_terms = answer_query.facet_counts()["creator_id"]["terms"]
        solution_terms = solutions_query.facet_counts()["creator_id"]["terms"]
        helpful_terms = helpful_query.facet_counts()["creator_id"]["terms"]

        # Merge all metrics into one row per user id.
        combined = defaultdict(lambda: {
            "answer_count": 0,
            "solution_count": 0,
            "helpful_vote_count": 0,
        })

        counted_sources = (
            (answer_terms, "answer_count"),
            (solution_terms, "solution_count"),
        )
        for terms, metric in counted_sources:
            for entry in terms:
                row = combined[entry["term"]]
                row["user_id"] = entry["term"]
                row[metric] = entry["count"]

        for entry in helpful_terms:
            row = combined[entry["term"]]
            row["user_id"] = entry["term"]
            # This comes from a terms_stats facet, so the figure is read
            # from "total" rather than "count".
            row["helpful_vote_count"] = int(entry["total"])

        # Work out the sort field and direction from the ordering value.
        sort_key = self.query_values["ordering"]
        sort_reverse = sort_key[0] == "-"
        if sort_reverse:
            sort_key = sort_key[1:]

        def metric_of(row):
            return row[sort_key]

        ranked = sorted(combined.values(), key=metric_of, reverse=sort_reverse)
        user_ids = [row["user_id"] for row in ranked]
        full_count = len(user_ids)

        # Keep only the ids that belong to the requested page.
        page = self.query_values["page"]
        page_size = self.query_values["page_size"]
        page_start = (page - 1) * page_size
        user_ids = user_ids[page_start:page_start + page_size]

        # Fetch the full user objects for the ids on this page.
        user_search = UserMappingType.search().filter(
            id__in=user_ids).values_dict(
                "id", "username", "display_name", "avatar",
                "last_contribution_date")
        users = UserMappingType.reshape(user_search[:page_size])

        # For every user found, attach the user to its metrics row and move
        # the fields around into the shape clients expect.
        data = []
        for user in users:
            row = combined[user["id"]]
            row["user"] = user
            row["last_contribution_date"] = user.get(
                "last_contribution_date", None)
            row.pop("user_id", None)
            user.pop("id", None)
            user.pop("last_contribution_date", None)
            data.append(row)

        # ES returned the users in no particular order, so sort once more.
        data.sort(key=metric_of, reverse=sort_reverse)

        # Ranks continue from where the previous page ended.
        for rank, contributor in enumerate(data, page_start + 1):
            contributor["rank"] = rank

        return {
            "results": data,
            "count": full_count,
            "filters": self.query_values,
            "allowed_orderings": self.get_allowed_orderings(),
            "warnings": self.warnings,
        }
Пример #14
0
    def get_data(self, request):
        """Return one ranked page of localization contributors.

        Revision and review counts per user are read from two facets,
        merged, ordered by ``self.query_values['ordering']`` (a leading
        ``-`` means descending) and paginated with ``page`` / ``page_size``.

        Returns:
            A dict with ``results`` (one entry per user found, holding the
            counts, ``user``, ``last_contribution_date`` and ``rank``),
            ``count`` (number of contributors before pagination),
            ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsLocalization, self).get_data(request)

        # This is the base of all the metrics. Each metric branches off from
        # this to get a particular metric type, since we can't do Aggregates.
        base_query = RevisionMetricsMappingType.search()
        base_filters = self.get_filters()

        # This branch is to get the number of revisions made by each user.
        revision_query = (
            base_query
            .filter(base_filters)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch is to get the number of reviews done by each user.
        reviewer_query = (
            base_query
            .filter(base_filters)
            .facet('reviewer_id', filtered=True, size=BIG_NUMBER))

        # Collect two lists of objects that correlate users with the
        # appropriate metric count.
        revision_creator_counts = revision_query.facet_counts()['creator_id']['terms']
        revision_reviewer_counts = reviewer_query.facet_counts()['reviewer_id']['terms']

        # Combine all the metric types into one row per user id.
        combined = defaultdict(lambda: {
            'revision_count': 0,
            'review_count': 0,
        })

        for d in revision_creator_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['revision_count'] = d['count']

        for d in revision_reviewer_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['review_count'] = d['count']

        # Split the ordering value into sort field and direction.
        sort_key = self.query_values['ordering']
        if sort_key[0] == '-':
            sort_reverse = True
            sort_key = sort_key[1:]
        else:
            sort_reverse = False

        # sorted() accepts any iterable, so this works whether values()
        # returns a list or a dict view (a view has no .sort() method).
        top_contributors = sorted(
            combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
        user_ids = [c['user_id'] for c in top_contributors]
        full_count = len(user_ids)

        # Paginate those user ids.
        page_start = (self.query_values['page'] - 1) * self.query_values['page_size']
        page_end = page_start + self.query_values['page_size']
        user_ids = user_ids[page_start:page_end]

        # Get full user objects for every id on this page.
        users = UserMappingType.reshape(
            UserMappingType
            .search()
            .filter(id__in=user_ids)
            .values_dict('id', 'username', 'display_name', 'avatar', 'last_contribution_date')
            [:self.query_values['page_size']])

        # For every user object found, mix in the metrics counts for that
        # user, and then reshape the data to make more sense to clients.
        data = []
        for u in users:
            d = combined[u['id']]
            d['user'] = u
            d['last_contribution_date'] = d['user'].get('last_contribution_date', None)
            d.pop('user_id', None)
            d['user'].pop('id', None)
            d['user'].pop('last_contribution_date', None)
            data.append(d)

        # One last sort, since ES didn't return the users in any particular order.
        data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

        # Add ranks to the objects, continuing from the page offset.
        for i, contributor in enumerate(data, 1):
            contributor['rank'] = page_start + i

        return {
            'results': data,
            'count': full_count,
            'filters': self.query_values,
            'allowed_orderings': self.get_allowed_orderings(),
            'warnings': self.warnings,
        }
Пример #15
0
    def get_data(self, request):
        """Return one ranked page of question contributors.

        Answer, solution and helpful-vote figures per user are read from
        three facets, merged, ordered by ``self.query_values['ordering']``
        (a leading ``-`` means descending) and paginated with ``page`` /
        ``page_size``.

        Returns:
            A dict with ``results`` (one entry per user found, holding the
            counts, ``user``, ``last_contribution_date`` and ``rank``),
            ``count`` (number of contributors before pagination),
            ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsQuestions, self).get_data(request)

        # This is the base of all the metrics. Each metric branches off from
        # this to get a particular metric type, since we can't do Aggregates.
        query = AnswerMetricsMappingType.search()
        base_filters = self.get_filters()

        # This branch is to get the total number of answers for each user.
        answer_query = (
            query
            .filter(base_filters)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch gets the number of answers that are solutions for each user.
        solutions_filter = base_filters & F(is_solution=True)
        solutions_query = (
            query
            .filter(solutions_filter)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch gets the number of helpful votes across all answers for
        # each user. It is a raw facet because elasticutils only supports the
        # term facet type in non-raw facets. Because it is raw facet, we have
        # to also put the filter in the facet ourselves.
        helpful_query = (
            query
            .facet_raw(
                creator_id={
                    'terms_stats': {
                        'key_field': 'creator_id',
                        'value_field': 'helpful_count',
                    },
                    'facet_filter': query._process_filters(base_filters.filters),
                }))

        # Collect three lists of objects that correlate users with the
        # appropriate metric count.
        creator_answer_counts = answer_query.facet_counts()['creator_id']['terms']
        creator_solutions_counts = solutions_query.facet_counts()['creator_id']['terms']
        creator_helpful_counts = helpful_query.facet_counts()['creator_id']['terms']

        # Combine all the metric types into one row per user id.
        combined = defaultdict(lambda: {
            'answer_count': 0,
            'solution_count': 0,
            'helpful_vote_count': 0,
        })

        for d in creator_answer_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['answer_count'] = d['count']

        for d in creator_solutions_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['solution_count'] = d['count']

        for d in creator_helpful_counts:
            combined[d['term']]['user_id'] = d['term']
            # Since this is a term_stats filter, not just a term filter, it is total, not count.
            combined[d['term']]['helpful_vote_count'] = int(d['total'])

        # Split the ordering value into sort field and direction.
        sort_key = self.query_values['ordering']
        if sort_key[0] == '-':
            sort_reverse = True
            sort_key = sort_key[1:]
        else:
            sort_reverse = False

        # sorted() accepts any iterable, so this works whether values()
        # returns a list or a dict view (a view has no .sort() method).
        top_contributors = sorted(
            combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
        user_ids = [c['user_id'] for c in top_contributors]
        full_count = len(user_ids)

        # Paginate those user ids.
        page_start = (self.query_values['page'] - 1) * self.query_values['page_size']
        page_end = page_start + self.query_values['page_size']
        user_ids = user_ids[page_start:page_end]

        # Get full user objects for every id on this page.
        users = UserMappingType.reshape(
            UserMappingType
            .search()
            .filter(id__in=user_ids)
            .values_dict('id', 'username', 'display_name', 'avatar', 'last_contribution_date')
            [:self.query_values['page_size']])

        # For every user object found, mix in the metrics counts for that
        # user, and then reshape the data to make more sense to clients.
        data = []
        for u in users:
            d = combined[u['id']]
            d['user'] = u
            d['last_contribution_date'] = d['user'].get('last_contribution_date', None)
            d.pop('user_id', None)
            d['user'].pop('id', None)
            d['user'].pop('last_contribution_date', None)
            data.append(d)

        # One last sort, since ES didn't return the users in any particular order.
        data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

        # Add ranks to the objects, continuing from the page offset.
        for i, contributor in enumerate(data, 1):
            contributor['rank'] = page_start + i

        return {
            'results': data,
            'count': full_count,
            'filters': self.query_values,
            'allowed_orderings': self.get_allowed_orderings(),
            'warnings': self.warnings,
        }
Пример #16
0
    def get_data(self, request):
        """Build one page of top question contributors with their metrics.

        Three per-user metrics are collected from the answer metrics index
        (answer count, solution count and helpful vote total), merged into
        one record per user, sorted by ``self.query_values['ordering']``
        (a leading ``-`` means descending), paginated, and then enriched with
        user details fetched from the user index.

        The parent ``get_data`` is called first and its return value is
        discarded; presumably it populates ``self.query_values`` and
        ``self.warnings`` from the request -- confirm against the base class.

        Returns a dict with the keys ``results`` (the ranked contributors on
        this page), ``count`` (number of contributors across all pages),
        ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsQuestions, self).get_data(request)

        # This is the base of all the metrics. Each metric branches off from
        # this to get a particular metric type, since we can't do Aggregates.
        query = AnswerMetricsMappingType.search()
        base_filters = self.get_filters()

        # This branch is to get the total number of answers for each user.
        answer_query = (
            query
            .filter(base_filters)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch gets the number of answers that are solutions for each user.
        solutions_filter = base_filters & F(is_solution=True)
        solutions_query = (
            query
            .filter(solutions_filter)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch gets the number of helpful votes across all answers for
        # each user. It is a raw facet because elasticutils only supports the
        # term facet type in non-raw facets. Because it is a raw facet, we
        # have to also put the filter in the facet ourselves.
        helpful_query = (
            query
            .facet_raw(
                creator_id={
                    'terms_stats': {
                        'key_field': 'creator_id',
                        'value_field': 'helpful_count',
                    },
                    'facet_filter': query._process_filters(base_filters.filters),
                }))

        # Collect three lists of objects that correlate users with the
        # appropriate metric count.
        creator_answer_counts = answer_query.facet_counts()['creator_id']['terms']
        creator_solutions_counts = solutions_query.facet_counts()['creator_id']['terms']
        creator_helpful_counts = helpful_query.facet_counts()['creator_id']['terms']

        # Combine all the metric types into one record per user. The default
        # factory makes sure every record carries all three metrics, even
        # when a user is missing from one of the facets.
        combined = defaultdict(lambda: {
            'answer_count': 0,
            'solution_count': 0,
            'helpful_vote_count': 0,
        })

        for d in creator_answer_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['answer_count'] = d['count']

        for d in creator_solutions_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['solution_count'] = d['count']

        for d in creator_helpful_counts:
            combined[d['term']]['user_id'] = d['term']
            # Since this is a term_stats filter, not just a term filter, it is total, not count.
            combined[d['term']]['helpful_vote_count'] = int(d['total'])

        # Work out the sort field and direction from the requested ordering.
        sort_key = self.query_values['ordering']
        if sort_key.startswith('-'):
            sort_reverse = True
            sort_key = sort_key[1:]
        else:
            sort_reverse = False

        # Sort by the requested ordering, and get just the ids into a list.
        # sorted() is used instead of ``values().sort()`` because on Python 3
        # ``dict.values()`` is a view object without a ``sort`` method.
        top_contributors = sorted(
            combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
        user_ids = [c['user_id'] for c in top_contributors]
        full_count = len(user_ids)

        # Paginate those user ids.
        page_start = (self.query_values['page'] - 1) * self.query_values['page_size']
        page_end = page_start + self.query_values['page_size']
        user_ids = user_ids[page_start:page_end]

        # Get full user objects for every id on this page.
        users = UserMappingType.reshape(
            UserMappingType
            .search()
            .filter(id__in=user_ids)
            .values_dict('id', 'username', 'display_name', 'avatar', 'last_contribution_date')
            [:self.query_values['page_size']])

        # For every user object found, mix in the metrics counts for that user,
        # and then reshape the data to make more sense to clients.
        data = []
        for u in users:
            d = combined[u['id']]
            d['user'] = u
            d['last_contribution_date'] = d['user'].get('last_contribution_date', None)
            d.pop('user_id', None)
            d['user'].pop('id', None)
            d['user'].pop('last_contribution_date', None)
            data.append(d)

        # One last sort, since ES didn't return the users in any particular order.
        data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

        # Add ranks to the objects; ranks continue from the previous pages.
        for i, contributor in enumerate(data, 1):
            contributor['rank'] = page_start + i

        return {
            'results': data,
            'count': full_count,
            'filters': self.query_values,
            'allowed_orderings': self.get_allowed_orderings(),
            'warnings': self.warnings,
        }
Пример #17
0
    def get_data(self, request):
        """Build one page of top localization contributors with their metrics.

        Two per-user metrics are collected from the revision metrics index
        (revisions created and revisions reviewed), merged into one record
        per user, sorted by ``self.query_values['ordering']`` (a leading
        ``-`` means descending), paginated, and then enriched with user
        details fetched from the user index.

        The parent ``get_data`` is called first and its return value is
        discarded; presumably it populates ``self.query_values`` and
        ``self.warnings`` from the request -- confirm against the base class.

        Returns a dict with the keys ``results`` (the ranked contributors on
        this page), ``count`` (number of contributors across all pages),
        ``filters``, ``allowed_orderings`` and ``warnings``.
        """
        super(TopContributorsLocalization, self).get_data(request)

        # This is the base of all the metrics. Each metric branches off from
        # this to get a particular metric type, since we can't do Aggregates.
        base_query = RevisionMetricsMappingType.search()
        base_filters = self.get_filters()

        # This branch is to get the number of revisions made by each user.
        revision_query = (
            base_query
            .filter(base_filters)
            .facet('creator_id', filtered=True, size=BIG_NUMBER))

        # This branch is to get the number of reviews done by each user.
        reviewer_query = (
            base_query
            .filter(base_filters)
            .facet('reviewer_id', filtered=True, size=BIG_NUMBER))

        # Collect two lists of objects that correlate users with the
        # appropriate metric count.
        revision_creator_counts = revision_query.facet_counts()['creator_id']['terms']
        revision_reviewer_counts = reviewer_query.facet_counts()['reviewer_id']['terms']

        # Combine all the metric types into one record per user. The default
        # factory makes sure every record carries both metrics, even when a
        # user is missing from one of the facets.
        combined = defaultdict(lambda: {
            'revision_count': 0,
            'review_count': 0,
        })

        for d in revision_creator_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['revision_count'] = d['count']

        for d in revision_reviewer_counts:
            combined[d['term']]['user_id'] = d['term']
            combined[d['term']]['review_count'] = d['count']

        # Work out the sort field and direction from the requested ordering.
        sort_key = self.query_values['ordering']
        if sort_key.startswith('-'):
            sort_reverse = True
            sort_key = sort_key[1:]
        else:
            sort_reverse = False

        # Sort by the requested ordering, and get just the ids into a list.
        # sorted() is used instead of ``values().sort()`` because on Python 3
        # ``dict.values()`` is a view object without a ``sort`` method.
        top_contributors = sorted(
            combined.values(), key=lambda d: d[sort_key], reverse=sort_reverse)
        user_ids = [c['user_id'] for c in top_contributors]
        full_count = len(user_ids)

        # Paginate those user ids.
        page_start = (self.query_values['page'] - 1) * self.query_values['page_size']
        page_end = page_start + self.query_values['page_size']
        user_ids = user_ids[page_start:page_end]

        # Get full user objects for every id on this page.
        users = UserMappingType.reshape(
            UserMappingType
            .search()
            .filter(id__in=user_ids)
            .values_dict('id', 'username', 'display_name', 'avatar', 'last_contribution_date')
            [:self.query_values['page_size']])

        # For every user object found, mix in the metrics counts for that user,
        # and then reshape the data to make more sense to clients.
        data = []
        for u in users:
            d = combined[u['id']]
            d['user'] = u
            d['last_contribution_date'] = d['user'].get('last_contribution_date', None)
            d.pop('user_id', None)
            d['user'].pop('id', None)
            d['user'].pop('last_contribution_date', None)
            data.append(d)

        # One last sort, since ES didn't return the users in any particular order.
        data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

        # Add ranks to the objects; ranks continue from the previous pages.
        for i, contributor in enumerate(data, 1):
            contributor['rank'] = page_start + i

        return {
            'results': data,
            'count': full_count,
            'filters': self.query_values,
            'allowed_orderings': self.get_allowed_orderings(),
            'warnings': self.warnings,
        }