def statistics():
    """Collect aggregate figures for users, inboxes and emails and save them.

    The figures are stored on a new ``models.Statistic`` row; nothing is
    returned.
    """
    # NOTE: these key names are ugly for historical reasons. Renaming any of
    # them means writing a data migration, so leave them alone.
    user_aggregate = dict(
        count=Count("id"),
        inbox_count__avg=Avg("inbox_count"),
        inbox_count__sum=Sum("inbox_count"),
        inbox_count__min=Min("inbox_count"),
        inbox_count__max=Max("inbox_count"),
    )
    inbox_aggregate = dict(
        email_count__avg=Avg("email_count"),
        email_count__sum=Sum("email_count"),
        email_count__min=Min("email_count"),
        email_count__max=Max("email_count"),
    )

    # standard deviation is only requested when the engine is not sqlite
    engine = settings.DATABASES["default"]["ENGINE"]
    if "sqlite" in engine:
        log.info("Can't get standard deviation, use a proper database")
    else:
        user_aggregate["inbox_count__stddev"] = StdDev("inbox_count")
        inbox_aggregate["email_count__stddev"] = StdDev("email_count")

    user_model = get_user_model()
    users_with_counts = user_model.objects.annotate(inbox_count=Count("inbox__id"))
    users = users_with_counts.aggregate(**user_aggregate)

    # aggregate-if doesn't like JOINs, so new users are counted separately;
    # see https://github.com/henriquebastos/django-aggregate-if/issues/1
    one_day_ago = datetime.now(utc) - timedelta(days=1)
    recent_users = user_model.objects.filter(date_joined__gte=one_day_ago)
    users["new"] = recent_users.count()

    # everything prefixed "inbox" moves out of the user figures
    inboxes = {
        name: value
        for name, value in users.items()
        if name.startswith("inbox")
    }
    for name in inboxes:
        del users[name]

    live_inboxes = models.Inbox.objects.exclude(flags=models.Inbox.flags.deleted)
    emails = live_inboxes.annotate(
        email_count=Count("email__id"),
    ).aggregate(**inbox_aggregate)

    stat = models.Statistic(
        users=users,
        emails=emails,
        inboxes=inboxes,
        date=datetime.now(utc),
    )
    stat.save()

    log.info("Saved statistics (%s)", stat.date)
def test_statistic(self):
    """The repr of a Statistic is ``<Statistic: DATE>`` for its date."""
    moment = timezone.now()
    expected = "<Statistic: %s>" % moment
    self.assertEqual(repr(models.Statistic(date=moment)), expected)
def statistics():
    """Gather statistics about users, inboxes and emails and save them.

    Runs a handful of aggregate queries, combines them with figures from the
    most recent ``models.Statistic`` (if there is one) to maintain a running
    total of emails, and stores the result as a new ``models.Statistic``.
    Nothing is returned.
    """
    try:
        last_stat = models.Statistic.objects.latest("date")
    except models.Statistic.DoesNotExist:
        # first ever run: there is nothing to build the running total on
        last_stat = None

    # the keys of these dictionaries have awful names for historical reasons
    # don't change them unless you want to do a data migration
    one_day_ago = timezone.now() - timedelta(days=1)
    user_aggregate = {
        "count": Count("id", distinct=True),
        # users who joined within the last day
        "new": Coalesce(
            Count(
                Case(When(date_joined__gte=one_day_ago, then=F("id"))),
                distinct=True,
            ),
            0,
        ),
        "oldest_user_joined": Min("date_joined"),
        # users that own at least one inbox
        "with_inboxes": Coalesce(
            Count(
                Case(When(inbox__isnull=False, then=F("id"))),
                distinct=True,
            ),
            0,
        ),
    }
    inbox_aggregate = {
        "inbox_count__avg": Coalesce(Avg("inbox_count"), 0),
        "inbox_count__max": Coalesce(Max("inbox_count"), 0),
        "inbox_count__min": Coalesce(Min("inbox_count"), 0),
        "inbox_count__stddev": Coalesce(StdDev("inbox_count"), 0),
        "inbox_count__sum": Coalesce(Sum("inbox_count"), 0),
    }
    email_aggregate = {
        "email_count__avg": Coalesce(Avg("email_count"), 0),
        "email_count__max": Coalesce(Max("email_count"), 0),
        "email_count__min": Coalesce(Min("email_count"), 0),
        "email_count__stddev": Coalesce(StdDev("email_count"), 0),
        "email_count__sum": Coalesce(Sum("email_count"), 0),
    }

    # collect user and inbox stats
    users = get_user_model().objects.aggregate(**user_aggregate)
    inboxes = get_user_model().objects.annotate(
        inbox_count=Count("inbox__id"),
    ).aggregate(**inbox_aggregate)

    domain_count = models.Domain.objects.available(None).count()
    inboxes_possible = len(settings.INBOX_CHOICES) ** settings.INBOX_LENGTH
    inboxes["total_possible"] = inboxes_possible * domain_count

    # collect email stats
    inbox_qs = models.Inbox.objects.exclude(deleted=True).annotate(
        email_count=Count("email__id"),
    )
    emails = inbox_qs.aggregate(**email_aggregate)
    inboxes["with_emails"] = inbox_qs.exclude(email_count=0).count()
    inboxes["disowned"] = models.Inbox.objects.filter(user__isnull=True).count()
    emails["emails_read"] = models.Email.objects.filter(read=True).count()

    current_sum = emails["email_count__sum"] or 0
    if last_stat:
        # A previous row may lack these keys (e.g. it was saved before
        # "running_total" was recorded) or hold no email data at all, so read
        # them defensively rather than letting one old row abort the run.
        previous_emails = last_stat.emails or {}
        previous_sum = previous_emails.get("email_count__sum") or 0
        previous_total = previous_emails.get("running_total")
        if previous_total is None:
            # best available baseline: the emails that existed at that time
            previous_total = previous_sum

        # the running total never goes down, even when emails get deleted
        email_diff = current_sum - previous_sum
        emails["running_total"] = previous_total + max(email_diff, 0)
    else:
        emails["running_total"] = current_sum

    stat = models.Statistic(
        users=users,
        emails=emails,
        inboxes=inboxes,
    )
    stat.save()

    log.info("Saved statistics (%s)", stat.date)