Exemplo n.º 1
0
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')

    # One aggregated row per (addon, collection) pair.
    totals = (AddonCollectionCount.objects
              .values('addon', 'collection')
              .annotate(sum=Sum('count')))

    # Fan the rows out to celery in batches of 100.
    subtasks = [
        tasks.update_addons_collections_downloads.subtask(args=[batch])
        for batch in chunked(totals, 100)
    ]
    group(subtasks).apply_async()
Exemplo n.º 2
0
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')

    # Aggregate raw download counts per addon/collection combination.
    rows = (AddonCollectionCount.objects
            .values('addon', 'collection')
            .annotate(sum=Sum('count')))

    subtasks = []
    for batch in chunked(rows, 100):
        subtasks.append(
            tasks.update_addons_collections_downloads.subtask(args=[batch]))
    TaskSet(subtasks).apply_async()
Exemplo n.º 3
0
def index_latest_stats(index=None):
    """Reindex download stats from the most recently indexed date to today."""
    def fmt(d):
        return d.strftime('%Y-%m-%d')

    raise_if_reindex_in_progress('amo')
    hits = DownloadCount.search(index).order_by('-date').values_dict('date')
    if hits:
        start = hits[0]['date']
    else:
        # Nothing indexed yet: fall back to yesterday.
        start = fmt(datetime.date.today() - datetime.timedelta(days=1))
    date_range = '%s:%s' % (start, fmt(datetime.date.today()))
    log.info('index_stats --date=%s' % date_range)
    call_command('index_stats', addons=None, date=date_range)
Exemplo n.º 4
0
def index_latest_stats(index=None):
    """Reindex update stats from the last indexed day through today."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    def fmt(d):
        return d.strftime('%Y-%m-%d')

    raise_if_reindex_in_progress('amo')
    newest = UpdateCount.search(index).order_by('-date').values_dict('date')
    # Fall back to yesterday when nothing has been indexed yet.
    start = newest[0]['date'] if newest else fmt(
        datetime.date.today() - datetime.timedelta(days=1))
    date_range = '%s:%s' % (start, fmt(datetime.date.today()))
    cron_log.info('index_stats --date=%s' % date_range)
    call_command('index_stats', addons=None, date=date_range)
Exemplo n.º 5
0
def index_latest_stats(index=None):
    """Kick off `index_stats` covering the last indexed date through today."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    def fmt(d):
        return d.strftime('%Y-%m-%d')

    raise_if_reindex_in_progress('amo')
    latest_hits = UpdateCount.search(index).order_by('-date').values_dict('date')
    if latest_hits:
        start = latest_hits[0]['date']
    else:
        # No indexed data yet; start from yesterday.
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        start = fmt(yesterday)
    date_range = '%s:%s' % (start, fmt(datetime.date.today()))
    cron_log.info('index_stats --date=%s' % date_range)
    call_command('index_stats', addons=None, date=date_range)
Exemplo n.º 6
0
def weekly_downloads():
    """
    Update 7-day add-on download counts.
    """

    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    raise_if_reindex_in_progress('amo')

    # Sum each addon's downloads over the trailing week.
    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT addon_id, SUM(count) AS weekly_count
            FROM download_counts
            WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
            GROUP BY addon_id
            ORDER BY addon_id""")
        rows = cursor.fetchall()

    ids = [row[0] for row in rows]

    if not ids:
        return

    with connection.cursor() as cursor:
        # Add explicit zero rows for addons with no downloads this week.
        cursor.execute(
            """
            SELECT id, 0
            FROM addons
            WHERE id NOT IN %s""", (ids, ))
        rows += cursor.fetchall()

        # Stage totals in a temp table, then bulk-update via UPDATE ... JOIN.
        cursor.execute("""
            CREATE TEMPORARY TABLE tmp_wd
            (addon_id INT PRIMARY KEY, count INT)""")
        placeholders = ','.join(['(%s,%s)'] * len(rows))
        cursor.execute('INSERT INTO tmp_wd VALUES %s' % placeholders,
                       list(itertools.chain(*rows)))

        cursor.execute("""
            UPDATE addons INNER JOIN tmp_wd
                ON addons.id = tmp_wd.addon_id
            SET weeklydownloads = tmp_wd.count""")
        cursor.execute("DROP TABLE IF EXISTS tmp_wd")
Exemplo n.º 7
0
def weekly_downloads():
    """
    Update 7-day add-on download counts.
    """

    if not waffle.switch_is_active("local-statistics-processing"):
        return False

    raise_if_reindex_in_progress("amo")
    # Context-manage the cursor so it is always closed, even when a query
    # raises — the previous version leaked the cursor on every exit path.
    # This also matches the style of the other raw-SQL crons in this module.
    with connection.cursor() as cursor:
        # Sum each addon's downloads over the trailing week.
        cursor.execute(
            """
        SELECT addon_id, SUM(count) AS weekly_count
        FROM download_counts
        WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
        GROUP BY addon_id
        ORDER BY addon_id"""
        )
        counts = cursor.fetchall()
        addon_ids = [r[0] for r in counts]
        if not addon_ids:
            # No downloads in the last week: nothing to update.
            return
        # Add explicit (id, 0) rows so addons without recent downloads are
        # reset to zero by the bulk update below.
        cursor.execute(
            """
        SELECT id, 0
        FROM addons
        WHERE id NOT IN %s""",
            (addon_ids,),
        )
        counts += cursor.fetchall()

        # Stage the new totals in a temp table, then apply them with a
        # single UPDATE ... JOIN.
        cursor.execute(
            """
        CREATE TEMPORARY TABLE tmp_wd
        (addon_id INT PRIMARY KEY, count INT)"""
        )
        cursor.execute("INSERT INTO tmp_wd VALUES %s" % ",".join(["(%s,%s)"] * len(counts)), list(itertools.chain(*counts)))

        cursor.execute(
            """
        UPDATE addons INNER JOIN tmp_wd
            ON addons.id = tmp_wd.addon_id
        SET weeklydownloads = tmp_wd.count"""
        )
        cursor.execute("DROP TABLE IF EXISTS tmp_wd")
Exemplo n.º 8
0
def update_global_totals(date=None):
    """Update global statistics totals."""
    raise_if_reindex_in_progress('amo')

    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    # Default to yesterday when no explicit date is given.
    target_day = date or datetime.date.today() - datetime.timedelta(days=1)
    today_jobs = [
        {'job': job, 'date': target_day}
        for job in tasks._get_daily_jobs(date)
    ]

    # Metrics jobs run against the newest date we have update counts for.
    max_update = date or UpdateCount.objects.aggregate(max=Max('date'))['max']
    metrics_jobs = [
        {'job': job, 'date': max_update}
        for job in tasks._get_metrics_jobs(date)
    ]

    subtasks = [
        tasks.update_global_totals.subtask(kwargs=kw)
        for kw in today_jobs + metrics_jobs
    ]
    TaskSet(subtasks).apply_async()
Exemplo n.º 9
0
def update_global_totals(date=None):
    """Update global statistics totals."""
    raise_if_reindex_in_progress('amo')

    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    # With no explicit date, populate yesterday's stats.
    day = date or datetime.date.today() - datetime.timedelta(days=1)
    daily = [dict(job=job, date=day) for job in tasks._get_daily_jobs(date)]

    # Metrics jobs target the most recent date with update counts.
    newest = date or UpdateCount.objects.aggregate(max=Max('date'))['max']
    metrics = [dict(job=job, date=newest)
               for job in tasks._get_metrics_jobs(date)]

    signatures = [tasks.update_global_totals.subtask(kwargs=kwargs)
                  for kwargs in daily + metrics]
    group(signatures).apply_async()
Exemplo n.º 10
0
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active("local-statistics-processing"):
        return False

    raise_if_reindex_in_progress("amo")
    # Average the last week of update pings per addon, read from a slave DB.
    cursor = connections[multidb.get_slave()].cursor()
    query = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(query)
    rows = cursor.fetchall()
    cursor.close()

    # Queue the writes in chunks of 250 addons per task.
    subtasks = []
    for batch in chunked(rows, 250):
        subtasks.append(
            _update_addon_average_daily_users.subtask(args=[batch]))
    TaskSet(subtasks).apply_async()
Exemplo n.º 11
0
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    raise_if_reindex_in_progress('amo')
    # Average update-ping counts over the trailing 13 days, per addon.
    cursor = connections[multidb.get_slave()].cursor()
    cursor.execute("""SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY)
           GROUP BY addon_id
           ORDER BY addon_id""")
    averages = cursor.fetchall()
    cursor.close()

    signatures = [_update_addon_average_daily_users.subtask(args=[batch])
                  for batch in chunked(averages, 250)]
    group(signatures).apply_async()
Exemplo n.º 12
0
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    kwargs = {'id_field': 'pk'}
    if waffle.switch_is_active('use-bigquery-for-addon-adu'):
        # BigQuery does not have data for add-ons with type other than those
        # in `ADDON_TYPES_WITH_STATS`, so we use download counts instead.
        # See: https://github.com/mozilla/addons-server/issues/14609
        amo_qs = (Addon.objects
                  .exclude(type__in=amo.ADDON_TYPES_WITH_STATS)
                  .exclude(guid__isnull=True)
                  .exclude(guid__exact='')
                  .annotate(count=Coalesce(Sum('downloadcount__count'), 0))
                  .values_list('guid', 'count')
                  # Just to make order predictable in tests, we order by id.
                  # This matches the GROUP BY being generated so it should be
                  # safe.
                  .order_by('id'))
        counts = dict(get_addons_and_average_daily_users_from_bigquery())
        counts.update(dict(amo_qs))
        counts = list(counts.items())
        # BigQuery stores GUIDs, not AMO primary keys.
        kwargs['id_field'] = 'guid'
    else:
        raise_if_reindex_in_progress('amo')
        # Legacy path: average the trailing 13 days of update pings in MySQL,
        # read from a replica.
        cursor = connections[multidb.get_replica()].cursor()
        cursor.execute("""SELECT addon_id, AVG(`count`)
            FROM update_counts
            WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY)
            GROUP BY addon_id
            ORDER BY addon_id""")
        counts = cursor.fetchall()
        cursor.close()

    signatures = [
        _update_addon_average_daily_users.subtask(args=[batch], kwargs=kwargs)
        for batch in chunked(counts, 250)
    ]
    group(signatures).apply_async()
Exemplo n.º 13
0
def update_addon_weekly_downloads(chunk_size=250):
    """
    Update 7-day add-on download counts.

    With the `use-bigquery-for-download-stats-cron` switch on, counts come
    from BigQuery and are written via chunked celery tasks; otherwise they
    are aggregated from the `download_counts` table directly in MySQL.
    """
    if waffle.switch_is_active('use-bigquery-for-download-stats-cron'):
        counts = dict(
            # In order to reset the `weekly_downloads` values of add-ons that
            # don't exist in BigQuery, we prepare a set of `(guid, 0)` for most
            # add-ons.
            Addon.objects.filter(type__in=amo.ADDON_TYPES_WITH_STATS).exclude(
                guid__isnull=True
            ).exclude(guid__exact=''
                      ).exclude(weekly_downloads=0
                                ).annotate(count=Value(0, IntegerField())
                                           ).values_list('guid', 'count'))
        # Update the `counts` with values from BigQuery.
        counts.update(get_addons_and_weekly_downloads_from_bigquery())
        counts = list(counts.items())

        log.info('Preparing update of `weekly_downloads` for %s add-ons.',
                 len(counts))

        # Each chunk becomes one celery task updating `chunk_size` add-ons.
        create_chunked_tasks_signatures(_update_addon_weekly_downloads, counts,
                                        chunk_size).apply_async()
    else:
        raise_if_reindex_in_progress('amo')

        # Legacy path: sum the trailing week of download_counts rows per
        # addon directly in MySQL.
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT addon_id, SUM(count) AS weekly_count
                FROM download_counts
                WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
                GROUP BY addon_id
                ORDER BY addon_id""")
            counts = cursor.fetchall()

        addon_ids = [r[0] for r in counts]

        # No downloads in the last week: leave the table untouched.
        if not addon_ids:
            return

        with connection.cursor() as cursor:
            # Add (id, 0) rows so add-ons without recent downloads are reset.
            cursor.execute(
                """
                SELECT id, 0
                FROM addons
                WHERE id NOT IN %s""", (addon_ids, ))
            counts += cursor.fetchall()

            # Stage the new totals in a temp table, then apply them with a
            # single UPDATE ... JOIN.
            cursor.execute("""
                CREATE TEMPORARY TABLE tmp_wd
                (addon_id INT PRIMARY KEY, count INT)""")
            cursor.execute(
                'INSERT INTO tmp_wd VALUES %s' %
                ','.join(['(%s,%s)'] * len(counts)),
                list(itertools.chain(*counts)))

            cursor.execute("""
                UPDATE addons INNER JOIN tmp_wd
                    ON addons.id = tmp_wd.addon_id
                SET weeklydownloads = tmp_wd.count""")
            cursor.execute("DROP TABLE IF EXISTS tmp_wd")