def test_create_client(self, bigquery_client_mock):
    # The BigQuery client factory must be invoked exactly once, with the
    # value of the `GOOGLE_APPLICATION_CREDENTIALS` setting.
    credentials_path = 'path/to/credentials.json'
    client_factory = bigquery_client_mock.from_service_account_json
    client_factory.return_value = self.create_mock_client()

    with override_settings(GOOGLE_APPLICATION_CREDENTIALS=credentials_path):
        get_addons_and_weekly_downloads_from_bigquery()

    client_factory.assert_called_once_with(credentials_path)
def test_create_query(self, bigquery_client_mock):
    # The helper must send exactly one query to the client, and its text
    # must match the expected statement below character for character.
    mocked_client = self.create_mock_client()
    bigquery_client_mock.from_service_account_json.return_value = (
        mocked_client
    )

    get_addons_and_weekly_downloads_from_bigquery()

    # Keep this literal untouched: whitespace is part of the comparison.
    expected_query = f""" SELECT hashed_addon_id, SUM(total_downloads) AS count FROM `project.dataset.{AMO_STATS_DOWNLOAD_VIEW}` WHERE submission_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) GROUP BY hashed_addon_id"""
    mocked_client.query.assert_called_once_with(expected_query)
def test_returned_results(self, bigquery_client_mock):
    # Rows handed back by the (mocked) client must come out of the helper
    # as a list of `(hashed_addon_id, count)` tuples, in the same order.
    bigquery_rows = [
        self.create_bigquery_row(row_data)
        for row_data in (
            {'hashed_addon_id': 1, 'count': 123},
            {'hashed_addon_id': 2, 'count': 456},
        )
    ]
    bigquery_client_mock.from_service_account_json.return_value = (
        self.create_mock_client(results=bigquery_rows)
    )

    assert get_addons_and_weekly_downloads_from_bigquery() == [
        (1, 123),
        (2, 456),
    ]
def update_addon_weekly_downloads(chunk_size=250):
    """
    Refresh the 7-day download count of add-ons.

    A mapping of `hashed_guid -> 0` is built for add-ons that currently
    have a non-zero `weekly_downloads`, then overlaid with the values
    returned by `get_addons_and_weekly_downloads_from_bigquery()`. The
    resulting pairs are handed to `_update_addon_weekly_downloads` through
    `create_chunked_tasks_signatures(...)`, and dispatched with
    `apply_async()`.

    `chunk_size` is forwarded as-is to `create_chunked_tasks_signatures`.
    """
    # Add-ons missing from BigQuery would otherwise keep a stale value, so
    # start from a zero entry for every add-on whose counter is non-zero.
    addons_to_reset = (
        Addon.objects.filter(type__in=amo.ADDON_TYPES_WITH_STATS)
        .exclude(guid__isnull=True)
        .exclude(guid__exact='')
        .exclude(weekly_downloads=0)
        .annotate(count=Value(0, IntegerField()))
        .values_list('addonguid__hashed_guid', 'count')
    )
    downloads_by_hashed_guid = dict(addons_to_reset)

    # Entries coming from BigQuery take precedence over the zero defaults.
    downloads_by_hashed_guid.update(
        get_addons_and_weekly_downloads_from_bigquery()
    )
    pairs = list(downloads_by_hashed_guid.items())

    log.info(
        'Preparing update of `weekly_downloads` for %s add-ons.',
        len(pairs),
    )

    signatures = create_chunked_tasks_signatures(
        _update_addon_weekly_downloads, pairs, chunk_size
    )
    signatures.apply_async()
def update_addon_weekly_downloads(chunk_size=250):
    """
    Update 7-day add-on download counts.

    Two code paths exist, selected by the
    `use-bigquery-for-download-stats-cron` waffle switch:

    - Switch active: a `guid -> 0` mapping is built for add-ons with a
      non-zero `weekly_downloads`, overlaid with the values returned by
      `get_addons_and_weekly_downloads_from_bigquery()`, and the resulting
      pairs are dispatched to `_update_addon_weekly_downloads` through
      `create_chunked_tasks_signatures(...).apply_async()`.
    - Switch inactive: after calling `raise_if_reindex_in_progress('amo')`,
      the counts are summed from the `download_counts` table for the last
      7 days and written to `addons.weeklydownloads` with a single
      `UPDATE ... INNER JOIN` against a temporary table. Add-ons with no
      row in that window are set to 0. If `download_counts` yields no row
      at all, the function returns without writing anything.

    `chunk_size` is only used by the BigQuery path, where it is forwarded
    to `create_chunked_tasks_signatures`.
    """
    if waffle.switch_is_active('use-bigquery-for-download-stats-cron'):
        counts = dict(
            # In order to reset the `weekly_downloads` values of add-ons that
            # don't exist in BigQuery, we prepare a set of `(guid, 0)` for most
            # add-ons.
            # NOTE(review): keys here are plain `guid`s - confirm they match
            # the identifiers returned by
            # `get_addons_and_weekly_downloads_from_bigquery()`.
            Addon.objects.filter(type__in=amo.ADDON_TYPES_WITH_STATS)
            .exclude(guid__isnull=True)
            .exclude(guid__exact='')
            .exclude(weekly_downloads=0)
            .annotate(count=Value(0, IntegerField()))
            .values_list('guid', 'count')
        )
        # Update the `counts` with values from BigQuery.
        counts.update(get_addons_and_weekly_downloads_from_bigquery())
        counts = list(counts.items())

        log.info(
            'Preparing update of `weekly_downloads` for %s add-ons.',
            len(counts),
        )

        create_chunked_tasks_signatures(
            _update_addon_weekly_downloads, counts, chunk_size
        ).apply_async()
    else:
        raise_if_reindex_in_progress('amo')

        with connection.cursor() as cursor:
            cursor.execute(""" SELECT addon_id, SUM(count) AS weekly_count FROM download_counts WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY) GROUP BY addon_id ORDER BY addon_id""")
            counts = cursor.fetchall()

        addon_ids = [r[0] for r in counts]

        # Nothing was downloaded in the window: bail out early, which also
        # avoids building an invalid empty `NOT IN ()` clause below.
        if not addon_ids:
            return

        with connection.cursor() as cursor:
            # Every add-on without downloads in the window gets a 0 entry so
            # that its counter is reset by the UPDATE below.
            cursor.execute(
                """ SELECT id, 0 FROM addons WHERE id NOT IN %s""",
                (addon_ids, ))
            counts += cursor.fetchall()

            cursor.execute(""" CREATE TEMPORARY TABLE tmp_wd (addon_id INT PRIMARY KEY, count INT)""")
            try:
                # One multi-row INSERT: a `(%s,%s)` placeholder pair per row,
                # with the rows flattened into a single parameter list.
                cursor.execute(
                    'INSERT INTO tmp_wd VALUES %s' %
                    ','.join(['(%s,%s)'] * len(counts)),
                    list(itertools.chain.from_iterable(counts)))

                cursor.execute(""" UPDATE addons INNER JOIN tmp_wd ON addons.id = tmp_wd.addon_id SET weeklydownloads = tmp_wd.count""")
            finally:
                # Always drop the temporary table, even when the INSERT or
                # UPDATE fails: it would otherwise stay attached to the
                # database session and make the next CREATE on that session
                # fail.
                cursor.execute("DROP TABLE IF EXISTS tmp_wd")