def test_create_query(self, bigquery_client_mock):
    """
    The base query is sent once, with the two DATE parameters derived from
    `today` (2020-05-31): 2020-05-24 and 2020-05-03.
    """

    def date_parameter(name, value):
        # Shape of a named DATE parameter as compared against the job config.
        return {
            'name': name,
            'parameterType': {'type': 'DATE'},
            'parameterValue': {'value': value},
        }

    mocked_client = self.create_mock_client()
    bigquery_client_mock.from_service_account_json.return_value = mocked_client

    get_averages_by_addon_from_bigquery(today=date(2020, 5, 31))

    mocked_client.query.assert_called_once_with(
        self.expected_base_query, job_config=mock.ANY
    )
    actual_parameters = self.get_job_config_named_parameters(mocked_client.query)
    expected_parameters = [
        date_parameter('one_week_date', '2020-05-24'),
        date_parameter('four_weeks_date', '2020-05-03'),
    ]
    assert actual_parameters == expected_parameters
def test_create_query_with_excluded_guids(self, bigquery_client_mock):
    """
    Passing `exclude` appends a `NOT IN UNNEST(...)` clause to the base query
    and adds an ARRAY<STRING> parameter carrying the excluded GUIDs.
    """

    def date_parameter(name, value):
        # Shape of a named DATE parameter as compared against the job config.
        return {
            'name': name,
            'parameterType': {'type': 'DATE'},
            'parameterValue': {'value': value},
        }

    excluded = ['guid-1', 'guid-2']
    mocked_client = self.create_mock_client()
    bigquery_client_mock.from_service_account_json.return_value = mocked_client
    query_with_exclusion = f'{self.expected_base_query} WHERE addon_id NOT IN UNNEST(@excluded_addon_ids)'  # noqa

    get_averages_by_addon_from_bigquery(today=date(2020, 5, 31), exclude=excluded)

    mocked_client.query.assert_called_once_with(
        query_with_exclusion, job_config=mock.ANY
    )
    actual_parameters = self.get_job_config_named_parameters(mocked_client.query)
    excluded_parameter = {
        'name': 'excluded_addon_ids',
        'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'STRING'}},
        'parameterValue': {
            'arrayValues': [{'value': excluded_guid} for excluded_guid in excluded]
        },
    }
    expected_parameters = [
        date_parameter('one_week_date', '2020-05-24'),
        date_parameter('four_weeks_date', '2020-05-03'),
        excluded_parameter,
    ]
    assert actual_parameters == expected_parameters
def test_create_client(self, bigquery_client_mock):
    """
    The client is built exactly once from the path configured in the
    `GOOGLE_APPLICATION_CREDENTIALS` setting.
    """
    credentials_path = 'path/to/credentials.json'
    client_factory = bigquery_client_mock.from_service_account_json
    client_factory.return_value = self.create_mock_client()

    with override_settings(GOOGLE_APPLICATION_CREDENTIALS=credentials_path):
        get_averages_by_addon_from_bigquery(today=date.today())

    client_factory.assert_called_once_with(credentials_path)
def test_returned_values(self, bigquery_client_mock):
    """
    Rows are returned as a dict keyed by `addon_id`, mapping to the two
    averages; rows without an `addon_id` do not appear in the result.
    """
    raw_rows = [
        {'addon_id': 'guid', 'avg_this_week': 123, 'avg_three_weeks_before': 456},
        {'addon_id': 'guid2', 'avg_this_week': 45, 'avg_three_weeks_before': 40},
        # This should be skipped because `addon_id` is `None`.
        {'addon_id': None, 'avg_this_week': 123, 'avg_three_weeks_before': 456},
    ]
    results = [self.create_bigquery_row(raw_row) for raw_row in raw_rows]
    mocked_client = self.create_mock_client(results=results)
    bigquery_client_mock.from_service_account_json.return_value = mocked_client

    averages_by_guid = get_averages_by_addon_from_bigquery(today=date(2020, 5, 6))

    expected = {
        'guid': {'avg_this_week': 123, 'avg_three_weeks_before': 456},
        'guid2': {'avg_this_week': 45, 'avg_three_weeks_before': 40},
    }
    assert averages_by_guid == expected
def deliver_hotness():
    """
    Calculate hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0

    Only reviewed, non-frozen add-ons whose GUID appears in the BigQuery
    averages are considered.
    """
    frozen_ids = {frozen.addon_id for frozen in FrozenAddon.objects.all()}
    averages = get_averages_by_addon_from_bigquery(today=date.today())

    with_averages = Addon.objects.filter(guid__in=averages.keys())
    reviewed = with_averages.filter(status__in=amo.REVIEWED_STATUSES)
    candidates = reviewed.exclude(id__in=frozen_ids)

    for addon in candidates:
        stats = averages.get(addon.guid)
        current = stats['avg_this_week']
        baseline = stats['avg_three_weeks_before']

        if addon.type == amo.ADDON_STATICTHEME:
            threshold = 250
        else:
            threshold = 1000

        if current > threshold and baseline > 1:
            new_hotness = (current - baseline) / float(baseline)
        else:
            new_hotness = 0

        # Only write when the value actually changes. We don't want to cause
        # unnecessary re-indexes.
        if addon.hotness != new_hotness:
            addon.update(hotness=new_hotness)

        # Let the database catch its breath.
        # NOTE(review): the source this was reconstructed from had its
        # indentation collapsed; the pause is assumed to be per add-on (inside
        # the loop) - confirm against the original layout.
        time.sleep(10)
def test_create_query(self, bigquery_client_mock):
    """
    The averages query is sent to the BigQuery client exactly once, with some
    job config attached.
    """
    # NOTE(review): the whitespace inside this literal is reproduced exactly
    # as it appears in the (collapsed) source; confirm it matches the query
    # text built by the function under test.
    query_text = f""" WITH this_week AS ( SELECT addon_id, AVG(dau) AS avg_this_week FROM `project.dataset.{AMO_STATS_DAU_VIEW}` WHERE submission_date >= @one_week_date GROUP BY addon_id), three_weeks_before_this_week AS ( SELECT addon_id, AVG(dau) AS avg_three_weeks_before FROM `project.dataset.{AMO_STATS_DAU_VIEW}` WHERE submission_date BETWEEN @four_weeks_date AND @one_week_date GROUP BY addon_id) SELECT * FROM this_week JOIN three_weeks_before_this_week USING (addon_id) """  # noqa
    mocked_client = self.create_mock_client()
    bigquery_client_mock.from_service_account_json.return_value = mocked_client

    get_averages_by_addon_from_bigquery(today=date(2020, 5, 31))

    mocked_client.query.assert_called_once_with(query_text, job_config=mock.ANY)
def deliver_hotness(chunk_size=300):
    """
    Calculate hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0

    The work is not done inline: two sets of chunked task signatures are
    created and dispatched with `apply_async()`. One runs
    `update_addon_hotness` over the `(guid, averages)` pairs returned by
    BigQuery, and one runs `reset_addon_hotness` over reviewed add-ons that
    have a positive hotness but no BigQuery averages.

    `chunk_size` is the chunk size handed to `create_chunked_tasks_signatures`.
    """
    # GUIDs of frozen add-ons are handed to BigQuery as an exclusion list.
    # Missing/empty GUIDs are dropped: they cannot identify an add-on, and a
    # NULL element in the exclusion list is presumably harmful to the query's
    # exclusion filter (in SQL, `x NOT IN (..., NULL)` is never TRUE) - verify
    # against the query builder.
    frozen_guids = list(
        {fa.addon.guid for fa in FrozenAddon.objects.all() if fa.addon.guid}
    )
    averages = get_averages_by_addon_from_bigquery(
        today=date.today(), exclude=frozen_guids
    )
    create_chunked_tasks_signatures(
        update_addon_hotness, averages.items(), chunk_size
    ).apply_async()

    # Reset add-ons that won't be returned by BigQuery.
    addon_ids = (
        Addon.objects.filter(status__in=amo.REVIEWED_STATUSES)
        .filter(hotness__gt=0)
        .exclude(guid__in=averages.keys())
        .values_list('id', flat=True)
    )
    create_chunked_tasks_signatures(
        reset_addon_hotness, addon_ids, chunk_size
    ).apply_async()
def update_addon_hotness(chunk_size=300):
    """
    Calculate hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0

    Every non-frozen add-on with a GUID and a non-zero hotness is seeded with
    placeholder averages of 1/1, which are then overridden by the BigQuery
    averages where available. The merged mapping is dispatched in chunks of
    `chunk_size` to `_update_addon_hotness`.
    """
    # Collect the distinct, non-empty GUIDs of frozen add-ons.
    unique_frozen_guids = set()
    for frozen in FrozenAddon.objects.all():
        if frozen.addon.guid:
            unique_frozen_guids.add(frozen.addon.guid)
    frozen_guids = list(unique_frozen_guids)
    log.info('Found %s frozen add-on GUIDs.', len(frozen_guids))

    # Narrow the add-ons step by step: not frozen, has a usable GUID, and
    # currently has a non-zero hotness.
    candidates = Addon.objects.exclude(guid__in=frozen_guids)
    candidates = candidates.exclude(guid__isnull=True)
    candidates = candidates.exclude(guid__exact='')
    candidates = candidates.exclude(hotness=0)
    amo_guids = candidates.values_list('guid', flat=True)

    # Each GUID gets its own placeholder dict (no sharing between entries).
    averages = {}
    for guid in amo_guids:
        averages[guid] = {'avg_this_week': 1, 'avg_three_weeks_before': 1}
    log.info('Found %s add-on GUIDs in AMO DB.', len(averages))

    bq_averages = get_averages_by_addon_from_bigquery(
        today=date.today(), exclude=frozen_guids
    )
    log.info('Found %s add-on GUIDs with averages in BigQuery.', len(bq_averages))

    # BigQuery values take precedence over the placeholders.
    averages.update(bq_averages)
    log.info('Preparing update of `hotness` for %s add-ons.', len(averages))

    signatures = create_chunked_tasks_signatures(
        _update_addon_hotness, averages.items(), chunk_size
    )
    signatures.apply_async()