def test_update_history_webusage_without_history(spark, main_summary_data):
    """update_history with history=None should seed each country's webusage
    series as a single-entry list holding that day's metrics."""
    frame = spark.createDataFrame(*main_summary_data)
    usage_sdf, os_sdf, locales_sdf, addons_sdf = agg_usage(
        frame,
        date='20180201',
        period=1,
        sample_factor=100.0 / 1,
        country_list=['DE'])

    fxhealth, webusage = all_metrics_per_day(
        ['DE'],
        usage_pd_df=usage_sdf.toPandas(),
        os_pd_df=os_sdf.toPandas(),
        locales_pd_df=locales_sdf.toPandas(),
        topaddons_pd_df=addons_sdf.toPandas())

    updated_webusage = update_history(webusage, None)

    # For this single-country fixture 'DE' and the 'All' rollup carry
    # identical metrics, so one entry describes both.
    day_entry = {
        "date": "2018-02-01",
        "metrics": {
            "pct_TP": 50.0,
            "pct_addon": 100.0,
            "os": {
                u"Mac OS X": 50.0,
                u"Windows 10": 50.0
            },
            "locale": {
                u"en-US": 50.0,
                u"DE": 50.0
            },
            "top10addons": {
                u'SHA-1 deprecation staged rollout': 100.0
            }
        }
    }
    expected_webusage = {
        'DE': [day_entry],
        'All': [day_entry],
    }

    assert expected_webusage == updated_webusage
def test_integration_missing_fields_no_country_list(
        spark, main_summary_data_null_value):
    """Aggregate over a 7-day period with no country list on data where
    all/some of a given field are null, '', or zero; only the 'All'
    rollup is produced."""
    main_summary = spark.createDataFrame(*main_summary_data_null_value)
    usage, os, locales, top10addon = agg_usage(
        main_summary,
        date='20180201',
        period=7,
        sample_factor=100.0 / 1,
        country_list=None)

    # Every expected row shares the same rollup country and date.
    common = {"country": "All", "submission_date_s3": "20180201"}

    expected_usage = [dict(common, **{
        "avg_daily_usage(hours)": 500.0 / 3600 / 4.0,
        "avg_intensity": 0.75,
        "pct_latest_version": 0.0,
        "pct_TP": 25.0,
        "MAU": 400,
        "YAU": 400,
        "pct_new_user": 25.0,
        "pct_addon": 100.0
    })]
    expected_os = [
        dict(common, os="Windows 10", pct_on_os=25.0),
        dict(common, os="Mac OS X", pct_on_os=75.0),
    ]
    expected_locales = [
        dict(common, locale="en-US", pct_on_locale=50.0),
        dict(common, locale="DE", pct_on_locale=50.0),
    ]
    expected_addons = [dict(common,
                            addon_id=u'disableSHA1rollout',
                            addon_name=u'SHA-1 deprecation staged rollout',
                            pct_with_addon=100.0)]

    is_same(spark, usage, expected_usage)
    is_same(spark, os, expected_os)
    is_same(spark, locales, expected_locales)
    is_same(spark, top10addon, expected_addons)
def test_integration_no_country_list(spark, main_summary_data):
    """Single-day aggregation with no country list: only the 'All'
    rollup should be produced, with the expected per-metric values."""
    main_summary = spark.createDataFrame(*main_summary_data)
    usage, os, locales, top10addon = agg_usage(
        main_summary,
        date='20180201',
        period=1,
        sample_factor=100.0 / 1,
        country_list=None)

    # Every expected row shares the same rollup country and date.
    common = {"country": "All", "submission_date_s3": "20180201"}

    expected_usage = [dict(common, **{
        "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
        "avg_intensity": 1.0,
        "pct_latest_version": 50.0,
        "pct_TP": 50.0,
        "MAU": 200,
        "YAU": 200,
        "pct_new_user": 50.0,
        "pct_addon": 100.0
    })]
    expected_os = [
        dict(common, os="Windows 10", pct_on_os=50.0),
        dict(common, os="Mac OS X", pct_on_os=50.0),
    ]
    expected_locales = [
        dict(common, locale="en-US", pct_on_locale=50.0),
        dict(common, locale="DE", pct_on_locale=50.0),
    ]
    expected_addons = [dict(common,
                            addon_id=u'disableSHA1rollout',
                            addon_name=u'SHA-1 deprecation staged rollout',
                            pct_with_addon=100.0)]

    is_same(spark, usage, expected_usage)
    is_same(spark, os, expected_os)
    is_same(spark, locales, expected_locales)
    is_same(spark, top10addon, expected_addons)
def test_integration_multiple_countries_and_days_no_country_list(
        spark, main_summary_data_multiple):
    '''
    tests without country list for data including the following cases:
        - multiple countries
            a)include countries that are not in country list
            b)include countries into country_list that are not in data
        - clients with only pings from outside date range
        - clients with some pings from outside date range
    '''
    main_summary = spark.createDataFrame(*main_summary_data_multiple)
    # BUG FIX: agg_usage returns FOUR dataframes (usage, os, locales,
    # top10addon) — see the other tests in this file.  The original
    # unpacked only three names, which raises ValueError before any
    # assertion runs.  The os dataframe is unpacked but intentionally
    # not asserted here (no expected_os fixture values are defined for
    # the multi-country data).
    usage, os, locales, top10addon = agg_usage(main_summary,
                                               date='20180201',
                                               period=7,
                                               sample_factor=100.0 / 1,
                                               country_list=None)

    expected_usage = [{
        "submission_date_s3": "20180201",
        "country": "All",
        "avg_daily_usage(hours)": 600.0 / 3600 / 5.0,
        "avg_intensity": 1.0,
        "pct_latest_version": 80.0,
        "pct_TP": 20.0,
        "MAU": 500,
        "YAU": 600,
        "pct_new_user": 40.0,
        "pct_addon": 100.0
    }]
    expected_locales = [{
        "country": "All",
        "submission_date_s3": "20180201",
        "locale": "en-US",
        "pct_on_locale": 60.0
    }, {
        "country": "All",
        "submission_date_s3": "20180201",
        "locale": "DE",
        "pct_on_locale": 40.0
    }]
    expected_addons = [{
        "country": "All",
        "submission_date_s3": "20180201",
        "addon_id": u'disableSHA1rollout',
        "addon_name": u'SHA-1 deprecation staged rollout',
        "pct_with_addon": 100.0
    }]

    is_same(spark, usage, expected_usage)
    is_same(spark, locales, expected_locales)
    is_same(spark, top10addon, expected_addons)
def test_update_history_fxhealth_without_history(spark, main_summary_data):
    """update_history with history=None should seed each country's fxhealth
    series as a single-entry list holding that day's metrics."""
    frame = spark.createDataFrame(*main_summary_data)
    usage_sdf, os_sdf, locales_sdf, addons_sdf = agg_usage(
        frame,
        date='20180201',
        period=1,
        sample_factor=100.0 / 1,
        country_list=['DE'])

    fxhealth, webusage = all_metrics_per_day(
        ['DE'],
        usage_pd_df=usage_sdf.toPandas(),
        os_pd_df=os_sdf.toPandas(),
        locales_pd_df=locales_sdf.toPandas(),
        topaddons_pd_df=addons_sdf.toPandas())

    updated_fxhealth = update_history(fxhealth, None)

    # For this single-country fixture 'DE' and the 'All' rollup carry
    # identical metrics, so one entry describes both.
    day_entry = {
        "date": "2018-02-01",
        "metrics": {
            "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
            "avg_intensity": 1.0,
            "pct_latest_version": 50.0,
            "MAU": 200.0,
            "YAU": 200.0,
            "pct_new_user": 50.0
        }
    }
    expected_fxhealth = {
        'DE': [day_entry],
        'All': [day_entry],
    }

    assert expected_fxhealth == updated_fxhealth
def test_processing_one_day(spark, main_summary_data):
    """Single-day end-to-end processing for one country ('DE').

    BUG FIX: agg_usage returns FOUR dataframes (usage, os, locales,
    top10addon) — see the other tests in this file — so the original
    three-name unpacking raised ValueError.  The os dataframe is now
    unpacked and passed to all_metrics_per_day as os_pd_df, and the
    expected webusage payload gains the matching "os" breakdown, for
    consistency with test_update_history_webusage_without_history,
    which uses the same fixture and parameters.
    """
    main_summary = spark.createDataFrame(*main_summary_data)
    usage, os, locales, top10addon = agg_usage(main_summary,
                                               date='20180201',
                                               period=1,
                                               sample_factor=100.0 / 1,
                                               country_list=['DE'])
    usage_df = usage.toPandas()
    os_df = os.toPandas()
    locales_df = locales.toPandas()
    top10addon_df = top10addon.toPandas()

    fxhealth, webusage = all_metrics_per_day(['DE'],
                                             usage_pd_df=usage_df,
                                             os_pd_df=os_df,
                                             locales_pd_df=locales_df,
                                             topaddons_pd_df=top10addon_df)

    expected_fxhealth = {
        'DE': {
            "date": "2018-02-01",
            "metrics": {
                "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
                "avg_intensity": 1.0,
                "pct_latest_version": 50.0,
                "MAU": 200.0,
                "YAU": 200.0,
                "pct_new_user": 50.0
            }
        },
        'All': {
            "date": "2018-02-01",
            "metrics": {
                "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
                "avg_intensity": 1.0,
                "pct_latest_version": 50.0,
                "MAU": 200.0,
                "YAU": 200.0,
                "pct_new_user": 50.0
            }
        }
    }
    expected_webusage = {
        'DE': {
            "date": "2018-02-01",
            "metrics": {
                "pct_TP": 50.0,
                "pct_addon": 100.0,
                "os": {
                    u"Mac OS X": 50.0,
                    u"Windows 10": 50.0
                },
                "locale": {
                    u"en-US": 50.0,
                    u"DE": 50.0
                },
                "top10addons": {
                    u'SHA-1 deprecation staged rollout': 100.0
                }
            }
        },
        'All': {
            "date": "2018-02-01",
            "metrics": {
                "pct_TP": 50.0,
                "pct_addon": 100.0,
                "os": {
                    u"Mac OS X": 50.0,
                    u"Windows 10": 50.0
                },
                "locale": {
                    u"en-US": 50.0,
                    u"DE": 50.0
                },
                "top10addons": {
                    u'SHA-1 deprecation staged rollout': 100.0
                }
            }
        }
    }

    assert expected_fxhealth == fxhealth
    assert expected_webusage == webusage