def test_os_distribution_country_list(spark, main_summary_data):
    ''' os_on_date with country_list=['DE'] must return the OS share rows
        for both the "All" aggregate and "DE".
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = os_on_date(summary_df, '20180201', country_list=['DE'])

    # One expected row per (country, os) pair; each pair holds a 50% share.
    country_os_pairs = [
        ("All", "Windows 10"),
        ("All", "Mac OS X"),
        ("DE", "Mac OS X"),
        ("DE", "Windows 10"),
    ]
    expected = [
        {
            "country": country,
            "submission_date_s3": "20180201",
            "os": os_name,
            "pct_on_os": 50.0,
        }
        for country, os_name in country_os_pairs
    ]

    is_same(spark, actual, expected)
def test_locale_country_list(spark, main_summary_data):
    ''' locale_on_date with country_list=['DE'] must return the locale
        share rows for both the "All" aggregate and "DE".
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    # NOTE(review): 4 is the third positional argument of locale_on_date;
    # presumably a top-N / period setting -- confirm against its signature.
    actual = locale_on_date(summary_df, '20180201', 4, country_list=['DE'])

    # One expected row per (country, locale) pair; each pair holds 50%.
    country_locale_pairs = [
        ("All", "en-US"),
        ("All", "DE"),
        ("DE", "en-US"),
        ("DE", "DE"),
    ]
    expected = [
        {
            "country": country,
            "submission_date_s3": "20180201",
            "locale": locale,
            "pct_on_locale": 50.0,
        }
        for country, locale in country_locale_pairs
    ]

    is_same(spark, actual, expected)
def test_get_avg_daily_usage_no_country_list(spark, main_summary_data):
    ''' get_daily_avg_session without a country list must return a single
        "All" row carrying the average daily usage in hours.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = get_daily_avg_session(summary_df, "20180201")

    expected_row = {
        "country": "All",
        "submission_date_s3": "20180201",
        # NOTE(review): presumably 300 seconds of usage spread over 2
        # clients, converted to hours -- confirm against the fixture.
        "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
    }

    is_same(spark, actual, [expected_row])
def test_get_avg_intensity_no_country_list(spark, main_summary_data):
    ''' get_avg_intensity without a country list must return a single
        "All" row with avg_intensity equal to 1.0.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = get_avg_intensity(summary_df, "20180201")

    expected_row = {
        "country": "All",
        "submission_date_s3": "20180201",
        "avg_intensity": 1.0,
    }

    is_same(spark, actual, [expected_row])
def test_pct_tracking_protection_no_country_list(spark, main_summary_data):
    ''' pct_tracking_protection without a country list must return a
        single "All" row with pct_TP equal to 50.0.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = pct_tracking_protection(summary_df, '20180201')

    expected_row = {
        "submission_date_s3": "20180201",
        "country": "All",
        "pct_TP": 50.0,
    }

    is_same(spark, actual, [expected_row])
def test_has_addons_no_country_list(spark, main_summary_data):
    ''' get_addon without a country list must return a single "All" row
        with pct_addon equal to 100.0.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = get_addon(summary_df, '20180201')

    expected_row = {
        "country": "All",
        "submission_date_s3": "20180201",
        "pct_addon": 100.0,
    }

    is_same(spark, actual, [expected_row])
def test_new_users_no_country_list(spark, main_summary_data):
    ''' new_users without a country list must return a single "All" row
        with pct_new_user equal to 50.0.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = new_users(summary_df, '20180201')

    expected_row = {
        "country": "All",
        # NOTE(review): this key is spelled with an upper-case "S3",
        # unlike the "submission_date_s3" used by the other tests in this
        # file -- confirm it matches the column new_users emits.
        "submission_date_S3": "20180201",
        "pct_new_user": 50.0,
    }

    is_same(spark, actual, [expected_row])
def test_pct_latest_version_no_country_list(spark, main_summary_data):
    ''' pct_new_version without a country list must return a single "All"
        row with pct_latest_version equal to 50.0.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = pct_new_version(summary_df, "20180201")

    expected_row = {
        "country": "All",
        "submission_date_s3": "20180201",
        "pct_latest_version": 50.0,
    }

    is_same(spark, actual, [expected_row])
def test_top_10_addons_no_country_list(spark, main_summary_data):
    ''' top_10_addons_on_date without a country list must return a single
        "All" row for the SHA-1 rollout addon with a 100% share.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    # NOTE(review): 5 is the third positional argument of
    # top_10_addons_on_date -- confirm its meaning against the signature.
    actual = top_10_addons_on_date(summary_df, '20180201', 5)

    expected_row = {
        "country": "All",
        "submission_date_s3": "20180201",
        "addon_id": 'disableSHA1rollout',
        "addon_name": 'SHA-1 deprecation staged rollout',
        "pct_with_addon": 100.0,
    }

    is_same(spark, actual, [expected_row])
def test_YAU_no_country_list(spark, main_summary_data):
    ''' getYAU without a country list, scaled by sample_factor 100, must
        return a single "All" row with a count of 200.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = getYAU(summary_df, '20180201', sample_factor=100.0 / 1)

    expected_row = {
        "country": "All",
        # NOTE(review): the count is keyed "MAU" although this test
        # exercises getYAU -- confirm the column name getYAU really emits.
        "MAU": 200,
        "submission_date_s3": "20180201",
    }

    is_same(spark, actual, [expected_row])
def test_MAU_no_country_list(spark, main_summary_data):
    ''' getMAU without a country list, scaled by sample_factor 100, must
        return a single "All" row with a count of 200.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = getMAU(summary_df, date='20180201', sample_factor=100.0 / 1)

    expected_row = {
        "country": "All",
        # NOTE(review): the count is keyed "active_users" here, while
        # test_MAU_country_list in this file expects "MAU" from the same
        # getMAU call -- confirm which column name is correct.
        "active_users": 200,
        "submission_date_s3": "20180201",
    }

    # verbose=True is forwarded to is_same; presumably it turns on
    # diagnostic output -- confirm against the helper.
    is_same(spark, actual, [expected_row], verbose=True)
def test_integration_missing_fields_no_country_list(
        spark, main_summary_data_null_value):
    ''' Integration test without a country list, for data in which all or
        some values of a given field are null, '', or zero.
    '''
    summary_df = spark.createDataFrame(*main_summary_data_null_value)
    usage_df, os_df, locales_df, addons_df = agg_usage(
        summary_df,
        date='20180201',
        period=7,
        sample_factor=100.0 / 1,
        country_list=None)

    expected_usage = [{
        "submission_date_s3": "20180201",
        "country": "All",
        "avg_daily_usage(hours)": 500.0 / 3600 / 4.0,
        "avg_intensity": 0.75,
        "pct_latest_version": 0.0,
        "pct_TP": 25.0,
        "MAU": 400,
        "YAU": 400,
        "pct_new_user": 25.0,
        "pct_addon": 100.0,
    }]

    # (os, share) pairs expected for the "All" aggregate.
    expected_os = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "os": os_name,
            "pct_on_os": os_share,
        }
        for os_name, os_share in [("Windows 10", 25.0), ("Mac OS X", 75.0)]
    ]

    # (locale, share) pairs expected for the "All" aggregate.
    expected_locales = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "locale": locale_name,
            "pct_on_locale": locale_share,
        }
        for locale_name, locale_share in [("en-US", 50.0), ("DE", 50.0)]
    ]

    expected_addons = [{
        "country": "All",
        "submission_date_s3": "20180201",
        "addon_id": u'disableSHA1rollout',
        "addon_name": u'SHA-1 deprecation staged rollout',
        "pct_with_addon": 100.0,
    }]

    # Compare each returned frame with its expectation, in the order
    # usage, os, locales, addons.
    checks = [
        (usage_df, expected_usage),
        (os_df, expected_os),
        (locales_df, expected_locales),
        (addons_df, expected_addons),
    ]
    for actual_df, expected_rows in checks:
        is_same(spark, actual_df, expected_rows)
def test_integration_no_country_list(spark, main_summary_data):
    ''' Integration test: agg_usage without a country list must return
        the usage, OS, locale and addon tables for the "All" aggregate.
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    usage_df, os_df, locales_df, addons_df = agg_usage(
        summary_df,
        date='20180201',
        period=1,
        sample_factor=100.0 / 1,
        country_list=None)

    expected_usage = [{
        "submission_date_s3": "20180201",
        "country": "All",
        "avg_daily_usage(hours)": 300.0 / 3600 / 2.0,
        "avg_intensity": 1.0,
        "pct_latest_version": 50.0,
        "pct_TP": 50.0,
        "MAU": 200,
        "YAU": 200,
        "pct_new_user": 50.0,
        "pct_addon": 100.0,
    }]

    # Both operating systems are expected at a 50% share.
    expected_os = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "os": os_name,
            "pct_on_os": 50.0,
        }
        for os_name in ("Windows 10", "Mac OS X")
    ]

    # Both locales are expected at a 50% share.
    expected_locales = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "locale": locale_name,
            "pct_on_locale": 50.0,
        }
        for locale_name in ("en-US", "DE")
    ]

    expected_addons = [{
        "country": "All",
        "submission_date_s3": "20180201",
        "addon_id": u'disableSHA1rollout',
        "addon_name": u'SHA-1 deprecation staged rollout',
        "pct_with_addon": 100.0,
    }]

    # Compare each returned frame with its expectation, in the order
    # usage, os, locales, addons.
    checks = [
        (usage_df, expected_usage),
        (os_df, expected_os),
        (locales_df, expected_locales),
        (addons_df, expected_addons),
    ]
    for actual_df, expected_rows in checks:
        is_same(spark, actual_df, expected_rows)
def test_MAU_country_list(spark, main_summary_data):
    ''' getMAU with country_list=["DE"], scaled by sample_factor 100, must
        return a count of 200 for both the "All" aggregate and "DE".
    '''
    summary_df = spark.createDataFrame(*main_summary_data)
    actual = getMAU(summary_df,
                    date='20180201',
                    sample_factor=100.0 / 1,
                    country_list=["DE"])

    # NOTE(review): the count is keyed "MAU" here, while
    # test_MAU_no_country_list in this file expects "active_users" from
    # the same getMAU call -- confirm which column name is correct.
    expected = [
        {
            "country": country,
            "MAU": 200,
            "submission_date_s3": "20180201",
        }
        for country in ("All", "DE")
    ]

    is_same(spark, actual, expected)
def test_integration_multiple_countries_and_days_no_country_list(
        spark, main_summary_data_multiple):
    ''' Integration test without a country list, for data including the
        following cases:
          - multiple countries
            a) include countries that are not in country list
            b) include countries into country_list that are not in data
          - clients with only pings from outside date range
          - clients with some pings from outside date range
    '''
    main_summary = spark.createDataFrame(*main_summary_data_multiple)

    # The other integration tests in this file unpack agg_usage into four
    # frames (usage, os, locales, top10addon). Unpacking only three names
    # from a 4-tuple raises "ValueError: too many values to unpack", so the
    # OS frame is received here as well and deliberately left unchecked
    # (this test defines no OS expectation).
    usage, _os, locales, top10addon = agg_usage(main_summary,
                                                date='20180201',
                                                period=7,
                                                sample_factor=100.0 / 1,
                                                country_list=None)

    expected_usage = [
        {
            "submission_date_s3": "20180201",
            "country": "All",
            "avg_daily_usage(hours)": 600.0 / 3600 / 5.0,
            "avg_intensity": 1.0,
            "pct_latest_version": 80.0,
            "pct_TP": 20.0,
            "MAU": 500,
            "YAU": 600,
            "pct_new_user": 40.0,
            "pct_addon": 100.0
        }
    ]

    expected_locales = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "locale": "en-US",
            "pct_on_locale": 60.0
        },
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "locale": "DE",
            "pct_on_locale": 40.0
        }
    ]

    expected_addons = [
        {
            "country": "All",
            "submission_date_s3": "20180201",
            "addon_id": u'disableSHA1rollout',
            "addon_name": u'SHA-1 deprecation staged rollout',
            "pct_with_addon": 100.0
        }
    ]

    is_same(spark, usage, expected_usage)
    is_same(spark, locales, expected_locales)
    is_same(spark, top10addon, expected_addons)