def test_get_timelimits_if_ready(experiments):
    """Verify when each analysis period first produces usable time limits.

    Walks a simulated "current date" forward from the experiment launch and
    checks that daily/weekly/28-day/overall limits become available only
    after enough days of data have elapsed.
    """
    spec_a = AnalysisSpec().resolve(experiments[0])
    spec_b = AnalysisSpec().resolve(experiments[2])
    analysis = Analysis("test", "test", spec_a)
    analysis2 = Analysis("test", "test", spec_b)

    launch = dt.datetime(2019, 12, 1, tzinfo=pytz.utc)

    # Launch day: nothing is ready yet.
    date = launch + timedelta(days=0)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date) is None
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    # Two days in: still nothing.
    date = launch + timedelta(days=2)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date) is None
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    # One week in: daily limits exist, weekly limits do not.
    date = launch + timedelta(days=7)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    # Thirteen days in: both daily and weekly limits exist.
    date = launch + timedelta(days=13)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date)

    # Overall limits flip from unavailable to available across 2020-03-01
    # for the first experiment; the third experiment is still not ready then.
    date = dt.datetime(2020, 2, 29, tzinfo=pytz.utc)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.OVERALL, date) is None

    date = dt.datetime(2020, 3, 1, tzinfo=pytz.utc)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.OVERALL, date)
    assert analysis2._get_timelimits_if_ready(AnalysisPeriod.OVERALL, date) is None

    # 34 days in: the 28-day window is available.
    date = launch + timedelta(days=34)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAYS_28, date)
def test_regression_20200316():
    """Regression test for an addon experiment payload from 2020-03-16.

    The Experimenter V1 payload below should convert to an experiment and
    dry-run without raising.
    """
    experiment_json = r""" { "experiment_url": "https://blah/experiments/search-tips-aka-nudges/", "type": "addon", "name": "Search Tips aka Nudges", "slug": "search-tips-aka-nudges", "public_name": "Search Tips", "public_description": "Search Tips are designed to increase engagement with the QuantumBar.", "status": "Live", "countries": [], "platform": "All Platforms", "start_date": 1578960000000, "end_date": 1584921600000, "population": "2% of Release Firefox 72.0 to 74.0", "population_percent": "2.0000", "firefox_channel": "Release", "firefox_min_version": "72.0", "firefox_max_version": "74.0", "addon_experiment_id": null, "addon_release_url": "https://bugzilla.mozilla.org/attachment.cgi?id=9120542", "pref_branch": null, "pref_name": null, "pref_type": null, "proposed_start_date": 1578960000000, "proposed_enrollment": 21, "proposed_duration": 69, "normandy_slug": "addon-search-tips-aka-nudges-release-72-74-bug-1603564", "normandy_id": 902, "other_normandy_ids": [], "variants": [ { "description": "Standard address bar experience", "is_control": false, "name": "control", "ratio": 50, "slug": "control", "value": null, "addon_release_url": null, "preferences": [] }, { "description": "", "is_control": true, "name": "treatment", "ratio": 50, "slug": "treatment", "value": null, "addon_release_url": null, "preferences": [] } ] } """

    experiment = ExperimentV1.from_dict(json.loads(experiment_json)).to_experiment()
    config = AnalysisSpec().resolve(experiment)

    # A dry run should complete without raising for this payload.
    analysis = Analysis("test", "test", config)
    analysis.run(current_date=dt.datetime(2020, 3, 16, tzinfo=pytz.utc), dry_run=True)
def test_no_enrollments(self, client, project_id, static_dataset, temporary_dataset):
    """An experiment with no enrolled clients produces empty result tables and zero counts."""
    experiment = Experiment(
        experimenter_slug="test-experiment-2",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="a", ratio=0.5), Branch(slug="b", ratio=0.5)],
        reference_branch="a",
        features=[],
        normandy_slug="test-experiment-2",
    )
    config = AnalysisSpec().resolve(experiment)

    self.analysis_mock_run(config, static_dataset, temporary_dataset, project_id)

    # The per-client week-1 table exists but holds no rows.
    query_job = client.client.query(
        f""" SELECT * FROM `{project_id}.{temporary_dataset}.test_experiment_2_week_1` ORDER BY enrollment_date DESC """
    )
    assert query_job.result().total_rows == 0

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
    ).to_dataframe()

    # Client counts per branch should both be zero.
    count_by_branch = stats.query("statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["a", "point"] == 0.0
    assert count_by_branch.loc["b", "point"] == 0.0

    # The aggregated weekly statistics table must still be created.
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        )
        is not None
    )
def test_regression_20200320():
    """Regression test for a pref experiment payload from 2020-03-20.

    The payload declares no enrollment period, so running the analysis must
    raise NoEnrollmentPeriodException instead of crashing elsewhere.
    """
    experiment_json = r""" { "experiment_url": "https://experimenter.services.mozilla.com/experiments/impact-of-level-2-etp-on-a-custom-distribution/", "type": "pref", "name": "Impact of Level 2 ETP on a Custom Distribution", "slug": "impact-of-level-2-etp-on-a-custom-distribution", "public_name": "Impact of Level 2 ETP", "status": "Live", "start_date": 1580169600000, "end_date": 1595721600000, "proposed_start_date": 1580169600000, "proposed_enrollment": null, "proposed_duration": 180, "normandy_slug": "pref-impact-of-level-2-etp-on-a-custom-distribution-release-72-80-bug-1607493", "normandy_id": 906, "other_normandy_ids": [], "variants": [ { "description": "", "is_control": true, "name": "treatment", "ratio": 100, "slug": "treatment", "value": "true", "addon_release_url": null, "preferences": [] } ] } """  # noqa

    experiment = ExperimentV1.from_dict(json.loads(experiment_json)).to_experiment()
    config = AnalysisSpec().resolve(experiment)
    analysis = Analysis("test", "test", config)

    with pytest.raises(NoEnrollmentPeriodException):
        analysis.run(current_date=dt.datetime(2020, 3, 19, tzinfo=pytz.utc), dry_run=True)
def test_metrics(self, client, project_id, static_dataset, temporary_dataset):
    """End-to-end check that a custom weekly metric produces per-client rows and statistics."""
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="branch1", ratio=0.5), Branch(slug="branch2", ratio=0.5)],
        reference_branch="branch2",
        features=[],
        normandy_slug="test-experiment",
    )
    config = AnalysisSpec().resolve(experiment)

    # Point the metric at the static test dataset instead of production tables.
    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )
    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expr=agg_sum("active_hours_sum"),
    )
    config.metrics = {AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]}

    self.analysis_mock_run(config, static_dataset, temporary_dataset, project_id)

    query_job = client.client.query(
        f""" SELECT * FROM `{project_id}.{temporary_dataset}.test_experiment_week_1` ORDER BY enrollment_date DESC """
    )

    # Expected per-client enrollment rows, in enrollment_date DESC order.
    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
    ]
    for i, row in enumerate(query_job.result()):
        for key, expected_value in expected_metrics_results[i].items():
            assert row[key] == expected_value

    # Both the weekly rollup and the week-1 statistics tables must exist.
    assert (
        client.client.get_table(f"{project_id}.{temporary_dataset}.test_experiment_weekly")
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
    ).to_dataframe()

    # One enrolled client per branch.
    count_by_branch = stats.query("statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["branch1", "point"] == 1.0
    assert count_by_branch.loc["branch2", "point"] == 1.0

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        )
        is not None
    )
def test_logging(self, monkeypatch, client, project_id, static_dataset, temporary_dataset):
    """Statistic failures during analysis should be written to the BigQuery log table.

    The BootstrapMean below is configured with an (intentionally invalid)
    confidence_interval so that computing the statistic fails and emits an
    ERROR-level log record.
    """
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="branch1", ratio=0.5), Branch(slug="branch2", ratio=0.5)],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )
    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )
    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )
    config.metrics = {
        AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean(confidence_interval=10))]
    }

    log_config = LogConfiguration(
        log_project_id=project_id,
        log_dataset_id=temporary_dataset,
        log_table_id="logs",
        log_to_bigquery=True,
        task_profiling_log_table_id="task_profiling_logs",
        task_monitoring_log_table_id="task_monitoring_logs",
        capacity=1,
    )
    self.analysis_mock_run(
        monkeypatch, config, static_dataset, temporary_dataset, project_id, log_config
    )

    # The log table must exist and contain at least one record.
    assert client.client.get_table(f"{project_id}.{temporary_dataset}.logs") is not None

    logs = list(client.client.list_rows(f"{project_id}.{temporary_dataset}.logs"))
    assert len(logs) >= 1

    # The failed statistic must have been logged at ERROR level.
    error_logs = [log for log in logs if log.get("log_level") == "ERROR"]
    assert (
        "Error while computing statistic bootstrap_mean for metric active_hours"
        in error_logs[0].get("message")
    )
    assert error_logs[0].get("log_level") == "ERROR"
def test_with_segments(self, monkeypatch, client, project_id, static_dataset, temporary_dataset):
    """Segments configured on the experiment should appear as per-client columns and statistics."""
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="branch1", ratio=0.5), Branch(slug="branch2", ratio=0.5)],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )
    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )
    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )

    # Segment clients by regular-user status from the static test dataset.
    test_clients_last_seen = SegmentDataSource(
        "clients_last_seen", f"`{project_id}.test_data.clients_last_seen`"
    )
    regular_user_v3 = Segment(
        "regular_user_v3",
        test_clients_last_seen,
        "COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE)",
    )
    config.experiment.segments = [regular_user_v3]

    config.metrics = {AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]}

    self.analysis_mock_run(monkeypatch, config, static_dataset, temporary_dataset, project_id)

    query_job = client.client.query(
        f""" SELECT * FROM `{project_id}.{temporary_dataset}.test_experiment_enrollments_week_1` ORDER BY enrollment_date DESC """
    )

    # Expected per-client rows, including the segment flag column.
    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
            "regular_user_v3": True,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
            "regular_user_v3": False,
        },
    ]
    for i, row in enumerate(query_job.result()):
        for key, expected_value in expected_metrics_results[i].items():
            assert row[key] == expected_value

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_enrollments_weekly"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
    ).to_dataframe()

    # Exactly one count row per (segment, analysis_basis, window, branch) combination.
    assert (
        stats.query("metric == 'identity' and statistic == 'count'")
        .groupby(["segment", "analysis_basis", "window_index", "branch"])
        .size()
        == 1
    ).all()

    # One client per branch in the "all" segment, on the enrollments basis.
    count_by_branch = stats.query("segment == 'all' and statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["branch1", "point"] == 1.0
    assert count_by_branch.loc["branch2", "point"] == 1.0
    assert count_by_branch.loc["branch2", "analysis_basis"] == "enrollments"
def test_no_enrollments(self, monkeypatch, client, project_id, static_dataset, temporary_dataset):
    """With no enrolled clients, result tables are empty and branch counts are zero."""
    experiment = Experiment(
        experimenter_slug="test-experiment-2",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="a", ratio=0.5), Branch(slug="b", ratio=0.5)],
        reference_branch="a",
        normandy_slug="test-experiment-2",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )
    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )
    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )
    config.metrics = {AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]}

    self.analysis_mock_run(monkeypatch, config, static_dataset, temporary_dataset, project_id)

    # The enrollments week-1 table exists but holds no rows.
    query_job = client.client.query(
        f""" SELECT * FROM `{project_id}.{temporary_dataset}.test_experiment_2_enrollments_week_1` ORDER BY enrollment_date DESC """
    )
    assert query_job.result().total_rows == 0

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
    ).to_dataframe()

    # Both branches report a zero client count on the enrollments basis.
    count_by_branch = stats.query("statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["a", "point"] == 0.0
    assert count_by_branch.loc["b", "point"] == 0.0
    assert count_by_branch.loc["b", "analysis_basis"] == "enrollments"

    # The aggregated weekly statistics table must still be created.
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        )
        is not None
    )
def test_metrics_with_exposure(self, monkeypatch, client, project_id, static_dataset, temporary_dataset):
    """Metrics on the exposures analysis basis should populate the *_exposures_* tables."""
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[Branch(slug="branch1", ratio=0.5), Branch(slug="branch2", ratio=0.5)],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )
    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )
    # Metric restricted to the EXPOSURES analysis basis.
    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
        analysis_bases=[AnalysisBasis.EXPOSURES],
    )
    config.metrics = {AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]}

    # Clients count as exposed once they have any active hours in the window.
    config.experiment.exposure_signal = ExposureSignal(
        name="ad_exposure",
        data_source=test_clients_daily,
        select_expression="active_hours_sum > 0",
        friendly_name="Ad exposure",
        description="Clients have clicked on ad",
        window_start="enrollment_start",
        window_end="analysis_window_end",
    )

    self.analysis_mock_run(monkeypatch, config, static_dataset, temporary_dataset, project_id)

    query_job = client.client.query(
        f""" SELECT * FROM `{project_id}.{temporary_dataset}.test_experiment_exposures_week_1` ORDER BY enrollment_date DESC """
    )

    # Expected per-client rows, in enrollment_date DESC order.
    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
    ]
    result_rows = query_job.result()
    for i, row in enumerate(result_rows):
        for key, expected_value in expected_metrics_results[i].items():
            assert row[key] == expected_value

    # Exposure-basis rollup and the statistics tables must all exist.
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_exposures_weekly"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        )
        is not None
    )