def test_process_metrics(spark):
    exp = Experiment('a-stub', '20190101', num_dates_enrollment=8)
    enrollments = exp.get_enrollments(
        spark, _get_enrollment_view(slug="a-stub")
    )

    ds_df_A = register_data_source_fixture(spark, name='ds_df_A')
    ds_df_B = register_data_source_fixture(spark, name='ds_df_B')

    ds_A = DataSource.from_dataframe('ds_df_A', ds_df_A)
    ds_B = DataSource.from_dataframe('ds_df_B', ds_df_B)

    m1 = Metric.from_col('m1', ds_df_A.numeric_col, ds_A)
    m2 = Metric.from_col('m2', ds_df_A.bool_col, ds_A)
    m3 = Metric.from_col('m3', ds_df_B.numeric_col, ds_B)

    metric_list = [m1, m2, m3]

    exp = Experiment('a-stub', '20190101')

    data_sources_and_metrics = exp._process_metrics(enrollments, metric_list)

    assert len(data_sources_and_metrics) == 2

    assert len(data_sources_and_metrics[ds_df_A]) == 2
    assert len(data_sources_and_metrics[ds_df_B]) == 1

    assert 'numeric_col' in repr(data_sources_and_metrics[ds_df_B][0])
    assert '`m3`' in repr(data_sources_and_metrics[ds_df_B][0])
    assert repr(data_sources_and_metrics[ds_df_B][0]) in {
        "Column<b'numeric_col AS `m3`'>",  # py3
        "Column<numeric_col AS `m3`>",  # py2
    }
def test_complains_about_template_without_default():
    with pytest.raises(ValueError):
        DataSource(
            name="foo",
            from_expr="moz-fx-data-shared-prod.{dataset}.foo",
        )

    DataSource(
        name="foo",
        from_expr="moz-fx-data-shared-prod.{dataset}.foo",
        default_dataset="dataset",
    )
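# A sketch of the complementary happy path the second constructor implies:
# once a default_dataset is supplied, the {dataset} template can be rendered.
# The `from_expr_for` accessor is assumed from mozanalysis's DataSource API,
# and the dataset names are illustrative; not collected by pytest on purpose.
def _sketch_template_rendered_with_default_dataset():
    ds = DataSource(
        name="foo",
        from_expr="moz-fx-data-shared-prod.{dataset}.foo",
        default_dataset="org_mozilla_firefox",
    )
    # With no explicit dataset, the default fills the template.
    assert ds.from_expr_for(None) == "moz-fx-data-shared-prod.org_mozilla_firefox.foo"
    # An explicit dataset overrides the default.
    assert ds.from_expr_for("org_mozilla_fenix") == "moz-fx-data-shared-prod.org_mozilla_fenix.foo"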
def test_datasource_constructor_fails(name, from_expr, experiments_column_type, error):
    with pytest.raises(error):
        DataSource(
            name=name,
            from_expr=from_expr,
            experiments_column_type=experiments_column_type,
        )
def _get_metrics(spark):
    ds_df = _get_data_source_df(spark)
    ds = DataSource.from_dataframe('bla_ds', ds_df)
    return {
        'how_many_ones': Metric.from_col(
            'how_many_ones', agg_sum(ds_df.constant_one), ds
        ),
    }
def test_to_mozanalysis_metric(self):
    metric = Metric(
        name="test",
        data_source=DataSource(name="test_data_source", from_expr="test.test"),
        select_expression="test",
        analysis_bases=[AnalysisBasis.EXPOSURES],
    )

    mozanalysis_metric = metric.to_mozanalysis_metric()

    assert mozanalysis_metric
    assert mozanalysis_metric.name == metric.name
    assert metric.analysis_bases == [AnalysisBasis.EXPOSURES]
def test_process_metrics_dupe_data_source(spark):
    exp = Experiment('a-stub', '20190101', num_dates_enrollment=8)
    enrollments = exp.get_enrollments(
        spark, _get_enrollment_view(slug="a-stub")
    )

    ds_df = register_data_source_fixture(spark, name='ds_df_A')

    ds_1 = DataSource.from_dataframe('ds_df_A', ds_df)
    ds_2 = DataSource.from_dataframe('ds_df_A', ds_df)

    m1 = Metric.from_col('m1', ds_df.numeric_col, ds_1)
    m2 = Metric.from_col('m2', ds_df.bool_col, ds_2)

    metric_list = [m1, m2]

    exp = Experiment('a-stub', '20190101')

    data_sources_and_metrics = exp._process_metrics(enrollments, metric_list)

    assert len(data_sources_and_metrics) == 1
    assert len(data_sources_and_metrics[ds_df]) == 2
def _make_metric_list(report):
    metric_list = list()
    for metric in report['metrics']:
        try:
            metric_list.append(getattr(desktop, metric))
        except AttributeError:
            print(f'`{metric}` is not a pre-defined Metric. Will skip')
    if 'user_defined_metrics' in report:
        for data_source, data_source_metrics in report['user_defined_metrics'].items():
            if not getattr(desktop, data_source, None):
                from_expr = report['user_defined_data_source'][data_source]
                data_source = DataSource(
                    name=data_source,
                    from_expr=from_expr,
                    experiments_column_type='native',
                )
            else:
                data_source = getattr(desktop, data_source)
            for key, select_expr in data_source_metrics.items():
                new_metric = Metric(
                    name=key,
                    data_source=data_source,
                    select_expr=select_expr,
                )
                metric_list.append(new_metric)
    return metric_list
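# For reference, a minimal `report` dict that exercises both branches of
# _make_metric_list. The key names come from the reads in the function above;
# the metric, table, and column names are illustrative assumptions only.
_example_report = {
    'metrics': ['active_hours'],  # must name a pre-defined Metric in `desktop`
    'user_defined_metrics': {
        'my_source': {'my_metric': 'SUM(my_column)'},
    },
    'user_defined_data_source': {
        'my_source': '`moz-fx-data-shared-prod.my_dataset.my_table`',
    },
}
# metric_list = _make_metric_list(_example_report)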
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from pyspark.sql import functions as F

from mozanalysis.metrics import Metric, DataSource, agg_sum, agg_any
from mozanalysis.utils import all_  # , any_


clients_daily = DataSource.from_table_name('clients_daily')
main_summary = DataSource.from_table_name('main_summary')
search_clients_daily = DataSource.from_table_name('search_clients_daily')
events = DataSource.from_table_name('events')


@DataSource.from_func()
def telemetry_shield_study_parquet(spark, experiment):
    """DataSource commonly used with addon studies.

    Used when we need to collect experiment-specific telemetry. We
    filter to just include the data submitted by this experiment's
    addon.
    """
    tssp = spark.table('telemetry_shield_study_parquet')

    this_exp = tssp.filter(
        tssp.payload.study_name == experiment.experiment_slug
    ).withColumnRenamed('submission', 'submission_date_s3')

    if experiment.addon_version is None:
        return this_exp
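# Hypothetical usage sketch (not part of this module): a DataSource built via
# the decorator above should be usable with Metric.from_col, like the
# table-backed sources. `probe_value` is a made-up payload field, and the
# Metric.from_col wiring mirrors how the other sources here are consumed:
#
#     tssp_df = spark.table('telemetry_shield_study_parquet')
#     my_probe_sum = Metric.from_col(
#         'my_probe_sum',
#         agg_sum(tssp_df.payload.probe_value),
#         telemetry_shield_study_parquet,
#     )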
def test_logging(
    self, monkeypatch, client, project_id, static_dataset, temporary_dataset
):
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[
            Branch(slug="branch1", ratio=0.5),
            Branch(slug="branch2", ratio=0.5),
        ],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )

    config.metrics = {
        AnalysisPeriod.WEEK: [
            Summary(test_active_hours, BootstrapMean(confidence_interval=10))
        ]
    }

    log_config = LogConfiguration(
        log_project_id=project_id,
        log_dataset_id=temporary_dataset,
        log_table_id="logs",
        log_to_bigquery=True,
        task_profiling_log_table_id="task_profiling_logs",
        task_monitoring_log_table_id="task_monitoring_logs",
        capacity=1,
    )
    self.analysis_mock_run(
        monkeypatch, config, static_dataset, temporary_dataset, project_id, log_config
    )

    assert client.client.get_table(f"{project_id}.{temporary_dataset}.logs") is not None

    logs = list(client.client.list_rows(f"{project_id}.{temporary_dataset}.logs"))
    assert len(logs) >= 1

    error_logs = [log for log in logs if log.get("log_level") == "ERROR"]
    assert (
        "Error while computing statistic bootstrap_mean for metric active_hours"
        in error_logs[0].get("message")
    )
    assert error_logs[0].get("log_level") == "ERROR"
def test_with_segments(
    self, monkeypatch, client, project_id, static_dataset, temporary_dataset
):
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[
            Branch(slug="branch1", ratio=0.5),
            Branch(slug="branch2", ratio=0.5),
        ],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )

    test_clients_last_seen = SegmentDataSource(
        "clients_last_seen", f"`{project_id}.test_data.clients_last_seen`"
    )
    regular_user_v3 = Segment(
        "regular_user_v3",
        test_clients_last_seen,
        "COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE)",
    )

    config.experiment.segments = [regular_user_v3]

    config.metrics = {
        AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
    }

    self.analysis_mock_run(
        monkeypatch, config, static_dataset, temporary_dataset, project_id
    )

    query_job = client.client.query(
        f"""
        SELECT
            *
        FROM `{project_id}.{temporary_dataset}.test_experiment_enrollments_week_1`
        ORDER BY enrollment_date DESC
        """
    )

    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
            "regular_user_v3": True,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
            "regular_user_v3": False,
        },
    ]

    for i, row in enumerate(query_job.result()):
        for k, v in expected_metrics_results[i].items():
            assert row[k] == v

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_enrollments_weekly"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
    ).to_dataframe()

    # Only one count per segment and branch, please
    assert (
        stats.query("metric == 'identity' and statistic == 'count'")
        .groupby(["segment", "analysis_basis", "window_index", "branch"])
        .size()
        == 1
    ).all()

    count_by_branch = stats.query(
        "segment == 'all' and statistic == 'count'"
    ).set_index("branch")
    assert count_by_branch.loc["branch1", "point"] == 1.0
    assert count_by_branch.loc["branch2", "point"] == 1.0
    assert count_by_branch.loc["branch2", "analysis_basis"] == "enrollments"
def test_metrics(self, client, project_id, static_dataset, temporary_dataset):
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[
            Branch(slug="branch1", ratio=0.5),
            Branch(slug="branch2", ratio=0.5),
        ],
        reference_branch="branch2",
        features=[],
        normandy_slug="test-experiment",
    )

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expr=agg_sum("active_hours_sum"),
    )

    config.metrics = {
        AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
    }

    self.analysis_mock_run(config, static_dataset, temporary_dataset, project_id)

    query_job = client.client.query(
        f"""
        SELECT
            *
        FROM `{project_id}.{temporary_dataset}.test_experiment_week_1`
        ORDER BY enrollment_date DESC
        """
    )

    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
    ]

    for i, row in enumerate(query_job.result()):
        for k, v in expected_metrics_results[i].items():
            assert row[k] == v

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_weekly"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
    ).to_dataframe()

    count_by_branch = stats.query("statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["branch1", "point"] == 1.0
    assert count_by_branch.loc["branch2", "point"] == 1.0

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        )
        is not None
    )
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from textwrap import dedent

from mozanalysis.metrics import DataSource, Metric, agg_any, agg_sum

#: DataSource: The clients_daily table.
clients_daily = DataSource(
    name="clients_daily",
    from_expr="mozdata.telemetry.clients_daily",
)

#: DataSource: The `search_clients_engines_sources_daily`_ table.
#: This table unpacks search counts from the main ping;
#: it contains one row per (client_id, submission_date, engine, source).
#:
#: .. _`search_clients_engines_sources_daily`: https://docs.telemetry.mozilla.org/
#:    datasets/search/search_clients_engines_sources_daily/reference.html
search_clients_engines_sources_daily = DataSource(
    name="search_clients_engines_sources_daily",
    from_expr="mozdata.search.search_clients_engines_sources_daily",
    experiments_column_type=None,
)

#: DataSource: A clone of `search_clients_engines_sources_daily`.
#: Exists for backwards compatibility; new uses should use the new name.
search_clients_daily = search_clients_engines_sources_daily

#: DataSource: The main_summary table.
main_summary = DataSource(name="main_summary",
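# Illustrative sketch (not one of this module's definitions): because
# search_clients_engines_sources_daily holds one row per
# (client_id, submission_date, engine, source), a metric over it simply
# aggregates those rows up to one value per client. `sap` is this table's
# searches-via-SAP count column; the `select_expr` keyword is assumed to
# match the `from_expr` style used above.
#
#     searches_sketch = Metric(
#         name="searches_sketch",
#         data_source=search_clients_engines_sources_daily,
#         select_expr=agg_sum("sap"),
#     )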
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozanalysis.metrics import Metric, DataSource, agg_sum, agg_any

clients_daily = DataSource(
    name='clients_daily',
    from_expr="`moz-fx-data-shared-prod.telemetry.clients_daily`",
)

search_clients_daily = DataSource(
    name='search_clients_daily',
    from_expr='`moz-fx-data-shared-prod.search.search_clients_daily`',
    experiments_column_type=None,
)

main_summary = DataSource(
    name='main_summary',
    from_expr="`moz-fx-data-shared-prod.telemetry.main_summary`",
)

events = DataSource(
    name='events',
    from_expr="`moz-fx-data-shared-prod.telemetry.events`",
    experiments_column_type='native',
)

# The telemetry.events table is clustered by event_category.
# Normandy accounts for about 10% of event volume, so this dramatically
# reduces bytes queried compared to counting rows from the generic events DataSource.
normandy_events = DataSource(
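# A sketch of the pattern that comment describes: wrap the events table in a
# subquery that filters on the clustered column, so BigQuery prunes blocks.
# The exact predicate and column type here are assumptions, not the library's
# own definition.
#
#     normandy_events_sketch = DataSource(
#         name='normandy_events',
#         from_expr="""(
#             SELECT *
#             FROM `moz-fx-data-shared-prod.telemetry.events`
#             WHERE event_category = 'normandy'
#         )""",
#         experiments_column_type='native',
#     )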
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozanalysis.metrics import Metric, DataSource, agg_sum, agg_any

clients_daily = DataSource(
    name='clients_daily',
    from_expr="`moz-fx-data-shared-prod.telemetry.clients_daily`",
)

search_clients_daily = DataSource(
    name='search_clients_daily',
    from_expr='`moz-fx-data-shared-prod.search.search_clients_daily`',
    experiments_column_type=None,
)

main_summary = DataSource(
    name='main_summary',
    from_expr="`moz-fx-data-shared-prod.telemetry.main_summary`",
)

events = DataSource(
    name='events',
    from_expr="`moz-fx-data-shared-prod.telemetry.events`",
    experiments_column_type='native',
)

main = DataSource(
    name='main',
    from_expr="""(
        SELECT
def test_get_per_client_data_join(spark):
    exp = Experiment('a-stub', '20190101')

    enrollments = spark.createDataFrame(
        [
            ['aaaa', 'control', '20190101'],
            ['bbbb', 'test', '20190101'],
            ['cccc', 'control', '20190108'],
            ['dddd', 'test', '20190109'],
            ['annie-nodata', 'control', '20190101'],
            ['bob-badtiming', 'test', '20190102'],
            ['carol-gooddata', 'test', '20190101'],
            ['derek-lateisok', 'control', '20190110'],
        ],
        [
            "client_id",
            "branch",
            "enrollment_date",
        ],
    )

    ex_d = {'a-stub': 'fake-branch-lifes-too-short'}

    data_source_df = spark.createDataFrame(
        [
            # bob-badtiming only has data before/after his analysis window,
            # which is not filtered out by `_process_data_source_df`;
            # the join must exclude it
            ['bob-badtiming', '20190102', ex_d, 1],
            ['bob-badtiming', '20190106', ex_d, 2],
            # carol-gooddata has data on two days (including a dupe day)
            ['carol-gooddata', '20190102', ex_d, 3],
            ['carol-gooddata', '20190102', ex_d, 2],
            ['carol-gooddata', '20190104', ex_d, 6],
            # derek-lateisok has data before and during the analysis window
            ['derek-lateisok', '20190110', ex_d, 1000],
            ['derek-lateisok', '20190111', ex_d, 1],
            # TODO: exercise the last condition on the join
        ],
        [
            "client_id",
            "submission_date_s3",
            "experiments",
            "some_value",
        ],
    )

    ds = DataSource.from_dataframe('ds', data_source_df)
    metric = Metric.from_col('some_value', agg_sum(data_source_df.some_value), ds)

    res = exp.get_per_client_data(
        enrollments, [metric], '20190114', 1, 3, keep_client_id=True
    )

    # Check that the dataframe has the correct number of rows
    assert res.count() == enrollments.count()

    # Check that dataless enrollments are handled correctly
    annie_nodata = res.filter(res.client_id == 'annie-nodata')
    assert annie_nodata.count() == 1
    assert annie_nodata.first()['some_value'] == 0

    # Check that early and late data were ignored
    # i.e. check the join, not just _process_data_source_df
    bob_badtiming = res.filter(res.client_id == 'bob-badtiming')
    assert bob_badtiming.count() == 1
    assert bob_badtiming.first()['some_value'] == 0

    # Check that _process_data_source_df didn't do the heavy lifting above
    time_limits = TimeLimits.for_single_analysis_window(
        exp.start_date, '20190114', 1, 3, exp.num_dates_enrollment
    )
    pds = exp._process_data_source_df(data_source_df, time_limits)
    assert pds.filter(
        pds.client_id == 'bob-badtiming'
    ).select(
        F.sum(pds.some_value).alias('agg_val')
    ).first()['agg_val'] == 3

    # Check that relevant data was included appropriately
    carol_gooddata = res.filter(res.client_id == 'carol-gooddata')
    assert carol_gooddata.count() == 1
    assert carol_gooddata.first()['some_value'] == 11

    derek_lateisok = res.filter(res.client_id == 'derek-lateisok')
    assert derek_lateisok.count() == 1
    assert derek_lateisok.first()['some_value'] == 1

    # Check that it still works for `data_source`s without an experiments map
    ds_df_noexp = data_source_df.drop('experiments')
    ds_noexp = DataSource.from_dataframe('ds_noexp', ds_df_noexp)
    metric_noexp = Metric.from_col(
        'some_value', agg_sum(ds_df_noexp.some_value), ds_noexp
    )

    res2 = exp.get_per_client_data(
        enrollments, [metric_noexp], '20190114', 1, 3, keep_client_id=True
    )

    assert res2.count() == enrollments.count()
def test_metrics_with_exposure(
    self, monkeypatch, client, project_id, static_dataset, temporary_dataset
):
    experiment = Experiment(
        experimenter_slug="test-experiment",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[
            Branch(slug="branch1", ratio=0.5),
            Branch(slug="branch2", ratio=0.5),
        ],
        reference_branch="branch2",
        normandy_slug="test-experiment",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
        analysis_bases=[AnalysisBasis.EXPOSURES],
    )

    config.metrics = {
        AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
    }
    config.experiment.exposure_signal = ExposureSignal(
        name="ad_exposure",
        data_source=test_clients_daily,
        select_expression="active_hours_sum > 0",
        friendly_name="Ad exposure",
        description="Clients have clicked on ad",
        window_start="enrollment_start",
        window_end="analysis_window_end",
    )

    self.analysis_mock_run(
        monkeypatch, config, static_dataset, temporary_dataset, project_id
    )

    query_job = client.client.query(
        f"""
        SELECT
            *
        FROM `{project_id}.{temporary_dataset}.test_experiment_exposures_week_1`
        ORDER BY enrollment_date DESC
        """
    )

    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
    ]

    r = query_job.result()

    for i, row in enumerate(r):
        for k, v in expected_metrics_results[i].items():
            assert row[k] == v

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_exposures_weekly"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )
    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        )
        is not None
    )
def test_datasource_constructor_succeeds(experiments_column_type):
    DataSource(
        name="foo",
        from_expr="my_table.name",
        experiments_column_type=experiments_column_type,
    )
def test_no_enrollments(
    self, monkeypatch, client, project_id, static_dataset, temporary_dataset
):
    experiment = Experiment(
        experimenter_slug="test-experiment-2",
        type="rollout",
        status="Live",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        branches=[
            Branch(slug="a", ratio=0.5),
            Branch(slug="b", ratio=0.5),
        ],
        reference_branch="a",
        normandy_slug="test-experiment-2",
        is_high_population=False,
        app_name="firefox_desktop",
        app_id="firefox-desktop",
    )

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expression=agg_sum("active_hours_sum"),
    )

    config.metrics = {
        AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
    }

    self.analysis_mock_run(
        monkeypatch, config, static_dataset, temporary_dataset, project_id
    )

    query_job = client.client.query(
        f"""
        SELECT
            *
        FROM `{project_id}.{temporary_dataset}.test_experiment_2_enrollments_week_1`
        ORDER BY enrollment_date DESC
        """
    )
    assert query_job.result().total_rows == 0

    stats = client.client.list_rows(
        f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
    ).to_dataframe()

    count_by_branch = stats.query("statistic == 'count'").set_index("branch")
    assert count_by_branch.loc["a", "point"] == 0.0
    assert count_by_branch.loc["b", "point"] == 0.0
    assert count_by_branch.loc["b", "analysis_basis"] == "enrollments"

    assert (
        client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        )
        is not None
    )
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozanalysis.metrics import DataSource, Metric

#: DataSource: The baseline ping table.
baseline = DataSource(
    name="baseline",
    from_expr="""(
        SELECT
            p.*,
            DATE(p.submission_timestamp) AS submission_date
        FROM `moz-fx-data-shared-prod.{dataset}.baseline` p
    )""",
    client_id_column="client_info.client_id",
    experiments_column_type="glean",
    default_dataset="org_mozilla_ios_firefox",
)

#: DataSource: Events table.
#: For convenience, this is exploded to one-row-per-event
#: like the ``telemetry.events`` dataset.
events = DataSource(
    name="events",
    from_expr="""(
        SELECT
            p.* EXCEPT (events),
            DATE(p.submission_timestamp) AS submission_date,
            event
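# Illustrative sketch (not a definition shipped with this module): the
# {dataset} placeholder plus default_dataset let the `baseline` source above
# serve other Glean apps, and a per-client metric over it is just an
# aggregate expression. `document_id` is the standard Glean ping id column;
# the `select_expr` keyword is assumed to match the `from_expr` style here.
#
#     baseline_ping_count_sketch = Metric(
#         name="baseline_ping_count_sketch",
#         data_source=baseline,
#         select_expr="COUNT(document_id)",
#     )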
def test_metrics(self, client):
    experiment = Experiment(
        slug="test-experiment",
        type="rollout",
        start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
        end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
        proposed_enrollment=7,
        variants=[
            Variant(is_control=False, slug="branch1", ratio=0.5),
            Variant(is_control=True, slug="branch2", ratio=0.5),
        ],
        normandy_slug="test-experiment",
    )

    orig = mozanalysis.experiment.Experiment.build_query

    def build_query_test_project(instance, *args, **kwargs):
        # to use the test project and dataset, we need to change the SQL query
        # generated by mozanalysis
        query = orig(instance, args[0], args[1], args[2], args[3])
        query = query.replace("moz-fx-data-shared-prod", self.project_id)
        query = query.replace("telemetry", self.static_dataset)
        return query

    config = AnalysisSpec().resolve(experiment)

    test_clients_daily = DataSource(
        name="clients_daily",
        from_expr=f"`{self.project_id}.test_data.clients_daily`",
    )

    test_active_hours = Metric(
        name="active_hours",
        data_source=test_clients_daily,
        select_expr=agg_sum("active_hours_sum"),
    )

    config.metrics = {
        AnalysisPeriod.WEEK: [
            Summary(test_active_hours, BootstrapMean(ref_branch_label="branch1"))
        ]
    }

    analysis = Analysis(self.project_id, self.test_dataset, config)

    with mock.patch.object(
        mozanalysis.experiment.Experiment, "build_query", new=build_query_test_project
    ):
        analysis.run(current_date=dt.datetime(2020, 4, 12), dry_run=False)

    query_job = client.query(
        f"""
        SELECT
            *
        FROM `{self.project_id}.{self.test_dataset}.test_experiment_week_1`
        ORDER BY enrollment_date DESC
        """
    )

    expected_metrics_results = [
        {
            "client_id": "bbbb",
            "branch": "branch2",
            "enrollment_date": datetime.date(2020, 4, 3),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
        {
            "client_id": "aaaa",
            "branch": "branch1",
            "enrollment_date": datetime.date(2020, 4, 2),
            "num_enrollment_events": 1,
            "analysis_window_start": 0,
            "analysis_window_end": 6,
        },
    ]

    for i, row in enumerate(query_job.result()):
        for k, v in expected_metrics_results[i].items():
            assert row[k] == v

    assert (
        client.get_table(f"{self.project_id}.{self.test_dataset}.test_experiment_weekly")
        is not None
    )
    assert (
        client.get_table(
            f"{self.project_id}.{self.test_dataset}.statistics_test_experiment_week_1"
        )
        is not None
    )
    assert (
        client.get_table(
            f"{self.project_id}.{self.test_dataset}.statistics_test_experiment_weekly"
        )
        is not None
    )