Python agg_sum примеры, mozanalysis.metrics.agg_sum Python примеры использования

Пример #1

0

Показать файл

Файл: desktop.py Проект: felixlawrence/mozanalysis

def active_hours(cd):
    """Apply ``agg_sum`` to the ``active_hours_sum`` column of ``cd``.

    Active hours are derived from ``active_ticks``: at any given moment a
    client counts as "active" if there was a keyboard or mouse interaction
    (click, scroll, move) in the previous 5 seconds.
    """
    hours_column = cd.active_hours_sum
    return agg_sum(hours_column)

Пример #2

0

Показать файл

def _get_metrics(spark):
    """Return a dict holding the single ``how_many_ones`` metric.

    The metric applies ``agg_sum`` to the ``constant_one`` column of the
    dataframe returned by ``_get_data_source_df`` and is attached to a
    data source named ``bla_ds`` wrapping that same dataframe.
    """
    source_df = _get_data_source_df(spark)
    data_source = DataSource.from_dataframe('bla_ds', source_df)
    ones_metric = Metric.from_col(
        'how_many_ones', agg_sum(source_df.constant_one), data_source)

    return {'how_many_ones': ones_metric}

Пример #3

0

Показать файл

def test_agg_sum(spark):
    """Check ``mm.agg_sum`` of ``numeric_col`` per ``client_id``."""
    fixture_df = register_fixture(spark)

    metric_col = mm.agg_sum(fixture_df.numeric_col).alias('metric_value')
    per_client = fixture_df.groupBy('client_id').agg(metric_col).toPandas()
    res = per_client.set_index('client_id').metric_value

    # Expected aggregate for each client id in the fixture.
    for client_id, total in (('aaaa', 2), ('bb', 0), ('ccc', 5), ('dd', 0)):
        assert res[client_id] == total

Пример #4

0

Показать файл

def test_metric_from_col(spark):
    """Check that ``Metric.from_col`` keeps its name, data source and column.

    The metric's column is then aggregated per ``client_id`` and compared
    with the expected totals.
    """
    orig_df = register_fixture(spark)

    data_source = mm.DataSource.from_dataframe(
        'an_ordinary_data_source', orig_df)
    summed_col = mm.agg_sum(orig_df.numeric_col)
    metric = mm.Metric.from_col('a_special_metric', summed_col, data_source)

    assert metric.name == 'a_special_metric'
    assert metric.data_source.get_dataframe(spark, None) == orig_df

    grouped = orig_df.groupBy('client_id')
    per_client = grouped.agg(metric.get_col(spark, None)).toPandas()
    res = per_client.set_index('client_id').a_special_metric

    # Expected aggregate for each client id in the fixture.
    for client_id, total in (('aaaa', 2), ('bb', 0), ('ccc', 5), ('dd', 0)):
        assert res[client_id] == total

Пример #5

0

Показать файл

Файл: desktop.py Проект: felixlawrence/mozanalysis

def search_count(scd):
    """Apply ``agg_sum`` to the ``sap`` column of ``scd``."""
    sap_column = scd.sap
    return agg_sum(sap_column)

Пример #6

0

Показать файл

Файл: test_analysis_integration.py Проект: mozilla/jetstream

    def test_logging(self, monkeypatch, client, project_id, static_dataset,
                     temporary_dataset):
        """Check that an analysis error ends up in the BigQuery log table.

        The analysis is run with BigQuery logging enabled and a
        ``BootstrapMean(confidence_interval=10)`` summary for
        ``active_hours``; the test then expects an ERROR entry mentioning
        that statistic and metric in the ``logs`` table of the temporary
        dataset.
        """
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [
                Summary(test_active_hours,
                        BootstrapMean(confidence_interval=10))
            ]
        }

        log_config = LogConfiguration(
            log_project_id=project_id,
            log_dataset_id=temporary_dataset,
            log_table_id="logs",
            log_to_bigquery=True,
            task_profiling_log_table_id="task_profiling_logs",
            task_monitoring_log_table_id="task_monitoring_logs",
            capacity=1,
        )
        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id, log_config)

        assert client.client.get_table(
            f"{project_id}.{temporary_dataset}.logs") is not None

        logs = list(
            client.client.list_rows(f"{project_id}.{temporary_dataset}.logs"))

        assert len(logs) >= 1
        error_logs = [log for log in logs if log.get("log_level") == "ERROR"]
        # Having some log rows does not guarantee an ERROR row; fail with a
        # readable message rather than an IndexError on error_logs[0].
        assert error_logs, "expected at least one ERROR entry in the log table"
        assert (
            "Error while computing statistic bootstrap_mean for metric active_hours"
            in error_logs[0].get("message"))
        assert error_logs[0].get("log_level") == "ERROR"

Пример #7

0

Показать файл

Файл: test_analysis_integration.py Проект: mozilla/jetstream

    def test_with_segments(self, monkeypatch, client, project_id,
                           static_dataset, temporary_dataset):
        """Run the weekly analysis with a ``regular_user_v3`` segment.

        Checks the rows of the week-1 enrollments table (including the
        segment column), that the expected result tables exist, and that
        the ``count`` statistic is reported once per segment/branch.
        """
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        test_clients_last_seen = SegmentDataSource(
            "clients_last_seen", f"`{project_id}.test_data.clients_last_seen`")
        regular_user_v3 = Segment(
            "regular_user_v3",
            test_clients_last_seen,
            "COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE)",
        )
        config.experiment.segments = [regular_user_v3]

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": True,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": False,
            },
        ]

        rows = list(query_job.result())
        # Pin the row count first: an empty result must fail the test
        # instead of silently skipping every per-row comparison.
        assert len(rows) == len(expected_metrics_results)
        for row, expected in zip(rows, expected_metrics_results):
            for k, v in expected.items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_enrollments_weekly"
        ) is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        # Only one count per segment and branch, please
        assert (stats.query(
            "metric == 'identity' and statistic == 'count'").groupby(
                ["segment", "analysis_basis", "window_index",
                 "branch"]).size() == 1).all()

        count_by_branch = stats.query(
            "segment == 'all' and statistic == 'count'").set_index("branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0
        assert count_by_branch.loc["branch2",
                                   "analysis_basis"] == "enrollments"

Пример #8

0

Показать файл

Файл: test_analysis_integration.py Проект: mozilla/jetstream

    def test_no_enrollments(self, monkeypatch, client, project_id,
                            static_dataset, temporary_dataset):
        """Run the analysis for ``test-experiment-2`` and expect empty results.

        The week-1 enrollments table must contain no rows, the ``count``
        statistic must be 0.0 for both branches, and the weekly statistics
        table must exist.
        """
        slug = "test-experiment-2"
        experiment = Experiment(
            experimenter_slug=slug,
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[Branch(slug="a", ratio=0.5), Branch(slug="b", ratio=0.5)],
            reference_branch="a",
            normandy_slug=slug,
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )
        config = AnalysisSpec().resolve(experiment)

        clients_daily_source = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )
        active_hours_metric = Metric(
            name="active_hours",
            data_source=clients_daily_source,
            select_expression=agg_sum("active_hours_sum"),
        )
        weekly_summary = Summary(active_hours_metric, BootstrapMean())
        config.metrics = {AnalysisPeriod.WEEK: [weekly_summary]}

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_2_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)
        assert query_job.result().total_rows == 0

        # Both statistics tables live under the same project/dataset prefix.
        table_prefix = f"{project_id}.{temporary_dataset}"
        stats_rows = client.client.list_rows(
            f"{table_prefix}.statistics_test_experiment_2_week_1")
        stats = stats_rows.to_dataframe()

        count_rows = stats.query("statistic == 'count'")
        count_by_branch = count_rows.set_index("branch")
        for branch_slug in ("a", "b"):
            assert count_by_branch.loc[branch_slug, "point"] == 0.0
        assert count_by_branch.loc["b", "analysis_basis"] == "enrollments"

        weekly_stats_table = client.client.get_table(
            f"{table_prefix}.statistics_test_experiment_2_weekly")
        assert weekly_stats_table is not None

Пример #9

0

Показать файл

Файл: desktop.py Проект: felixlawrence/mozanalysis

def organic_search_count(scd):
    """Apply ``agg_sum`` to the ``organic`` column of ``scd``."""
    organic_column = scd.organic
    return agg_sum(organic_column)

Пример #10

0

Показать файл

 def a_lovely_metric(df):
     """Hi there!"""
     # The docstring is kept verbatim: callers may read it at runtime.
     # Aggregate ``df.numeric_col`` with ``mm.agg_sum``.
     numeric_total = mm.agg_sum(df.numeric_col)
     return numeric_total

Пример #11

0

Показать файл

Файл: fenix.py Проект: mozilla/mozanalysis

    from_expr="""(
                SELECT
                    p.*,
                    DATE(p.submission_timestamp) AS submission_date
                FROM `moz-fx-data-shared-prod.{dataset}.metrics` p
            )""",
    client_id_column="client_info.client_id",
    experiments_column_type="glean",
    default_dataset="org_mozilla_firefox",
)

#: Metric: ...
# ``agg_sum`` over the Glean counter column, read from ``baseline``.
uri_count = Metric(
    name="uri_count",
    data_source=baseline,
    select_expr=agg_sum(
        "metrics.counter.events_total_uri_count"
    ),
    description="Counts the number of URIs each client visited",
    friendly_name="URIs visited",
)

#: Metric: ...
# Counts ``browser_menu_action`` events whose ``item`` extra is
# ``report_site_issue``.
user_reports_site_issue_count = Metric(
    name="user_reports_site_issue_count",
    data_source=events,
    # ``mozfun.map.get_key`` takes the key/value array itself, so pass the
    # ``event.extra`` column rather than the string literal 'event.extra'.
    select_expr=(
        "COUNTIF(event.name = 'browser_menu_action' AND "
        "mozfun.map.get_key(event.extra, 'item') = 'report_site_issue')"
    ),
    friendly_name="Site issues reported",
    description=(
        "Counts the number of times clients reported an issue with a site."
    ),
)

Пример #12

0

Показать файл

)

# Data source over ``messaging_system.cfr``; the subquery adds a
# ``submission_date`` column derived from ``submission_timestamp``.
cfr = DataSource(
    name="cfr",
    experiments_column_type="native",
    from_expr="""(
                SELECT
                    *,
                    DATE(submission_timestamp) AS submission_date
                FROM `moz-fx-data-derived-datasets`.messaging_system.cfr
            )""",
)

# Each metric below applies ``agg_sum`` to one column of its data source.
active_hours = Metric(
    name="active_hours",
    data_source=clients_daily,
    select_expr=agg_sum("active_hours_sum"),
)

uri_count = Metric(
    name="uri_count",
    data_source=clients_daily,
    select_expr=agg_sum(
        "scalar_parent_browser_engagement_total_uri_count_sum"
    ),
)

search_count = Metric(
    name="search_count",
    data_source=search_clients_daily,
    select_expr=agg_sum("sap"),
)

tagged_search_count = Metric(
    name="tagged_search_count",
    data_source=search_clients_daily,
    select_expr=agg_sum("tagged_sap"),
)

tagged_follow_on_search_count = Metric(name='tagged_follow_on_search_count',

Пример #13

0

Показать файл

# Data source over ``telemetry.crash``; the subquery adds a
# ``submission_date`` column and exposes ``environment.experiments``.
crash = DataSource(
    name="crash",
    experiments_column_type="native",
    from_expr="""(
                SELECT
                    *,
                    DATE(submission_timestamp) AS submission_date,
                    environment.experiments
                FROM `moz-fx-data-shared-prod`.telemetry.crash
            )""",
)

# Each metric below applies ``agg_sum`` to one column of its data source.
active_hours = Metric(
    name="active_hours",
    data_source=clients_daily,
    select_expr=agg_sum("active_hours_sum"),
)

uri_count = Metric(
    name="uri_count",
    data_source=clients_daily,
    select_expr=agg_sum(
        "scalar_parent_browser_engagement_total_uri_count_sum"
    ),
)

search_count = Metric(
    name="search_count",
    data_source=search_clients_daily,
    select_expr=agg_sum("sap"),
)

ad_clicks = Metric(
    name="ad_clicks",
    data_source=search_clients_daily,
    select_expr=agg_sum("ad_click"),
)

organic_search_count = Metric(name='organic_search_count',

Пример #14

0

Показать файл

def test_get_per_client_data_join(spark):
    """Exercise the enrollments/data-source join of ``get_per_client_data``.

    Covers an enrolled client with no data, a client whose data falls
    outside the analysis window, clients with data inside it, and a data
    source that has no ``experiments`` column.
    """
    exp = Experiment('a-stub', '20190101')

    enrollment_rows = [
        ['aaaa', 'control', '20190101'],
        ['bbbb', 'test', '20190101'],
        ['cccc', 'control', '20190108'],
        ['dddd', 'test', '20190109'],
        ['annie-nodata', 'control', '20190101'],
        ['bob-badtiming', 'test', '20190102'],
        ['carol-gooddata', 'test', '20190101'],
        ['derek-lateisok', 'control', '20190110'],
    ]
    enrollments = spark.createDataFrame(
        enrollment_rows,
        ["client_id", "branch", "enrollment_date"],
    )

    ex_d = {'a-stub': 'fake-branch-lifes-too-short'}
    data_rows = [
        # bob-badtiming only has data before/after analysis window
        # but missed by `process_data_source`
        ['bob-badtiming', '20190102', ex_d, 1],
        ['bob-badtiming', '20190106', ex_d, 2],
        # carol-gooddata has data on two days (including a dupe day)
        ['carol-gooddata', '20190102', ex_d, 3],
        ['carol-gooddata', '20190102', ex_d, 2],
        ['carol-gooddata', '20190104', ex_d, 6],
        # derek-lateisok has data before and during the analysis window
        ['derek-lateisok', '20190110', ex_d, 1000],
        ['derek-lateisok', '20190111', ex_d, 1],
        # TODO: exercise the last condition on the join
    ]
    data_source_df = spark.createDataFrame(
        data_rows,
        ["client_id", "submission_date_s3", "experiments", "some_value"],
    )

    def single_value_for(result_df, client_id):
        """Assert one row exists for ``client_id``; return its ``some_value``."""
        client_rows = result_df.filter(result_df.client_id == client_id)
        assert client_rows.count() == 1
        return client_rows.first()['some_value']

    ds = DataSource.from_dataframe('ds', data_source_df)
    summed_value = agg_sum(data_source_df.some_value)
    metric = Metric.from_col('some_value', summed_value, ds)

    res = exp.get_per_client_data(
        enrollments, [metric], '20190114', 1, 3, keep_client_id=True)

    # One output row per enrollment
    assert res.count() == enrollments.count()

    # Enrollments without any data still get a row, with a zero value
    assert single_value_for(res, 'annie-nodata') == 0

    # Early and late data must be dropped by the join itself,
    # not only by _process_data_source_df
    assert single_value_for(res, 'bob-badtiming') == 0
    # ...so confirm _process_data_source_df kept bob's rows
    time_limits = TimeLimits.for_single_analysis_window(
        exp.start_date, '20190114', 1, 3, exp.num_dates_enrollment)
    pds = exp._process_data_source_df(data_source_df, time_limits)
    bob_rows = pds.filter(pds.client_id == 'bob-badtiming')
    bob_total = bob_rows.select(F.sum(pds.some_value).alias('agg_val'))
    assert bob_total.first()['agg_val'] == 3

    # Data inside the window is included
    assert single_value_for(res, 'carol-gooddata') == 11
    assert single_value_for(res, 'derek-lateisok') == 1

    # A `data_source` without an experiments map must work too
    ds_df_noexp = data_source_df.drop('experiments')
    ds_noexp = DataSource.from_dataframe('ds_noexp', ds_df_noexp)
    metric_noexp = Metric.from_col(
        'some_value', agg_sum(ds_df_noexp.some_value), ds_noexp)

    res2 = exp.get_per_client_data(
        enrollments, [metric_noexp], '20190114', 1, 3, keep_client_id=True)

    assert res2.count() == enrollments.count()

Пример #15

0

Показать файл

Файл: desktop.py Проект: felixlawrence/mozanalysis

def ad_clicks(scd):
    """Apply ``agg_sum`` to the ``ad_click`` column of ``scd``."""
    ad_click_column = scd.ad_click
    return agg_sum(ad_click_column)

Пример #16

0

Показать файл

Файл: desktop.py Проект: mozilla/mozanalysis

# Data source over ``mozdata.activity_stream.events``; the subquery adds a
# ``submission_date`` column derived from ``submission_timestamp``.
activity_stream_events = DataSource(
    experiments_column_type="native",
    name="activity_stream_events",
    from_expr="""(
                SELECT
                    *,
                    DATE(submission_timestamp) AS submission_date
                FROM mozdata.activity_stream.events
            )""",
)

#: Metric: ...
# ``agg_sum`` over ``active_hours_sum`` from ``clients_daily``.
active_hours = Metric(
    name="active_hours",
    friendly_name="Active hours",
    description=dedent("""\
        Measures the amount of time (in 5-second increments) during which
        Firefox received user input from a keyboard or mouse. The Firefox
        window does not need to be focused.
    """),
    data_source=clients_daily,
    select_expr=agg_sum("active_hours_sum"),
)

#: Metric: ...
uri_count = Metric(
    name="uri_count",
    data_source=clients_daily,
    select_expr=agg_sum(
        "scalar_parent_browser_engagement_total_uri_count_sum"),
    friendly_name="URIs visited",

Пример #17

0

Показать файл

Файл: desktop.py Проект: felixlawrence/mozanalysis

def uri_count(cd):
    """Apply ``agg_sum`` to the total-URI-count scalar column of ``cd``."""
    uri_column = cd.scalar_parent_browser_engagement_total_uri_count_sum
    return agg_sum(uri_column)

Пример #18

0

Показать файл

Файл: test_analysis_integration.py Проект: mozilla/jetstream

    def test_metrics_with_exposure(self, monkeypatch, client, project_id,
                                   static_dataset, temporary_dataset):
        """Run the weekly analysis on an exposures basis.

        ``active_hours`` is computed with ``AnalysisBasis.EXPOSURES`` and an
        ``ad_exposure`` exposure signal; the test checks the rows of the
        week-1 exposures table and that the result tables exist.
        """
        slug = "test-experiment"
        experiment = Experiment(
            experimenter_slug=slug,
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5),
            ],
            reference_branch="branch2",
            normandy_slug=slug,
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )
        config = AnalysisSpec().resolve(experiment)

        clients_daily_source = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )
        active_hours_metric = Metric(
            name="active_hours",
            data_source=clients_daily_source,
            select_expression=agg_sum("active_hours_sum"),
            analysis_bases=[AnalysisBasis.EXPOSURES],
        )
        weekly_summary = Summary(active_hours_metric, BootstrapMean())
        config.metrics = {AnalysisPeriod.WEEK: [weekly_summary]}

        config.experiment.exposure_signal = ExposureSignal(
            name="ad_exposure",
            data_source=clients_daily_source,
            select_expression="active_hours_sum > 0",
            friendly_name="Ad exposure",
            description="Clients have clicked on ad",
            window_start="enrollment_start",
            window_end="analysis_window_end",
        )

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_exposures_week_1`
            ORDER BY enrollment_date DESC
        """)

        def expected_row(client_id, branch, enrollment_date):
            """Build one expected row; only these three fields vary."""
            return {
                "client_id": client_id,
                "branch": branch,
                "enrollment_date": enrollment_date,
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            }

        expected_metrics_results = [
            expected_row("bbbb", "branch2", datetime.date(2020, 4, 3)),
            expected_row("aaaa", "branch1", datetime.date(2020, 4, 2)),
        ]

        result_rows = query_job.result()
        for index, row in enumerate(result_rows):
            expected = expected_metrics_results[index]
            for column, value in expected.items():
                assert row[column] == value

        # Every result table must have been created by the run.
        table_prefix = f"{project_id}.{temporary_dataset}"
        for table_name in (
            "test_experiment_exposures_weekly",
            "statistics_test_experiment_week_1",
            "statistics_test_experiment_weekly",
        ):
            table = client.client.get_table(f"{table_prefix}.{table_name}")
            assert table is not None

Пример #19

0

Показать файл

    def test_metrics(self, client, project_id, static_dataset,
                     temporary_dataset):
        """Run the weekly analysis and check the metrics and statistics tables.

        Checks the rows of the week-1 metrics table, that the weekly and
        statistics tables exist, and that the ``count`` statistic is 1.0
        for each branch.
        """
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            features=[],
            normandy_slug="test-experiment",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expr=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(config, static_dataset, temporary_dataset,
                               project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        rows = list(query_job.result())
        # Pin the row count first: an empty result must fail the test
        # instead of silently skipping every per-row comparison.
        assert len(rows) == len(expected_metrics_results)
        for row, expected in zip(rows, expected_metrics_results):
            for k, v in expected.items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_weekly")
                is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        ) is not None)

Пример #20

0

Показать файл

Файл: test_analysis_integration.py Проект: ksiegler1/pensieve

    def test_metrics(self, client):
        """Run the analysis against the test project and check its output.

        ``Experiment.build_query`` is patched so the generated SQL points at
        the test project and static dataset; the test then checks the rows
        of the week-1 table and that the weekly and statistics tables exist.
        """
        experiment = Experiment(
            slug="test-experiment",
            type="rollout",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            variants=[
                Variant(is_control=False, slug="branch1", ratio=0.5),
                Variant(is_control=True, slug="branch2", ratio=0.5),
            ],
            normandy_slug="test-experiment",
        )

        orig = mozanalysis.experiment.Experiment.build_query

        def build_query_test_project(instance, *args, **kwargs):
            # to use the test project and dataset, we need to change the SQL query
            # generated by mozanalysis
            # Forward the call unchanged so keyword arguments are not dropped
            # and no fixed number of positional arguments is assumed.
            query = orig(instance, *args, **kwargs)
            query = query.replace("moz-fx-data-shared-prod", self.project_id)
            query = query.replace("telemetry", self.static_dataset)
            return query

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily", from_expr=f"`{self.project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expr=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [
                Summary(test_active_hours, BootstrapMean(ref_branch_label="branch1"))
            ]
        }

        analysis = Analysis(self.project_id, self.test_dataset, config)

        with mock.patch.object(
            mozanalysis.experiment.Experiment, "build_query", new=build_query_test_project
        ):
            analysis.run(current_date=dt.datetime(2020, 4, 12), dry_run=False)

        query_job = client.query(
            f"""
            SELECT
              *
            FROM `{self.project_id}.{self.test_dataset}.test_experiment_week_1`
            ORDER BY enrollment_date DESC
        """
        )

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        for i, row in enumerate(query_job.result()):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (
            client.get_table(f"{self.project_id}.{self.test_dataset}.test_experiment_weekly")
            is not None
        )
        assert (
            client.get_table(
                f"{self.project_id}.{self.test_dataset}.statistics_test_experiment_week_1"
            )
            is not None
        )
        assert (
            client.get_table(
                f"{self.project_id}.{self.test_dataset}.statistics_test_experiment_weekly"
            )
            is not None
        )

Python agg_sum примеры использования