Python AnalysisSpec.AnalysisSpec示例，jetstream.config.AnalysisSpec.AnalysisSpec Python示例

示例#1

0

显示文件

def test_get_timelimits_if_ready(experiments):
    config = AnalysisSpec().resolve(experiments[0])
    config2 = AnalysisSpec().resolve(experiments[2])

    analysis = Analysis("test", "test", config)
    analysis2 = Analysis("test", "test", config2)

    date = dt.datetime(2019, 12, 1, tzinfo=pytz.utc) + timedelta(0)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date) is None
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    date = dt.datetime(2019, 12, 1, tzinfo=pytz.utc) + timedelta(2)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date) is None
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    date = dt.datetime(2019, 12, 1, tzinfo=pytz.utc) + timedelta(7)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date) is None

    date = dt.datetime(2019, 12, 1, tzinfo=pytz.utc) + timedelta(days=13)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAY, date)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.WEEK, date)

    date = dt.datetime(2020, 2, 29, tzinfo=pytz.utc)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.OVERALL,
                                             date) is None

    date = dt.datetime(2020, 3, 1, tzinfo=pytz.utc)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.OVERALL, date)
    assert analysis2._get_timelimits_if_ready(AnalysisPeriod.OVERALL,
                                              date) is None

    date = dt.datetime(2019, 12, 1, tzinfo=pytz.utc) + timedelta(days=34)
    assert analysis._get_timelimits_if_ready(AnalysisPeriod.DAYS_28, date)

示例#2

0

显示文件

文件： test_analysis.py 项目： matf/jetstream

def test_regression_20200316():
    experiment_json = r"""
    {
      "experiment_url": "https://blah/experiments/search-tips-aka-nudges/",
      "type": "addon",
      "name": "Search Tips aka Nudges",
      "slug": "search-tips-aka-nudges",
      "public_name": "Search Tips",
      "public_description": "Search Tips are designed to increase engagement with the QuantumBar.",
      "status": "Live",
      "countries": [],
      "platform": "All Platforms",
      "start_date": 1578960000000,
      "end_date": 1584921600000,
      "population": "2% of Release Firefox 72.0 to 74.0",
      "population_percent": "2.0000",
      "firefox_channel": "Release",
      "firefox_min_version": "72.0",
      "firefox_max_version": "74.0",
      "addon_experiment_id": null,
      "addon_release_url": "https://bugzilla.mozilla.org/attachment.cgi?id=9120542",
      "pref_branch": null,
      "pref_name": null,
      "pref_type": null,
      "proposed_start_date": 1578960000000,
      "proposed_enrollment": 21,
      "proposed_duration": 69,
      "normandy_slug": "addon-search-tips-aka-nudges-release-72-74-bug-1603564",
      "normandy_id": 902,
      "other_normandy_ids": [],
      "variants": [
        {
          "description": "Standard address bar experience",
          "is_control": false,
          "name": "control",
          "ratio": 50,
          "slug": "control",
          "value": null,
          "addon_release_url": null,
          "preferences": []
        },
        {
          "description": "",
          "is_control": true,
          "name": "treatment",
          "ratio": 50,
          "slug": "treatment",
          "value": null,
          "addon_release_url": null,
          "preferences": []
        }
      ]
    }
    """
    experiment = ExperimentV1.from_dict(
        json.loads(experiment_json)).to_experiment()
    config = AnalysisSpec().resolve(experiment)
    analysis = Analysis("test", "test", config)
    analysis.run(current_date=dt.datetime(2020, 3, 16, tzinfo=pytz.utc),
                 dry_run=True)

示例#3

0

显示文件

    def test_no_enrollments(self, client, project_id, static_dataset,
                            temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment-2",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="a", ratio=0.5),
                Branch(slug="b", ratio=0.5)
            ],
            reference_branch="a",
            features=[],
            normandy_slug="test-experiment-2",
        )

        config = AnalysisSpec().resolve(experiment)

        self.analysis_mock_run(config, static_dataset, temporary_dataset,
                               project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_2_week_1`
            ORDER BY enrollment_date DESC
        """)

        assert query_job.result().total_rows == 0

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["a", "point"] == 0.0
        assert count_by_branch.loc["b", "point"] == 0.0

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        ) is not None)

示例#4

0

显示文件

def test_regression_20200320():
    experiment_json = r"""
        {
          "experiment_url": "https://experimenter.services.mozilla.com/experiments/impact-of-level-2-etp-on-a-custom-distribution/",
          "type": "pref",
          "name": "Impact of Level 2 ETP on a Custom Distribution",
          "slug": "impact-of-level-2-etp-on-a-custom-distribution",
          "public_name": "Impact of Level 2 ETP",
          "status": "Live",
          "start_date": 1580169600000,
          "end_date": 1595721600000,
          "proposed_start_date": 1580169600000,
          "proposed_enrollment": null,
          "proposed_duration": 180,
          "normandy_slug": "pref-impact-of-level-2-etp-on-a-custom-distribution-release-72-80-bug-1607493",
          "normandy_id": 906,
          "other_normandy_ids": [],
          "variants": [
            {
              "description": "",
              "is_control": true,
              "name": "treatment",
              "ratio": 100,
              "slug": "treatment",
              "value": "true",
              "addon_release_url": null,
              "preferences": []
            }
          ]
        }
    """  # noqa
    experiment = ExperimentV1.from_dict(
        json.loads(experiment_json)).to_experiment()
    config = AnalysisSpec().resolve(experiment)
    analysis = Analysis("test", "test", config)
    with pytest.raises(NoEnrollmentPeriodException):
        analysis.run(current_date=dt.datetime(2020, 3, 19, tzinfo=pytz.utc),
                     dry_run=True)

示例#5

0

显示文件

    def test_metrics(self, client, project_id, static_dataset,
                     temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            features=[],
            normandy_slug="test-experiment",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expr=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(config, static_dataset, temporary_dataset,
                               project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        for i, row in enumerate(query_job.result()):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_weekly")
                is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        ) is not None)

示例#6

0

显示文件

文件： test_analysis_integration.py 项目： mozilla/jetstream

    def test_logging(self, monkeypatch, client, project_id, static_dataset,
                     temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [
                Summary(test_active_hours,
                        BootstrapMean(confidence_interval=10))
            ]
        }

        log_config = LogConfiguration(
            log_project_id=project_id,
            log_dataset_id=temporary_dataset,
            log_table_id="logs",
            log_to_bigquery=True,
            task_profiling_log_table_id="task_profiling_logs",
            task_monitoring_log_table_id="task_monitoring_logs",
            capacity=1,
        )
        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id, log_config)

        assert client.client.get_table(
            f"{project_id}.{temporary_dataset}.logs") is not None

        logs = list(
            client.client.list_rows(f"{project_id}.{temporary_dataset}.logs"))

        assert len(logs) >= 1
        error_logs = [log for log in logs if log.get("log_level") == "ERROR"]
        assert (
            "Error while computing statistic bootstrap_mean for metric active_hours"
            in error_logs[0].get("message"))
        assert error_logs[0].get("log_level") == "ERROR"

示例#7

0

显示文件

文件： test_analysis_integration.py 项目： mozilla/jetstream

    def test_with_segments(self, monkeypatch, client, project_id,
                           static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        test_clients_last_seen = SegmentDataSource(
            "clients_last_seen", f"`{project_id}.test_data.clients_last_seen`")
        regular_user_v3 = Segment(
            "regular_user_v3",
            test_clients_last_seen,
            "COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE)",
        )
        config.experiment.segments = [regular_user_v3]

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": True,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": False,
            },
        ]

        for i, row in enumerate(query_job.result()):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_enrollments_weekly"
        ) is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        # Only one count per segment and branch, please
        assert (stats.query(
            "metric == 'identity' and statistic == 'count'").groupby(
                ["segment", "analysis_basis", "window_index",
                 "branch"]).size() == 1).all()

        count_by_branch = stats.query(
            "segment == 'all' and statistic == 'count'").set_index("branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0
        assert count_by_branch.loc["branch2",
                                   "analysis_basis"] == "enrollments"

示例#8

0

显示文件

文件： test_analysis_integration.py 项目： mozilla/jetstream

    def test_no_enrollments(self, monkeypatch, client, project_id,
                            static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment-2",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="a", ratio=0.5),
                Branch(slug="b", ratio=0.5)
            ],
            reference_branch="a",
            normandy_slug="test-experiment-2",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_2_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)

        assert query_job.result().total_rows == 0

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["a", "point"] == 0.0
        assert count_by_branch.loc["b", "point"] == 0.0
        assert count_by_branch.loc["b", "analysis_basis"] == "enrollments"

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        ) is not None)

示例#9

0

显示文件

文件： test_analysis_integration.py 项目： mozilla/jetstream

    def test_metrics_with_exposure(self, monkeypatch, client, project_id,
                                   static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
            analysis_bases=[AnalysisBasis.EXPOSURES],
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }
        config.experiment.exposure_signal = ExposureSignal(
            name="ad_exposure",
            data_source=test_clients_daily,
            select_expression="active_hours_sum > 0",
            friendly_name="Ad exposure",
            description="Clients have clicked on ad",
            window_start="enrollment_start",
            window_end="analysis_window_end",
        )

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_exposures_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        r = query_job.result()

        for i, row in enumerate(r):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_exposures_weekly"
        ) is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        ) is not None)