def test_experiment_analytics_search_by_nested_filter(sagemaker_session):
    """Search trial components by parent experiment plus a hyperparameter bound.

    Combines two filters (parent experiment equals the name yielded by the
    ``experiment`` context manager, ``hp1`` >= 10) into one search expression,
    then checks the dataframe's columns and that more than five rows match.
    """
    expected_columns = [
        "TrialComponentName",
        "DisplayName",
        "hp1",
        "Trials",
        "Experiments",
    ]

    with experiment(sagemaker_session) as experiment_name:
        by_experiment = {
            "Name": "Parents.ExperimentName",
            "Operator": "Equals",
            "Value": experiment_name,
        }
        hp1_at_least_ten = {
            "Name": "Parameters.hp1",
            "Operator": "GreaterThanOrEqualTo",
            "Value": "10",
        }
        nested_filter = {"Filters": [by_experiment, hp1_at_least_ten]}

        analytics = ExperimentAnalytics(
            sagemaker_session=sagemaker_session,
            search_expression=nested_filter,
        )

        assert list(analytics.dataframe().columns) == expected_columns
        # TODO [owen-t] Replace with == 10 and put test in retry block
        assert len(analytics.dataframe()) > 5
# Example no. 2
# 0
def test_experiment_analytics(sagemaker_session):
    """Check the dataframe columns for the experiment yielded by ``experiment``."""
    expected_columns = ["TrialComponentName", "DisplayName", "hp1"]

    with experiment(sagemaker_session) as experiment_name:
        frame = ExperimentAnalytics(
            experiment_name=experiment_name,
            sagemaker_session=sagemaker_session,
        ).dataframe()

        assert list(frame.columns) == expected_columns
def test_trial_analytics_dataframe_filter_trials_search_exp_with_sort(mock_session):
    """Verify the search request built from a filter, an experiment name and sorting.

    The expected call keeps the caller's tag filter first, adds a
    ``Parents.ExperimentName`` filter after it, and forwards the sort field
    and sort order as ``SortBy`` / ``SortOrder``.
    """
    mock_session.sagemaker_client.search.return_value = {"Results": []}

    tag_filter = {"Name": "Tags.someTag", "Operator": "Equals", "Value": "someValue"}
    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "someExperiment",
    }
    # Built up front from its own copy of the tag filter so the expectation
    # stays independent of whatever is done with the objects passed in below.
    expected_search_exp = {"Filters": [dict(tag_filter), experiment_filter]}

    analytics = ExperimentAnalytics(
        experiment_name="someExperiment",
        search_expression={"Filters": [tag_filter]},
        sort_by="Tags.someTag",
        sort_order="Ascending",
        sagemaker_session=mock_session,
    )
    analytics.dataframe()

    expected_request = {
        "Resource": "ExperimentTrialComponent",
        "SearchExpression": expected_search_exp,
        "SortBy": "Tags.someTag",
        "SortOrder": "Ascending",
    }
    mock_session.sagemaker_client.search.assert_called_with(**expected_request)
def test_experiment_analytics_pagination(sagemaker_session):
    """Verify that analytics returns more rows than a single search page.

    Creates an experiment with 20 trials, each with one trial component that
    carries an ``hp1`` parameter, waits for search to catch up, and checks the
    dataframe's columns and that more than 10 rows are returned.

    Everything created here is removed in a ``finally`` block through
    ``_delete_resources`` (the cleanup helper this file already uses in
    ``experiment_with_artifacts``), so a failing assertion no longer leaves
    experiments, trials and trial components behind in the account.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup

    experiment_name = "experiment" + str(uuid.uuid4())
    sm.create_experiment(ExperimentName=experiment_name)

    try:
        # Search returns 10 results by default. Add 20 trials to verify pagination.
        for i in range(20):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            # Recorded before the component calls so that a failure part-way
            # through the loop still hands the trial to the cleanup step.
            trials[trial_name] = trial_component_name

            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        # wait for search to get updated  TODO [owen-t]: Replace with retry
        time.sleep(15)

        analytics = ExperimentAnalytics(
            experiment_name=experiment_name, sagemaker_session=sagemaker_session
        )
        frame = analytics.dataframe()

        assert list(frame.columns) == ["TrialComponentName", "DisplayName", "hp1"]
        # TODO [owen-t] Replace with == 20 and put test in retry block
        assert len(frame) > 10
    finally:
        _delete_resources(sm, experiment_name, trials)
def test_experiment_analytics(sagemaker_session):
    """Check the dataframe columns for an experiment created by this test.

    Creates an experiment with five trials, each with one trial component that
    carries an ``hp1`` parameter, waits for search to catch up, and checks the
    columns of the resulting dataframe.

    Everything created here is removed in a ``finally`` block through
    ``_delete_resources`` (the cleanup helper this file already uses in
    ``experiment_with_artifacts``), so the test no longer leaves experiments,
    trials and trial components behind in the account.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup

    experiment_name = "experiment-" + str(uuid.uuid4())
    sm.create_experiment(ExperimentName=experiment_name)

    try:
        for i in range(5):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            # Recorded before the component calls so that a failure part-way
            # through the loop still hands the trial to the cleanup step.
            trials[trial_name] = trial_component_name

            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        time.sleep(15)  # wait for search to get updated

        analytics = ExperimentAnalytics(
            experiment_name=experiment_name, sagemaker_session=sagemaker_session
        )

        assert list(analytics.dataframe().columns) == [
            "TrialComponentName",
            "DisplayName",
            "hp1",
        ]
    finally:
        _delete_resources(sm, experiment_name, trials)
def test_trial_analytics_dataframe_search_pagination(mock_session):
    """Check that a ``NextToken`` in a search response leads to a follow-up search.

    The mocked client returns two pages with one trial component each. The
    test expects both components in the dataframe and two ``search`` calls,
    the second one carrying the token returned with the first page.
    """
    first_page = {
        "Results": [{"TrialComponent": trial_component("trial-1")}],
        "NextToken": "nextToken",
    }
    last_page = {"Results": [{"TrialComponent": trial_component("trial-2")}]}
    mock_session.sagemaker_client.search.side_effect = [first_page, last_page]

    analytics = ExperimentAnalytics(experiment_name="experiment1", sagemaker_session=mock_session)

    # Every column except the component name holds the same value in both rows.
    shared_values = [
        ("DisplayName", "Training"),
        ("SourceArn", "some-source-arn"),
        ("hp1", 1.0),
        ("hp2", "abc"),
        ("metric1 - Min", 3.0),
        ("metric1 - Max", 5.0),
        ("metric1 - Avg", 4.0),
        ("metric1 - StdDev", 1.0),
        ("metric1 - Last", 2.0),
        ("metric1 - Count", 2.0),
        ("metric2 - Min", 8.0),
        ("metric2 - Max", 10.0),
        ("metric2 - Avg", 9.0),
        ("metric2 - StdDev", 0.05),
        ("metric2 - Last", 7.0),
        ("metric2 - Count", 2.0),
        ("inputArtifacts1 - MediaType", "text/plain"),
        ("inputArtifacts1 - Value", "s3:/foo/bar1"),
        ("inputArtifacts2 - MediaType", "text/plain"),
        ("inputArtifacts2 - Value", "s3:/foo/bar2"),
        ("outputArtifacts1 - MediaType", "text/csv"),
        ("outputArtifacts1 - Value", "s3:/sky/far1"),
        ("outputArtifacts2 - MediaType", "text/csv"),
        ("outputArtifacts2 - Value", "s3:/sky/far2"),
    ]
    columns = [("TrialComponentName", ["trial-1", "trial-2"])]
    columns.extend((name, [value, value]) for name, value in shared_values)
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}
    first_call = mock.call(
        Resource="ExperimentTrialComponent", SearchExpression=expected_search_exp
    )
    follow_up_call = mock.call(
        Resource="ExperimentTrialComponent",
        SearchExpression=expected_search_exp,
        NextToken="nextToken",
    )
    mock_session.sagemaker_client.search.assert_has_calls([first_call, follow_up_call])
# Example no. 7
# 0
def experiment_with_artifacts(sagemaker_session):
    """Context manager yielding the name of an experiment that has artifacts.

    On entry it creates an experiment with 20 trials. Each trial gets one
    trial component with an ``hp1`` parameter, one input artifact and one
    output artifact. It then waits for search to catch up, polling up to three
    times until the analytics dataframe has at least one column. On exit, also
    when setup or the ``with`` body raises, the created resources are handed
    to ``_delete_resources``.

    Args:
        sagemaker_session: Session whose ``sagemaker_client`` performs every
            API call made here.

    Returns:
        A context manager; ``with experiment_with_artifacts(s) as name`` binds
        ``name`` to the generated experiment name.
    """
    # A bare generator does not implement the context-manager protocol, yet
    # this function is used in ``with`` statements. The import is local so the
    # fix does not depend on the module's import block.
    from contextlib import contextmanager

    @contextmanager
    def _provisioned_experiment():
        sm = sagemaker_session.sagemaker_client
        trials = {}  # for resource cleanup

        experiment_name = "experiment-" + str(uuid.uuid4())
        try:
            sm.create_experiment(ExperimentName=experiment_name)

            # Search returns 10 results by default. Add 20 trials to verify pagination.
            for i in range(20):
                trial_name = "trial-" + str(uuid.uuid4())
                sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

                trial_component_name = "tc-" + str(uuid.uuid4())
                trials[trial_name] = trial_component_name

                sm.create_trial_component(
                    TrialComponentName=trial_component_name, DisplayName="Training"
                )
                sm.update_trial_component(
                    TrialComponentName=trial_component_name,
                    Parameters={"hp1": {"NumberValue": i}},
                    InputArtifacts={
                        "inputArtifacts1": {"MediaType": "text/csv", "Value": "s3:/foo/bar1"}
                    },
                    OutputArtifacts={
                        "outputArtifacts1": {"MediaType": "text/plain", "Value": "s3:/foo/bar2"}
                    },
                )
                sm.associate_trial_component(
                    TrialComponentName=trial_component_name, TrialName=trial_name
                )
                time.sleep(1)

            time.sleep(15)  # wait for search to get updated

            # allow search time thrice
            for _ in range(3):
                analytics = ExperimentAnalytics(
                    experiment_name=experiment_name, sagemaker_session=sagemaker_session
                )

                if len(analytics.dataframe().columns) > 0:
                    break

                time.sleep(15)

            yield experiment_name
        finally:
            _delete_resources(sm, experiment_name, trials)

    return _provisioned_experiment()
def test_trial_analytics_dataframe_selected_hyperparams(mock_session):
    """Check the dataframe when only the ``hp2`` hyperparameter is requested.

    With ``parameter_names=["hp2"]`` the expected frame has an ``hp2`` column
    and no ``hp1`` column, while the metric, artifact, ``Trials`` and
    ``Experiments`` columns are all present. The search request is expected
    to filter on the parent experiment name only.
    """
    search_results = [
        {"TrialComponent": trial_component(name)} for name in ("trial-1", "trial-2")
    ]
    mock_session.sagemaker_client.search.return_value = {"Results": search_results}

    analytics = ExperimentAnalytics(
        experiment_name="experiment1",
        parameter_names=["hp2"],
        sagemaker_session=mock_session,
    )

    # Scalar columns that hold the same value in both rows.
    shared_values = [
        ("DisplayName", "Training"),
        ("SourceArn", "some-source-arn"),
        ("hp2", "abc"),
        ("metric1 - Min", 3.0),
        ("metric1 - Max", 5.0),
        ("metric1 - Avg", 4.0),
        ("metric1 - StdDev", 1.0),
        ("metric1 - Last", 2.0),
        ("metric1 - Count", 2.0),
        ("metric2 - Min", 8.0),
        ("metric2 - Max", 10.0),
        ("metric2 - Avg", 9.0),
        ("metric2 - StdDev", 0.05),
        ("metric2 - Last", 7.0),
        ("metric2 - Count", 2.0),
        ("inputArtifacts1 - MediaType", "text/plain"),
        ("inputArtifacts1 - Value", "s3:/foo/bar1"),
        ("inputArtifacts2 - MediaType", "text/plain"),
        ("inputArtifacts2 - Value", "s3:/foo/bar2"),
        ("outputArtifacts1 - MediaType", "text/csv"),
        ("outputArtifacts1 - Value", "s3:/sky/far1"),
        ("outputArtifacts2 - MediaType", "text/csv"),
        ("outputArtifacts2 - Value", "s3:/sky/far2"),
    ]
    columns = [("TrialComponentName", ["trial-1", "trial-2"])]
    columns.extend((name, [value, value]) for name, value in shared_values)
    # The list-valued columns are spelled out so each row owns its own list.
    columns.append(("Trials", [["trial1"], ["trial1"]]))
    columns.append(("Experiments", [["experiment1"], ["experiment1"]]))
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}
    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent", SearchExpression=expected_search_exp
    )
# Example no. 9
# 0
def test_experiment_analytics_pagination(sagemaker_session):
    """Check columns and that more than 10 rows come back for the experiment.

    The experiment name is the one yielded by the ``experiment`` context
    manager.
    """
    expected_columns = ["TrialComponentName", "DisplayName", "hp1"]

    with experiment(sagemaker_session) as experiment_name:
        analytics = ExperimentAnalytics(
            sagemaker_session=sagemaker_session,
            experiment_name=experiment_name,
        )

        assert list(analytics.dataframe().columns) == expected_columns
        # TODO [owen-t] Replace with == 20 and put test in retry block
        assert len(analytics.dataframe()) > 10
def test_trial_analytics_dataframe_filter_trials_search_exp_only(mock_session):
    """Verify the search call when only a search expression is supplied.

    No experiment name is given; the assertion expects the ``search`` call to
    receive the very expression object that was passed to the constructor.
    """
    mock_session.sagemaker_client.search.return_value = {"Results": []}

    tag_filter = {"Name": "Tags.someTag", "Operator": "Equals", "Value": "someValue"}
    tag_only_expression = {"Filters": [tag_filter]}

    ExperimentAnalytics(
        search_expression=tag_only_expression,
        sagemaker_session=mock_session,
    ).dataframe()

    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent",
        SearchExpression=tag_only_expression,
    )
# Example no. 11
# 0
def test_experiment_analytics_artifacts(sagemaker_session):
    """Check that input and output artifacts show up as dataframe columns.

    Uses the experiment provided by ``experiment_with_artifacts`` and expects
    a ``MediaType`` and a ``Value`` column per artifact after the base columns.
    """
    base_columns = ["TrialComponentName", "DisplayName", "hp1"]
    artifact_columns = [
        "inputArtifacts1 - MediaType",
        "inputArtifacts1 - Value",
        "outputArtifacts1 - MediaType",
        "outputArtifacts1 - Value",
    ]

    with experiment_with_artifacts(sagemaker_session) as experiment_name:
        frame = ExperimentAnalytics(
            experiment_name=experiment_name,
            sagemaker_session=sagemaker_session,
        ).dataframe()

        assert list(frame.columns) == base_columns + artifact_columns
def test_experiment_analytics_search_by_nested_filter(sagemaker_session):
    """Search trial components by parent experiment plus a hyperparameter bound.

    Creates an experiment with 20 trials whose trial components carry ``hp1``
    values 0..19, waits for search to catch up, then searches with two filters
    (parent experiment equals this experiment, ``hp1`` >= 10) and checks the
    dataframe's columns and that more than five rows match.

    Everything created here is removed in a ``finally`` block through
    ``_delete_resources`` (the cleanup helper this file already uses in
    ``experiment_with_artifacts``), so a failing assertion no longer leaves
    experiments, trials and trial components behind in the account.
    """
    sm = sagemaker_session.sagemaker_client
    trials = {}  # trial name -> trial component name, for resource cleanup

    experiment_name = "experiment" + str(uuid.uuid4())
    sm.create_experiment(ExperimentName=experiment_name)

    try:
        for i in range(20):
            trial_name = "trial-" + str(uuid.uuid4())
            sm.create_trial(TrialName=trial_name, ExperimentName=experiment_name)

            trial_component_name = "tc-" + str(uuid.uuid4())
            # Recorded before the component calls so that a failure part-way
            # through the loop still hands the trial to the cleanup step.
            trials[trial_name] = trial_component_name

            sm.create_trial_component(
                TrialComponentName=trial_component_name, DisplayName="Training"
            )
            sm.update_trial_component(
                TrialComponentName=trial_component_name,
                Parameters={"hp1": {"NumberValue": i}},
            )
            sm.associate_trial_component(
                TrialComponentName=trial_component_name, TrialName=trial_name
            )

        # wait for search to get updated  TODO [owen-t]: Replace with retry
        time.sleep(15)

        search_exp = {
            "Filters": [
                {
                    "Name": "Parents.ExperimentName",
                    "Operator": "Equals",
                    "Value": experiment_name,
                },
                {
                    "Name": "Parameters.hp1",
                    "Operator": "GreaterThanOrEqualTo",
                    "Value": "10",
                },
            ]
        }

        analytics = ExperimentAnalytics(
            sagemaker_session=sagemaker_session, search_expression=search_exp
        )
        frame = analytics.dataframe()

        assert list(frame.columns) == ["TrialComponentName", "DisplayName", "hp1"]
        # TODO [owen-t] Replace with == 10 and put test in retry block
        assert len(frame) > 5
    finally:
        _delete_resources(sm, experiment_name, trials)
# Example no. 13
# 0
def test_trial_analytics_dataframe_selected_metrics(mock_session):
    """Check the dataframe when only ``metric1`` is requested.

    With ``metric_names=["metric1"]`` the expected frame holds the name,
    display name, source ARN, both hyperparameters and the six ``metric1``
    statistics, and nothing else. The search request is expected to filter on
    the parent experiment name only.
    """
    search_results = [
        {"TrialComponent": trial_component(name)} for name in ("trial-1", "trial-2")
    ]
    mock_session.sagemaker_client.search.return_value = {"Results": search_results}

    analytics = ExperimentAnalytics(
        experiment_name="experiment1",
        metric_names=["metric1"],
        sagemaker_session=mock_session,
    )

    # Every column except the component name holds the same value in both rows.
    shared_values = [
        ("DisplayName", "Training"),
        ("SourceArn", "some-source-arn"),
        ("hp1", 1.0),
        ("hp2", "abc"),
        ("metric1 - Min", 3.0),
        ("metric1 - Max", 5.0),
        ("metric1 - Avg", 4.0),
        ("metric1 - StdDev", 1.0),
        ("metric1 - Last", 2.0),
        ("metric1 - Count", 2.0),
    ]
    columns = [("TrialComponentName", ["trial-1", "trial-2"])]
    columns.extend((name, [value, value]) for name, value in shared_values)
    expected_dataframe = pd.DataFrame.from_dict(OrderedDict(columns))

    pd.testing.assert_frame_equal(expected_dataframe, analytics.dataframe())

    experiment_filter = {
        "Name": "Parents.ExperimentName",
        "Operator": "Equals",
        "Value": "experiment1",
    }
    expected_search_exp = {"Filters": [experiment_filter]}
    mock_session.sagemaker_client.search.assert_called_with(
        Resource="ExperimentTrialComponent", SearchExpression=expected_search_exp
    )
# Example no. 14
# 0
def test_trial_analytics_dataframe_throws_error_if_no_filter_specified(mock_session):
    """Expect ``ValueError`` when analytics is constructed with a session only."""
    # Neither an experiment name nor a search expression is supplied.
    session_only = {"sagemaker_session": mock_session}

    with pytest.raises(ValueError):
        ExperimentAnalytics(**session_only)