Example #1
import pytest

from mlflow.entities import LifecycleStage, Run, RunData, RunInfo, RunStatus
from mlflow.exceptions import MlflowException
from mlflow.utils.search_utils import SearchUtils


def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(run_info=RunInfo(
        run_uuid="hi", run_id="hi", experiment_id=0,
        user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
        start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
        run_data=RunData(metrics=[], params=[], tags=[])
    )
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type, key=key, comparator=bad_comparator, value=entity_value)
        with pytest.raises(MlflowException) as e:
            SearchUtils.filter([run], bad_filter)
        assert "Invalid comparator" in str(e.value.message)
Example #2
    def _search_runs(self, experiment_ids, filter_string, run_view_type,
                     max_results, order_by, page_token):
        # TODO: push search query into backend database layer
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(
                    SEARCH_MAX_RESULTS_THRESHOLD, max_results),
                INVALID_PARAMETER_VALUE)

        stages = set(LifecycleStage.view_type_to_stages(run_view_type))
        with self.ManagedSessionMaker() as session:
            # Fetch the appropriate runs and eagerly load their summary metrics, params, and
            # tags. These run attributes are referenced during the invocation of
            # ``run.to_mlflow_entity()``, so eager loading helps avoid additional database queries
            # that are otherwise executed at attribute access time under a lazy loading model.
            queried_runs = session \
                .query(SqlRun) \
                .options(*self._get_eager_run_query_options()) \
                .filter(
                    SqlRun.experiment_id.in_(experiment_ids),
                    SqlRun.lifecycle_stage.in_(stages)) \
                .all()
            runs = [run.to_mlflow_entity() for run in queried_runs]

        filtered = SearchUtils.filter(runs, filter_string)
        sorted_runs = SearchUtils.sort(filtered, order_by)
        runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token,
                                                     max_results)
        return runs, next_page_token
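
The tail of this method is the in-memory post-processing pipeline shared by every store in this section. A standalone sketch of how the three SearchUtils steps compose, assuming runs is any list of Run entities and that paginate returns the page together with a continuation token (None once the last page is reached):

filtered = SearchUtils.filter(runs, "metrics.key1 > 122 and params.my_param = 'A'")
ordered = SearchUtils.sort(filtered, ["metrics.key1 DESC"])
page, next_token = SearchUtils.paginate(ordered, page_token=None, max_results=10)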
Example #3
import pytest

from mlflow.entities import (
    LifecycleStage, Metric, Param, Run, RunData, RunInfo, RunStatus, RunTag)
from mlflow.utils.search_utils import SearchUtils


def test_correct_filtering(filter_string, matching_runs):
    runs = [
        Run(run_info=RunInfo(
            run_uuid="hi", run_id="hi", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[])),
        Run(run_info=RunInfo(
            run_uuid="hi2", run_id="hi2", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FINISHED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
            run_uuid="hi3", run_id="hi3", experiment_id=1,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")],
                tags=[RunTag("tag1", "D")])),
    ]
    filtered_runs = SearchUtils.filter(runs, filter_string)
    assert set(filtered_runs) == set([runs[i] for i in matching_runs])
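
As in the first example, filter_string and matching_runs come from a parametrize decorator that is not shown. Given the values constructed above (key1 = 121/123/125, my_param = A/A/B, tag1 set only on the last two runs), a plausible set of cases might look like this; the exact matrix is an assumption:

@pytest.mark.parametrize("filter_string, matching_runs", [
    ("metrics.key1 > 122", [1, 2]),
    ("params.my_param = 'A'", [0, 1]),
    ("tags.tag1 = 'D'", [2]),
    ("metrics.key1 < 125 and params.my_param = 'A'", [0, 1]),
])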
Example #4
from mlflow.entities import LifecycleStage, Run, RunData, RunInfo, RunStatus
from mlflow.utils.search_utils import SearchUtils


def test_filter_runs_by_start_time():
    runs = [
        Run(
            run_info=RunInfo(
                run_uuid=run_id,
                run_id=run_id,
                experiment_id=0,
                user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=idx,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(),
        ) for idx, run_id in enumerate(["a", "b", "c"])
    ]
    assert SearchUtils.filter(runs, "attribute.start_time >= 0") == runs
    assert SearchUtils.filter(runs, "attribute.start_time > 1") == runs[2:]
    assert SearchUtils.filter(runs, "attribute.start_time = 2") == runs[2:]
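
start_time is only one of the run attributes the filter grammar exposes; assuming status is also among the searchable attributes (every run above is FINISHED), an illustrative extra assertion would be:

assert SearchUtils.filter(runs, "attribute.status = 'FINISHED'") == runs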
Example #5
def _search_runs(self, experiment_ids, filter_string, run_view_type,
                 max_results, order_by, page_token):
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            databricks_pb2.INVALID_PARAMETER_VALUE)
    # Materialize full Run entities (info plus data) for every experiment,
    # then filter, sort, and paginate the result in memory.
    runs = []
    for experiment_id in experiment_ids:
        run_infos = self._list_run_infos(experiment_id, run_view_type)
        runs.extend(self.get_run(r.run_id) for r in run_infos)
    filtered = SearchUtils.filter(runs, filter_string)
    sorted_runs = SearchUtils.sort(filtered, order_by)
    runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token,
                                                 max_results)
    return runs, next_page_token
Example #6
def _search_runs(self, experiment_ids, filter_string, run_view_type, max_results, order_by,
                 page_token):
    if page_token:
        raise MlflowException("SQLAlchemy-backed tracking stores do not yet support "
                              "pagination tokens.")
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException("Invalid value for request parameter max_results. It must be at "
                              "most {}, but got value {}".format(SEARCH_MAX_RESULTS_THRESHOLD,
                                                                 max_results),
                              INVALID_PARAMETER_VALUE)
    with self.ManagedSessionMaker() as session:
        runs = [run.to_mlflow_entity()
                for exp in experiment_ids
                for run in self._list_runs(session, exp, run_view_type)]
        filtered = SearchUtils.filter(runs, filter_string)
        runs = SearchUtils.sort(filtered, order_by)[:max_results]
        return runs, None
Example #7
def search_runs(self,
                experiment_ids,
                filter_string,
                run_view_type,
                max_results=SEARCH_MAX_RESULTS_THRESHOLD,
                order_by=None):
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            INVALID_PARAMETER_VALUE)
    with self.ManagedSessionMaker() as session:
        runs = [
            run.to_mlflow_entity() for exp in experiment_ids
            for run in self._list_runs(session, exp, run_view_type)
        ]
        filtered = SearchUtils.filter(runs, filter_string)
        return SearchUtils.sort(filtered, order_by)[:max_results]
Example #8
    def _search_runs(
        self,
        experiment_ids,
        filter_string,
        run_view_type,
        max_results,
        order_by,
        page_token,
    ):
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(
                    SEARCH_MAX_RESULTS_THRESHOLD, max_results),
                INVALID_PARAMETER_VALUE,
            )
        runs = []
        for experiment_id in experiment_ids:
            run_ids = self._list_runs_ids(experiment_id, run_view_type)
            run_infos = [
                _dict_to_run_info(r) for r in self._get_run_list(run_ids)
            ]
            for run_info in run_infos:
                # Load the metrics, params and tags for the run
                run_id = run_info.run_id
                metrics = self.get_all_metrics(run_id)
                params = self.get_all_params(run_id)
                tags = self.get_all_tags(run_id)
                run = Run(run_info, RunData(metrics, params, tags))
                runs.append(run)

        filtered = SearchUtils.filter(runs, filter_string)
        sorted_runs = SearchUtils.sort(filtered, order_by)
        runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token,
                                                     max_results)
        return runs, next_page_token
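
The variants that support pagination share one token contract: the caller feeds next_page_token back into the following call until it comes back as None. A usage sketch against any such store; store, the experiment id, and process are placeholders, and ViewType comes from mlflow.entities:

token = None
while True:
    page, token = store._search_runs(
        experiment_ids=["0"], filter_string="metrics.key1 > 100",
        run_view_type=ViewType.ACTIVE_ONLY, max_results=50,
        order_by=["attribute.start_time DESC"], page_token=token)
    for run in page:
        process(run)  # placeholder for caller logic
    if token is None:
        break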