Example #1
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)], params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
Example #2
def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    metrics = [
        Metric(key=key, value=value, timestamp=t)
        for key, value in expected_metrics.items()
    ]
    params = [
        Param(key=key, value=value) for key, value in expected_params.items()
    ]
    tags = [
        RunTag(key=key, value=value)
        for key, value in exact_expected_tags.items()
    ]

    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(run_id=run_uuid,
                                                 metrics=metrics,
                                                 params=params,
                                                 tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for metric in finished_run.data.metrics:
        assert expected_metrics[metric.key] == metric.value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag in finished_run.data.tags:
        if tag.key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag.key] == tag.value
    # Validate params
    assert len(finished_run.data.params) == 2
    for param in finished_run.data.params:
        assert expected_params[param.key] == param.value
Example #3
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [
        Metric.from_proto(proto_metric)
        for proto_metric in request_message.metrics
    ]
    params = [
        Param.from_proto(proto_param) for proto_param in request_message.params
    ]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(run_id=request_message.run_id,
                                    metrics=metrics,
                                    params=params,
                                    tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #4
def test_list_all_columns_big(init_store):
    new_tags = []
    new_tags_key = []
    for i in range(100):
        new_tags.append(RunTag(f'my_tag{i}', f'val{i}'))
        new_tags_key.append(f'my_tag{i}')
    init_store.log_batch("7b2e71956f3d4c08b042624a8d83700d", metrics=[], params=[], tags=new_tags)
    new_tags_key.sort()
    # Wait for Elasticsearch refresh for search
    time.sleep(2)
    expected_columns = Columns(metrics=["big_metric", "inf_metric", "metric0", "metric1",
                                        "metric_batch1", "metric_batch2", "nan_metric",
                                        "negative_inf_metric", "new_metric"],
                               params=["new_param", "param0", "param1",
                                       "param2", "param3", "param_batch1", "param_batch2"],
                               tags=[*new_tags_key, *["new_tag", "tag0", "tag1", "tag2", "tag3",
                                                      "tag_batch1", "tag_batch2"]])
    actual_columns = init_store.list_all_columns("hTb553MBNoOYfhXjnnQh", ViewType.ACTIVE_ONLY)
    assert expected_columns.__dict__ == actual_columns.__dict__
Example #5
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an mlflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # RunData holds metrics, params and tags as lists,
                # so obj is a list whose items need to be converted
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None)\
                            or ((o.step, o.timestamp, o.value) >=
                                (existing_metric.step, existing_metric.timestamp,
                                 existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value,
                                                    o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)

        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)

        config[k] = obj
    return base(**config)
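The latest-metric selection in the metrics branch above can be tried on its own. A minimal sketch, assuming a plain list of Metric entities instead of SQL rows (the keys, values, timestamps and steps below are made up):

from mlflow.entities import Metric

# Illustrative metric history: two points for "loss", one for "acc".
history = [
    Metric("loss", 0.9, timestamp=100, step=0),
    Metric("loss", 0.4, timestamp=200, step=1),
    Metric("acc", 0.8, timestamp=200, step=1),
]

# Keep only the latest value per key, ordered by (step, timestamp, value),
# mirroring the comparison used in the branch above.
latest = {}
for m in history:
    existing = latest.get(m.key)
    if existing is None or (m.step, m.timestamp, m.value) >= (
            existing.step, existing.timestamp, existing.value):
        latest[m.key] = m

assert latest["loss"].value == 0.4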
Example #6
def _create_run():
    request_message = _get_request_message(CreateRun())

    tags = [RunTag(tag.key, tag.value) for tag in request_message.tags]
    run = _get_store().create_run(
        experiment_id=request_message.experiment_id,
        user_id=request_message.user_id,
        run_name=request_message.run_name,
        source_type=request_message.source_type,
        source_name=request_message.source_name,
        entry_point_name=request_message.entry_point_name,
        start_time=request_message.start_time,
        source_version=request_message.source_version,
        tags=tags)

    response_message = CreateRun.Response()
    response_message.run.MergeFrom(run.to_proto())
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #7
def test_faculty_run_to_mlflow_run_name_backwards_compatability(
    faculty_attribute, faculty_tag_value, mlflow_attribute, mlflow_tag_value
):
    """Test logic setting run name tag when not available."""

    if faculty_tag_value is None:
        extra_tags = []
    else:
        extra_tags = [FacultyTag(MLFLOW_RUN_NAME, faculty_tag_value)]
    faculty_run = FACULTY_RUN._replace(
        name=faculty_attribute, tags=FACULTY_RUN.tags + extra_tags
    )

    if mlflow_tag_value is None:
        name_tag = None
    else:
        name_tag = RunTag(MLFLOW_RUN_NAME, mlflow_tag_value)
    expected_run = mlflow_run(name=mlflow_attribute, name_tag=name_tag)

    assert run_equals(faculty_run_to_mlflow_run(faculty_run), expected_run)
Example #8
def test_faculty_run_to_mlflow_run_parent_run_id_backwards_compatability(
    faculty_attribute, faculty_tag_value, mlflow_tag_value
):
    """Test logic setting parent run ID tag when not available."""

    if faculty_tag_value is None:
        extra_tags = []
    else:
        extra_tags = [FacultyTag(MLFLOW_PARENT_RUN_ID, faculty_tag_value)]
    faculty_run = FACULTY_RUN._replace(
        parent_run_id=faculty_attribute, tags=FACULTY_RUN.tags + extra_tags
    )

    if mlflow_tag_value is None:
        parent_run_id_tag = None
    else:
        parent_run_id_tag = RunTag(MLFLOW_PARENT_RUN_ID, mlflow_tag_value)
    expected_run = mlflow_run(parent_run_id_tag=parent_run_id_tag)

    assert run_equals(faculty_run_to_mlflow_run(faculty_run), expected_run)
Example #9
def test_log_batch(tracking_uri_mock, tmpdir):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [Metric(key=key, value=value, timestamp=t, step=i)
               for i, (key, value) in enumerate(sorted_expected_metrics)]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.tracking.MlflowClient().log_batch(run_id=run_id, metrics=metrics, params=params,
                                                 tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])

    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
Example #10
def test_log_batch(mlflow_client):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_uuid
    # TODO(sid): pass and assert on step
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 0)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    # TODO(sid): replace this with mlflow_client.get_metric_history
    fs = FileStore(server_root_dir)
    metric_history = fs.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
Example #11
    def test_log_batch_internal_error(self):
        # Verify that internal errors during the DB save step for log_batch result in
        # MlflowExceptions
        run = self._run_factory()

        def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
            raise Exception("Some internal error")
        with mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_metric") as metric_mock,\
                mock.patch(
                    "mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_param") as param_mock,\
                mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.set_tag") as tags_mock:
            metric_mock.side_effect = _raise_exception_fn
            param_mock.side_effect = _raise_exception_fn
            tags_mock.side_effect = _raise_exception_fn
            for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                           {"tags": [RunTag("c", "d")]}]:
                log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
                log_batch_kwargs.update(kwargs)
                with self.assertRaises(MlflowException) as e:
                    self.store.log_batch(run.info.run_uuid, **log_batch_kwargs)
                self.assertIn("Some internal error", str(e.exception.message))
Example #12
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an mlflow entity and build it
        obj = getattr(model, k)

        # RunData holds metrics, params and tags as lists,
        # so obj is a list whose items need to be converted
        if k == 'metrics':
            obj = [Metric(o.key, o.value, o.timestamp) for o in obj]

        if k == 'params':
            obj = [Param(o.key, o.value) for o in obj]

        if k == 'tags':
            obj = [RunTag(o.key, o.value) for o in obj]

        config[k] = obj
    return base(**config)
Example #13
    def _generate_run(self, i, runs_dict):
        """
        Generate a run object and save to runs_dict keyed by run_id.
        Most of data just depends on i, and some data are hard-coded for simplicityGenerate n number of runs. Most of
        data just depends on n, and some data are hard-coded for simplicity.
        """
        key = f"key{i}"
        value = f"value{i}"
        start_time = 123456 * i
        end_time = start_time + (1000 * i)
        run_id = f"run_id_{i}"

        metrics = [Metric(key, value, start_time, "stage")]
        params = [Param(key, value)]
        tags = [RunTag(key, value)]
        run_info = RunInfo(run_id, "experiment_id", "user_id", "status",
                           start_time, end_time, "lifecycle_stage")
        run = Run(run_info=run_info,
                  run_data=RunData(metrics=metrics, params=params, tags=tags))
        runs_dict[run_id] = run
        return run
Example #14
    def test_create_run_returns_expected_run_data(self):
        fs = FileStore(self.test_root)
        no_tags_run = fs.create_run(
            experiment_id=FileStore.DEFAULT_EXPERIMENT_ID, user_id='user', start_time=0, tags=[])
        assert isinstance(no_tags_run.data, RunData)
        assert len(no_tags_run.data.tags) == 0

        tags_dict = {
            "my_first_tag": "first",
            "my-second-tag": "2nd",
        }
        tags_entities = [
            RunTag(key, value) for key, value in tags_dict.items()
        ]
        tags_run = fs.create_run(
            experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
            user_id='user',
            start_time=0,
            tags=tags_entities)
        assert isinstance(tags_run.data, RunData)
        assert tags_run.data.tags == tags_dict
Example #15
    def _log_dataset_tag(self, client, run_id):
        """
        Log dataset metadata as a tag "mlflow.datasets", if the tag already exists, it will
        append current dataset metadata into existing tag content.
        """
        existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
            "mlflow.datasets", "[]")
        dataset_metadata_list = json.loads(existing_dataset_metadata_str)

        for metadata in dataset_metadata_list:
            if metadata["hash"] == self.hash and metadata[
                    "name"] == self._user_specified_name:
                break
        else:
            dataset_metadata_list.append(self._metadata)

        dataset_metadata_str = json.dumps(dataset_metadata_list,
                                          separators=(",", ":"))
        client.log_batch(
            run_id,
            tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
        )
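The merge step above can be exercised without a tracking server. A minimal sketch of the JSON append logic, with made-up dataset metadata values:

import json

from mlflow.entities import RunTag

# Hypothetical existing tag content and a new dataset entry (illustrative values only).
existing_tag_value = '[{"name":"train.csv","hash":"abc123"}]'
new_metadata = {"name": "eval.csv", "hash": "def456"}

dataset_metadata_list = json.loads(existing_tag_value)
# Append only if no entry with the same name and hash is already present,
# mirroring the for/else loop in the example above.
if not any(m["hash"] == new_metadata["hash"] and m["name"] == new_metadata["name"]
           for m in dataset_metadata_list):
    dataset_metadata_list.append(new_metadata)

merged_tag = RunTag("mlflow.datasets",
                    json.dumps(dataset_metadata_list, separators=(",", ":")))
print(merged_tag.key, merged_tag.value)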
Example #16
 def _create():
     metrics = [
         Metric(random_str(10), random_int(0, 1000),
                int(time.time() + random_int(-1e4, 1e4)))
         for _ in range(100)
     ]
     params = [
         Param(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]  # noqa
     tags = [
         RunTag(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]  # noqa
     rd = RunData()
     for p in params:
         rd._add_param(p)
     for m in metrics:
         rd._add_metric(m)
     for t in tags:
         rd._add_tag(t)
     return rd, metrics, params, tags
Example #17
    def create_run(self,
                   experiment_id,
                   user_id=None,
                   run_name=None,
                   source_type=None,
                   source_name=None,
                   entry_point_name=None,
                   start_time=None,
                   source_version=None,
                   tags=None,
                   parent_run_id=None):
        """
        Create a :py:class:`mlflow.entities.Run` object that can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
        Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
        :py:func:`mlflow.log_param`.

        :param user_id: If not provided, use the current user as a default.
        :param start_time: If not provided, use the current timestamp.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.RunTag` objects.
        :return: :py:class:`mlflow.entities.Run` that was created.
        """
        tags = tags if tags else {}
        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id if user_id is not None else _get_user_id(),
            run_name=run_name,
            source_type=source_type
            if source_type is not None else SourceType.LOCAL,
            source_name=source_name
            if source_name is not None else "Python Application",
            entry_point_name=entry_point_name,
            start_time=start_time or int(time.time() * 1000),
            source_version=source_version,
            tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
            parent_run_id=parent_run_id,
        )
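For reference, the dict-to-entity conversion behind the tags argument above is a plain comprehension over the dictionary; a small sketch with placeholder tag keys and values:

from mlflow.entities import RunTag

tags = {"mlflow.user": "alice", "stage": "dev"}  # placeholder keys and values
tag_entities = [RunTag(key, value) for key, value in tags.items()]
assert [t.key for t in tag_entities] == list(tags.keys())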
Example #18
    def test_log_batch_internal_error(self):
        # Verify that internal errors during log_batch result in MlflowExceptions
        fs = FileStore(self.test_root)
        run = self._create_run(fs)

        def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
            raise Exception("Some internal error")
        with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
                mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
                mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
            log_metric_mock.side_effect = _raise_exception_fn
            log_param_mock.side_effect = _raise_exception_fn
            set_tag_mock.side_effect = _raise_exception_fn
            for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                           {"tags": [RunTag("c", "d")]}]:
                log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
                log_batch_kwargs.update(kwargs)
                print(log_batch_kwargs)
                with self.assertRaises(MlflowException) as e:
                    fs.log_batch(run.info.run_uuid, **log_batch_kwargs)
                self.assertIn("Some internal error", str(e.exception.message))
                assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
Example #19
    def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
                   entry_point_name, start_time, source_version, tags, parent_run_id):
        """
        Creates a run under the specified experiment ID, setting the run's status to "RUNNING"
        and the start time to the current time.

        :param experiment_id: ID of the experiment for this run
        :param user_id: ID of the user launching this run
        :param source_type: Enum (integer) describing the source of the run
        :return: The created Run object
        """
        tag_protos = [tag.to_proto() for tag in tags]
        req_body = message_to_json(CreateRun(
            experiment_id=experiment_id, user_id=user_id, run_name="",
            source_type=source_type, source_name=source_name, entry_point_name=entry_point_name,
            start_time=start_time, source_version=source_version, tags=tag_protos,
            parent_run_id=parent_run_id))
        response_proto = self._call_endpoint(CreateRun, req_body)
        run = Run.from_proto(response_proto.run)
        if run_name:
            self.set_tag(run.info.run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
        return run
Example #20
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags):
     """
     Creates a run with the specified attributes.
     """
     if self.get_experiment(experiment_id) is None:
         raise Exception(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="",
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        lifecycle_stage=RunInfo.ACTIVE_LIFECYCLE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
                _make_persisted_run_info_dict(run_info))
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     if run_name:
         self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
     return Run(run_info=run_info, run_data=None)
Example #21
    def import_run_data(self, run_dct, run_id, src_user_id):
        from mlflow.entities import Metric, Param, RunTag
        now = round(time.time())
        params = [Param(k, v) for k, v in run_dct['params'].items()]
        metrics = [
            Metric(k, v, now, 0) for k, v in run_dct['metrics'].items()
        ]  # TODO: missing timestamp and step semantics?

        tags = run_dct['tags']
        if not self.import_mlflow_tags:  # remove mlflow tags
            keys = [k for k in tags.keys() if k.startswith("mlflow.")]
            for k in keys:
                tags.pop(k)
        if not self.import_mlflow_tools_tags:  # remove mlflow_tools tags
            keys = [k for k in tags.keys() if k.startswith("mlflow_tools.")]
            for k in keys:
                tags.pop(k)
        tags = [RunTag(k, str(v)) for k, v in tags.items()]

        if not self.in_databricks:
            utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
        #self.dump_tags(tags)
        self.client.log_batch(run_id, metrics, params, tags)
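The tag-filtering step above can be illustrated in isolation; a minimal sketch with made-up tag keys:

from mlflow.entities import RunTag

# Drop tags in the reserved "mlflow." namespace before re-importing a run
# (illustrative keys only).
tags = {"mlflow.user": "alice", "mlflow.source.type": "LOCAL", "quality": "good"}
tags = {k: v for k, v in tags.items() if not k.startswith("mlflow.")}
tag_entities = [RunTag(k, str(v)) for k, v in tags.items()]
assert [t.key for t in tag_entities] == ["quality"]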
Example #22
 def _create_run(self, run):
     args = {"run_name": run.run_name} if run.run_name else {}
     with mlflow.start_run(**args) as active_run:
         params = [Param(k, v) for k, v in run.params.items()]
         metrics = [
             Metric(k, v, int(time.time()), 0)
             for k, v in run.metrics.items()
         ]  # TODO: timestamp and step?
         tags = [RunTag(k, v) for k, v in run.tags.items()]
         mlflow.tracking.MlflowClient().log_batch(active_run.info.run_id,
                                                  metrics, params, tags)
         if self.do_tag:
             mlflow.set_tag("replayed", "true")
         for m in run.models:
             if m.model:
                 m.log_model_func(m.model, m.model_name)
         for a in run.artifacts:
             if a.artifact_local_path:
                 with open(a.artifact_local_path, "wb") as f:
                     f.write(a.artifact_bytes)
                 mlflow.log_artifact(a.artifact_local_path, a.artifact_path)
         return active_run.info.run_id
Example #23
    def record_logged_model(self, run_id, mlflow_model):
        if not isinstance(mlflow_model, Model):
            raise TypeError(
                "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'"
                .format(type(mlflow_model)))
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        model_dict = mlflow_model.to_dict()
        run_info = self._get_run_info(run_id)
        path = self._get_tag_path(run_info.experiment_id, run_info.run_id,
                                  MLFLOW_LOGGED_MODELS)
        if os.path.exists(path):
            with open(path, "r") as f:
                model_list = json.loads(f.read())
        else:
            model_list = []
        tag = RunTag(MLFLOW_LOGGED_MODELS,
                     json.dumps(model_list + [model_dict]))

        try:
            self._set_run_tag(run_info, tag)
        except Exception as e:
            raise MlflowException(e, INTERNAL_ERROR)
Example #24
    def test_search_tags(self):
        fs = FileStore(self.test_root)
        experiment_id = self.experiments[0]
        r1 = fs.create_run(
            experiment_id, 'user', 'name', 'source_type', 'source_name', 'entry_point_name', 0,
            None, [], None).info.run_uuid
        r2 = fs.create_run(
            experiment_id, 'user', 'name', 'source_type', 'source_name', 'entry_point_name', 0,
            None, [], None).info.run_uuid

        fs.set_tag(r1, RunTag('generic_tag', 'p_val'))
        fs.set_tag(r2, RunTag('generic_tag', 'p_val'))

        fs.set_tag(r1, RunTag('generic_2', 'some value'))
        fs.set_tag(r2, RunTag('generic_2', 'another value'))

        fs.set_tag(r1, RunTag('p_a', 'abc'))
        fs.set_tag(r2, RunTag('p_b', 'ABC'))

        # test search returns both runs
        six.assertCountEqual(self, [r1, r2], self._search(fs, experiment_id,
                                                          filter_str="tags.generic_tag = 'p_val'"))
        # test search returns appropriate run (same key different values per run)
        six.assertCountEqual(self, [r1],
                             self._search(fs, experiment_id,
                                          filter_str="tags.generic_2 = 'some value'"))
        six.assertCountEqual(self, [r2], self._search(fs, experiment_id,
                                                      filter_str="tags.generic_2='another value'"))
        six.assertCountEqual(self, [], self._search(fs, experiment_id,
                                                    filter_str="tags.generic_tag = 'wrong_val'"))
        six.assertCountEqual(self, [], self._search(fs, experiment_id,
                                                    filter_str="tags.generic_tag != 'p_val'"))
        six.assertCountEqual(self, [r1, r2],
                             self._search(fs, experiment_id,
                                          filter_str="tags.generic_tag != 'wrong_val'"))
        six.assertCountEqual(self, [r1, r2],
                             self._search(fs, experiment_id,
                                          filter_str="tags.generic_2 != 'wrong_val'"))
        six.assertCountEqual(self, [r1], self._search(fs, experiment_id,
                                                      filter_str="tags.p_a = 'abc'"))
        six.assertCountEqual(self, [r2], self._search(fs, experiment_id,
                                                      filter_str="tags.p_b = 'ABC'"))
Example #25
    def test_search_tags(self):
        fs = self._get_store()
        experiment_id = self.experiments[0]
        r1 = fs.create_run(experiment_id, "user", 0, []).info.run_id
        r2 = fs.create_run(experiment_id, "user", 0, []).info.run_id

        fs.set_tag(r1, RunTag("generic_tag", "p_val"))
        fs.set_tag(r2, RunTag("generic_tag", "p_val"))

        fs.set_tag(r1, RunTag("generic_2", "some value"))
        fs.set_tag(r2, RunTag("generic_2", "another value"))

        fs.set_tag(r1, RunTag("p_a", "abc"))
        fs.set_tag(r2, RunTag("p_b", "ABC"))

        # test search returns both runs
        six.assertCountEqual(
            self, [r1, r2],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_tag = 'p_val'"))
        # test search returns appropriate run (same key different values per run)
        six.assertCountEqual(
            self, [r1],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_2 = 'some value'"))
        six.assertCountEqual(
            self, [r2],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_2='another value'"))
        six.assertCountEqual(
            self, [],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_tag = 'wrong_val'"))
        six.assertCountEqual(
            self, [],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_tag != 'p_val'"))
        six.assertCountEqual(
            self,
            [r1, r2],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_tag != 'wrong_val'"),
        )
        six.assertCountEqual(
            self,
            [r1, r2],
            self._search(fs,
                         experiment_id,
                         filter_str="tags.generic_2 != 'wrong_val'"),
        )
        six.assertCountEqual(
            self, [r1],
            self._search(fs, experiment_id, filter_str="tags.p_a = 'abc'"))
        six.assertCountEqual(
            self, [r2],
            self._search(fs, experiment_id, filter_str="tags.p_b = 'ABC'"))
Example #26
 def _get_tag_from_file(parent_path, tag_name):
     _validate_tag_name(tag_name)
     tag_data = read_file(parent_path, tag_name)
     return RunTag(tag_name, tag_data)
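A rough standalone equivalent of the helper above, assuming the tag is stored as a plain text file named after the tag key (get_tag_from_file is a hypothetical name and the tag-name validation step is omitted):

import os

from mlflow.entities import RunTag

def get_tag_from_file(parent_path, tag_name):
    # Read the file named after the tag key and wrap its contents in a RunTag.
    with open(os.path.join(parent_path, tag_name)) as f:
        return RunTag(tag_name, f.read())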
Example #27
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        source_name = "rest test"

        source_name_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_name",
            return_value=source_name)
        source_type_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_type",
            return_value=SourceType.LOCAL)
        with mock.patch('mlflow.store.rest_store.http_request') as mock_http, \
                mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
                mock.patch('mlflow.tracking.context.default_context._get_user',
                           return_value=user_name), \
                mock.patch('time.time', return_value=13579), \
                source_name_patch, source_type_patch:
            with mlflow.start_run(experiment_id="43"):
                cr_body = message_to_json(
                    CreateRun(experiment_id="43",
                              user_id=user_name,
                              start_time=13579000,
                              tags=[
                                  ProtoRunTag(key='mlflow.source.name',
                                              value=source_name),
                                  ProtoRunTag(key='mlflow.source.type',
                                              value='LOCAL'),
                                  ProtoRunTag(key='mlflow.user',
                                              value=user_name)
                              ]))
                expected_kwargs = self._args(creds, "runs/create", "POST",
                                             cr_body)

                assert mock_http.call_count == 1
                actual_kwargs = mock_http.call_args[1]

                # Test the passed tag values separately from the rest of the request
                # Tag order is inconsistent on Python 2 and 3, but the order does not matter
                expected_tags = expected_kwargs['json'].pop('tags')
                actual_tags = actual_kwargs['json'].pop('tags')
                assert (sorted(expected_tags,
                               key=lambda t: t['key']) == sorted(
                                   actual_tags, key=lambda t: t['key']))
                assert expected_kwargs == actual_kwargs

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(
                LogParam(run_uuid="some_uuid",
                         run_id="some_uuid",
                         key="k1",
                         value="v1"))
            self._verify_requests(mock_http, creds, "runs/log-parameter",
                                  "POST", body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetTag(run_uuid="some_uuid",
                       run_id="some_uuid",
                       key="t1",
                       value="abcd" * 1000))
            self._verify_requests(mock_http, creds, "runs/set-tag", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_tag("some_uuid", "t1")
            body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
            self._verify_requests(mock_http, creds, "runs/delete-tag", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(
                LogMetric(run_uuid="u2",
                          run_id="u2",
                          key="m1",
                          value=0.87,
                          timestamp=12345,
                          step=3))
            self._verify_requests(mock_http, creds, "runs/log-metric", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            metrics = [
                Metric("m1", 0.87, 12345, 0),
                Metric("m2", 0.49, 12345, -1),
                Metric("m3", 0.58, 12345, 2)
            ]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2",
                            metrics=metrics,
                            params=params,
                            tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(
                LogBatch(run_id="u2",
                         metrics=metric_protos,
                         params=param_protos,
                         tags=tag_protos))
            self._verify_requests(mock_http, creds, "runs/log-batch", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds, "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds, "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/delete", "POST",
                message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/restore", "POST",
                message_to_json(RestoreExperiment(experiment_id="0")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            response = mock.MagicMock()
            response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
            mock_http.return_value = response
            result = store.search_runs(["0", "1"],
                                       "params.p1 = 'a'",
                                       ViewType.ACTIVE_ONLY,
                                       max_results=10,
                                       order_by=["a"],
                                       page_token="12345abcde")

            expected_message = SearchRuns(experiment_ids=["0", "1"],
                                          filter="params.p1 = 'a'",
                                          run_view_type=ViewType.to_proto(
                                              ViewType.ACTIVE_ONLY),
                                          max_results=10,
                                          order_by=["a"],
                                          page_token="12345abcde")
            self._verify_requests(mock_http, creds, "runs/search", "POST",
                                  message_to_json(expected_message))
            assert result.token == "67890fghij"
Example #28
 def test_log_batch_allows_tag_overwrite_single_req(self):
     fs = FileStore(self.test_root)
     run = self._create_run(fs)
     tags = [RunTag("t-key", "val"), RunTag("t-key", "newval")]
     fs.log_batch(run.info.run_uuid, metrics=[], params=[], tags=tags)
     self._verify_logged(fs, run.info.run_uuid, metrics=[], params=[], tags=[tags[-1]])
Example #29
def faculty_tag_to_mlflow_tag(faculty_tag):
    return RunTag(key=faculty_tag.key, value=faculty_tag.value)
Example #30
 def set_tag(self, run_id, key, value):
     """Sets a tag on the given run id. Value will be converted to a string."""
     _validate_tag_name(key)
     tag = RunTag(key, str(value))
     self.store.set_tag(run_id, tag)
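As the docstring above notes, the value is converted to a string before the RunTag is built; a tiny sketch of that behavior:

from mlflow.entities import RunTag

tag = RunTag("epochs", str(10))  # a non-string value is stored as its string form
assert tag.value == "10"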