Пример #1
0
    def _list_experiments(self,
                          session,
                          ids=None,
                          names=None,
                          view_type=ViewType.ACTIVE_ONLY,
                          eager=False):
        """
        Query ``SqlExperiment`` rows that satisfy the supplied filters.

        :param session: Object whose ``query`` method is used to build the query.
        :param ids: Optional collection of experiment IDs. Each one is converted with ``int``
                    before being matched against ``SqlExperiment.experiment_id``.
        :param names: Optional collection of names matched against ``SqlExperiment.name``.
        :param view_type: Translated into lifecycle stages through
                          ``LifecycleStage.view_type_to_stages``; only experiments in one of
                          those stages are returned.
        :param eager: When ``True``, the options produced by
                      ``_get_eager_experiment_query_options`` are applied to the query so that
                      each experiment's tags are loaded up front. When ``False``, no loader
                      options are applied and tags are loaded on attribute access.
        :return: The result of calling ``all()`` on the filtered query.
        """
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        criteria = [SqlExperiment.lifecycle_stage.in_(allowed_stages)]
        if ids and len(ids) > 0:
            criteria.append(
                SqlExperiment.experiment_id.in_([int(eid) for eid in ids]))
        if names and len(names) > 0:
            criteria.append(SqlExperiment.name.in_(names))

        if eager:
            load_options = self._get_eager_experiment_query_options()
        else:
            load_options = []

        experiment_query = session.query(SqlExperiment)
        experiment_query = experiment_query.options(*load_options)
        experiment_query = experiment_query.filter(*criteria)
        return experiment_query.all()
Пример #2
0
    def _search_runs(self, experiment_ids, filter_string, run_view_type,
                     max_results, order_by, page_token):
        """
        Load the runs of the given experiments, then filter, sort and paginate them in Python.

        :param experiment_ids: Values matched against ``SqlRun.experiment_id``.
        :param filter_string: Passed to ``SearchUtils.filter``.
        :param run_view_type: Translated into the set of lifecycle stages a run may be in.
        :param max_results: Page size; must not exceed ``SEARCH_MAX_RESULTS_THRESHOLD``.
        :param order_by: Passed to ``SearchUtils.sort``.
        :param page_token: Passed to ``SearchUtils.paginate``.
        :return: Tuple ``(runs, next_page_token)`` as produced by ``SearchUtils.paginate``.
        :raises MlflowException: With ``INVALID_PARAMETER_VALUE`` when ``max_results`` is above
                                 the threshold.
        """
        # TODO: push search query into backend database layer
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            message = (
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(
                    SEARCH_MAX_RESULTS_THRESHOLD, max_results))
            raise MlflowException(message, INVALID_PARAMETER_VALUE)

        allowed_stages = set(LifecycleStage.view_type_to_stages(run_view_type))
        with self.ManagedSessionMaker() as session:
            # ``to_mlflow_entity()`` reads each run's summary metrics, params and tags. Loading
            # them eagerly avoids the extra per-attribute queries that lazy loading would
            # otherwise issue.
            run_query = session.query(SqlRun)
            run_query = run_query.options(*self._get_eager_run_query_options())
            run_query = run_query.filter(
                SqlRun.experiment_id.in_(experiment_ids),
                SqlRun.lifecycle_stage.in_(allowed_stages))
            runs = [sql_run.to_mlflow_entity() for sql_run in run_query.all()]

        matching = SearchUtils.filter(runs, filter_string)
        ordered = SearchUtils.sort(matching, order_by)
        page, next_page_token = SearchUtils.paginate(ordered, page_token,
                                                     max_results)
        return page, next_page_token
Пример #3
0
    def _get_experiment(self, session, experiment_id, view_type, eager=False):
        """
        Look up a single ``SqlExperiment`` by ID, restricted to the given view type.

        :param session: Object whose ``query`` method is used to build the query.
        :param experiment_id: ID to look up. A falsy value is replaced by
                              ``SqlAlchemyStore.DEFAULT_EXPERIMENT_ID``.
        :param view_type: Translated into the lifecycle stages the experiment may be in.
        :param eager: If ``True``, eagerly loads the experiment's tags. If ``False``, these tags
                      are not eagerly loaded and will be loaded if/when their corresponding
                      object properties are accessed from the resulting ``SqlExperiment`` object.
        :return: The matching ``SqlExperiment``.
        :raises MlflowException: With ``RESOURCE_DOES_NOT_EXIST`` when no experiment matches.
        """
        if not experiment_id:
            experiment_id = SqlAlchemyStore.DEFAULT_EXPERIMENT_ID
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        if eager:
            load_options = self._get_eager_experiment_query_options()
        else:
            load_options = []

        lookup = session.query(SqlExperiment).options(*load_options)
        lookup = lookup.filter(
            SqlExperiment.experiment_id == experiment_id,
            SqlExperiment.lifecycle_stage.in_(allowed_stages))
        found = lookup.one_or_none()

        if found is not None:
            return found

        raise MlflowException(
            'No Experiment with id={} exists'.format(experiment_id),
            RESOURCE_DOES_NOT_EXIST)
Пример #4
0
class SqlRun(Base):
    """
    DB model mapped to the ``runs`` table.
    """
    __tablename__ = 'runs'

    # Primary key (see ``run_pk`` in ``__table_args__``).
    run_uuid = Column(String(32), nullable=False)
    name = Column(String(250))
    # Restricted to ``SourceTypes`` by the ``source_type`` check constraint.
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    source_name = Column(String(500))
    entry_point_name = Column(String(50))
    user_id = Column(String(256), nullable=True, default=None)
    # Restricted to ``RunStatusTypes`` by the ``status`` check constraint.
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    # The default must be a callable so the clock is read when the default is applied.
    # ``default=int(time.time())`` is evaluated once, while this class body executes, which
    # would stamp every run with the time the module was imported.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    end_time = Column(BigInteger, nullable=True, default=None)
    source_version = Column(String(50))
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    artifact_uri = Column(String(200), default=None)
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    # Many-to-one link to the owning experiment; also exposes ``runs`` on ``SqlExperiment``.
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))

    __table_args__ = (CheckConstraint(source_type.in_(SourceTypes),
                                      name='source_type'),
                      CheckConstraint(status.in_(RunStatusTypes),
                                      name='status'),
                      CheckConstraint(lifecycle_stage.in_(
                          LifecycleStage.view_type_to_stages(ViewType.ALL)),
                                      name='lifecycle_stage'),
                      PrimaryKeyConstraint('run_uuid', name='run_pk'))

    def to_mlflow_entity(self):
        """
        Build a ``Run`` whose info and data parts are both created from this row.

        :return: ``Run`` assembled from ``RunInfo`` and ``RunData`` entities.
        """
        # run has diff parameter names in __init__ than in properties_ so we do this manually
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
Пример #5
0
 def _list_run_infos(self, experiment_id, view_type):
     """
     Collect the run infos stored under an experiment directory.

     :param experiment_id: Experiment whose directory is scanned.
     :param view_type: Only runs whose lifecycle stage matches this view type are returned.
     :return: List of run infos; empty when the experiment does not exist.
     """
     self._check_root_dir()
     if not self._has_experiment(experiment_id):
         return []

     def _is_run_dir(path):
         # A run directory is any directory whose name is not a reserved experiment folder.
         folder = os.path.basename(os.path.normpath(path))
         not_reserved = all(
             folder != reserved
             for reserved in FileStore.RESERVED_EXPERIMENT_FOLDERS)
         return not_reserved and os.path.isdir(path)

     experiment_dir = self._get_experiment_path(experiment_id,
                                                assert_exists=True)
     candidate_ids = list_all(experiment_dir,
                              filter_func=_is_run_dir,
                              full_path=False)
     collected = []
     for candidate in candidate_ids:
         try:
             # Known problems are trapped and logged; anything unexpected reaches the caller.
             info = self._get_run_info(candidate)
             if info is not None and LifecycleStage.matches_view_type(
                     view_type, info.lifecycle_stage):
                 collected.append(info)
         except MissingConfigException as rnfe:
             # A malformed run is skipped with a warning.
             logging.warning("Malformed run '%s'. Detailed error %s",
                             candidate,
                             str(rnfe),
                             exc_info=True)
     return collected
Пример #6
0
    def _list_experiments(self, experiments, view_type=ViewType.ACTIVE_ONLY):
        """
        Build a query for experiments in the lifecycle stages implied by ``view_type``.

        :param experiments: Collection of experiment IDs; when non-empty, the query is limited
                            to these IDs.
        :param view_type: Translated into lifecycle stages via
                          ``LifecycleStage.view_type_to_stages``.
        :return: The filtered query object (not yet executed here).
        """
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        criteria = [SqlExperiment.lifecycle_stage.in_(allowed_stages)]

        if len(experiments) > 0:
            id_filter = SqlExperiment.experiment_id.in_(experiments)
            criteria.append(id_filter)

        experiment_query = self.session.query(SqlExperiment)
        return experiment_query.filter(*criteria)
Пример #7
0
    def _list_experiments(
        self,
        ids=None,
        names=None,
        view_type=ViewType.ACTIVE_ONLY,
        max_results=None,
        page_token=None,
        eager=False,
    ):
        """
        List experiments as MLflow entities, optionally one page at a time.

        :param ids: Optional collection of experiment IDs; each is converted with ``int``.
        :param names: Optional collection of experiment names.
        :param view_type: Translated into the lifecycle stages an experiment may be in.
        :param max_results: Page size. When ``None``, every matching experiment is returned and
                            ``page_token`` is not consulted.
        :param page_token: Token parsed into the starting offset when paginating.
        :param eager: If ``True``, eagerly loads each experiment's tags. If ``False``, these tags
                      are not eagerly loaded and will be loaded if/when their corresponding
                      object properties are accessed from a resulting ``SqlExperiment`` object.
        :return: ``PagedList`` of experiments. Its token is ``None`` unless a further page exists.
        """
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        criteria = [SqlExperiment.lifecycle_stage.in_(allowed_stages)]
        if ids and len(ids) > 0:
            criteria.append(
                SqlExperiment.experiment_id.in_([int(eid) for eid in ids]))
        if names and len(names) > 0:
            criteria.append(SqlExperiment.name.in_(names))

        paginated = max_results is not None
        # One row beyond the page size is requested so that the presence of a further page can
        # be detected without a second query.
        fetch_limit = max_results + 1 if paginated else None

        with self.ManagedSessionMaker() as session:
            if eager:
                load_options = self._get_eager_experiment_query_options()
            else:
                load_options = []

            if paginated:
                offset = SearchUtils.parse_start_offset_from_page_token(
                    page_token)
                experiment_query = session.query(SqlExperiment)
                experiment_query = experiment_query.options(*load_options)
                experiment_query = experiment_query.order_by(
                    SqlExperiment.experiment_id)
                experiment_query = experiment_query.filter(*criteria)
                experiment_query = experiment_query.offset(offset)
                experiment_query = experiment_query.limit(fetch_limit)
            else:
                experiment_query = session.query(SqlExperiment)
                experiment_query = experiment_query.options(*load_options)
                experiment_query = experiment_query.filter(*criteria)

            experiments = [
                row.to_mlflow_entity() for row in experiment_query.all()
            ]

        if not paginated:
            return PagedList(experiments, None)

        page = experiments[:max_results]
        next_token = None
        if len(experiments) == fetch_limit:
            # The extra row came back, so the next page starts right after this one.
            next_token = SearchUtils.create_page_token(offset + max_results)
        return PagedList(page, next_token)
Пример #8
0
    def _search_runs(
        self, experiment_ids, filter_string, run_view_type, max_results, order_by, page_token
    ):
        """
        Search runs with filtering, ordering and pagination expressed in the database query.

        :param experiment_ids: Values matched against ``SqlRun.experiment_id``.
        :param filter_string: Parsed with ``SearchUtils.parse_search_filter``.
        :param run_view_type: Translated into the set of lifecycle stages a run may be in.
        :param max_results: Page size; must not exceed ``SEARCH_MAX_RESULTS_THRESHOLD``.
        :param order_by: Converted into order-by clauses and the joins they need.
        :param page_token: Token parsed into the starting offset.
        :return: Tuple ``(runs, next_page_token)``. The token is ``None`` unless the page came
                 back with exactly ``max_results`` runs.
        :raises MlflowException: With ``INVALID_PARAMETER_VALUE`` when ``max_results`` is above
                                 the threshold.
        """
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            message = (
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(SEARCH_MAX_RESULTS_THRESHOLD, max_results)
            )
            raise MlflowException(message, INVALID_PARAMETER_VALUE)

        allowed_stages = set(LifecycleStage.view_type_to_stages(run_view_type))

        with self.ManagedSessionMaker() as session:
            parsed_filters = SearchUtils.parse_search_filter(filter_string)
            parsed_orderby, sorting_joins = _get_orderby_clauses(order_by, session)

            run_query = session.query(SqlRun)
            for filter_join in _get_sqlalchemy_filter_clauses(parsed_filters, session):
                run_query = run_query.join(filter_join)
            # Sorting joins are outer joins: ordering by a tag, metric or param must keep the
            # runs that have no value for it, which an inner join would drop.
            for sort_join in sorting_joins:
                run_query = run_query.outerjoin(sort_join)

            offset = SearchUtils.parse_start_offset_from_page_token(page_token)

            # ``to_mlflow_entity()`` reads each run's summary metrics, params and tags. Loading
            # them eagerly avoids the extra per-attribute queries that lazy loading would
            # otherwise issue.
            run_query = run_query.distinct()
            run_query = run_query.options(*self._get_eager_run_query_options())
            run_query = run_query.filter(
                SqlRun.experiment_id.in_(experiment_ids),
                SqlRun.lifecycle_stage.in_(allowed_stages),
                *_get_attributes_filtering_clauses(parsed_filters)
            )
            run_query = run_query.order_by(*parsed_orderby)
            run_query = run_query.offset(offset).limit(max_results)

            runs = [sql_run.to_mlflow_entity() for sql_run in run_query.all()]

            # A full page is taken as the sign that more runs may follow.
            if len(runs) == max_results:
                next_page_token = SearchUtils.create_page_token(offset + max_results)
            else:
                next_page_token = None

        return runs, next_page_token
Пример #9
0
    def _list_experiments(self, session, ids=None, names=None, view_type=ViewType.ACTIVE_ONLY):
        """
        Build a query for experiments matching the optional ID and name filters.

        :param session: Object whose ``query`` method is used to build the query.
        :param ids: Optional collection matched against ``SqlExperiment.experiment_id``.
        :param names: Optional collection matched against ``SqlExperiment.name``.
        :param view_type: Translated into the lifecycle stages an experiment may be in.
        :return: The filtered query object (not yet executed here).
        """
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        criteria = [SqlExperiment.lifecycle_stage.in_(allowed_stages)]

        if ids and len(ids) > 0:
            id_filter = SqlExperiment.experiment_id.in_(ids)
            criteria.append(id_filter)

        if names and len(names) > 0:
            name_filter = SqlExperiment.name.in_(names)
            criteria.append(name_filter)

        experiment_query = session.query(SqlExperiment)
        return experiment_query.filter(*criteria)
 def get_experiment_by_name(self, experiment_name):
     """
     Specialized implementation for SQL backed store.

     Looks the experiment up by exact name across every lifecycle stage covered by
     ``ViewType.ALL``.

     :param experiment_name: Name compared for equality with ``SqlExperiment.name``.
     :return: The experiment converted with ``to_mlflow_entity()``, or ``None`` when no
              experiment has that name.
     """
     with self.ManagedSessionMaker() as session:
         all_stages = LifecycleStage.view_type_to_stages(ViewType.ALL)
         lookup = session.query(SqlExperiment)
         lookup = lookup.options(*self._get_eager_experiment_query_options())
         lookup = lookup.filter(
             SqlExperiment.name == experiment_name,
             SqlExperiment.lifecycle_stage.in_(all_stages))
         found = lookup.one_or_none()
         if found is None:
             return None
         return found.to_mlflow_entity()
Пример #11
0
class SqlExperiment(Base):
    """
    DB model for :py:class:`mlflow.entities.Experiment`. Rows live in the ``experiments`` table.
    """

    __tablename__ = "experiments"

    experiment_id = Column(Integer, autoincrement=True)
    """
    Experiment ID: `Integer`, auto-incrementing. *Primary Key* of the ``experiments`` table.
    """
    name = Column(String(256), unique=True, nullable=False)
    """
    Experiment name: `String` (limit 256 characters). Declared *Unique* and *Non null*.
    """
    artifact_location = Column(String(256), nullable=True)
    """
    Default artifact location for this experiment: `String` (limit 256 characters).
    Declared *nullable*.
    """
    lifecycle_stage = Column(String(32), default=LifecycleStage.ACTIVE)
    """
    Lifecycle stage of the experiment: `String` (limit 32 characters). Defaults to
    ``LifecycleStage.ACTIVE``; a check constraint limits it to the stages of ``ViewType.ALL``.
    """

    __table_args__ = (
        CheckConstraint(
            lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name="experiments_lifecycle_stage",
        ),
        PrimaryKeyConstraint("experiment_id", name="experiment_pk"),
    )

    def __repr__(self):
        template = "<SqlExperiment ({}, {})>"
        return template.format(self.experiment_id, self.name)

    def to_mlflow_entity(self):
        """
        Convert this row into the corresponding MLflow entity.

        The experiment ID is passed on as a string, and every element of ``self.tags`` is
        converted with its own ``to_mlflow_entity()``.

        :return: :py:class:`mlflow.entities.Experiment`.
        """
        tag_entities = [tag.to_mlflow_entity() for tag in self.tags]
        return Experiment(
            experiment_id=str(self.experiment_id),
            name=self.name,
            artifact_location=self.artifact_location,
            lifecycle_stage=self.lifecycle_stage,
            tags=tag_entities,
        )
Пример #12
0
    def _get_run(self, run_uuid, view_type):
        """
        Fetch exactly one ``SqlRun`` by UUID, restricted to the given view type.

        :param run_uuid: Value compared for equality with ``SqlRun.run_uuid``.
        :param view_type: Translated into the lifecycle stages the run may be in.
        :return: The single matching ``SqlRun``.
        :raises MlflowException: With ``RESOURCE_DOES_NOT_EXIST`` when nothing matches, or with
                                 ``INVALID_STATE`` when more than one row matches.
        """
        allowed_stages = LifecycleStage.view_type_to_stages(view_type)
        run_query = self.session.query(SqlRun).filter(
            SqlRun.run_uuid == run_uuid,
            SqlRun.lifecycle_stage.in_(allowed_stages))
        matches = run_query.all()
        match_count = len(matches)

        if match_count == 1:
            return matches[0]

        if match_count == 0:
            raise MlflowException('No runs with id={} exists'.format(run_uuid),
                                  RESOURCE_DOES_NOT_EXIST)

        raise MlflowException(
            'Expected only 1 run with id={}. Found {}.'.format(
                run_uuid, match_count), INVALID_STATE)
Пример #13
0
class SqlExperiment(Base):
    """
    DB model mapped to the ``experiments`` table.
    """
    __tablename__ = 'experiments'

    # Primary key (see ``experiment_pk`` in ``__table_args__``).
    experiment_id = Column(Integer, autoincrement=True)
    name = Column(String(256), unique=True, nullable=False)
    artifact_location = Column(String(256), nullable=True)
    lifecycle_stage = Column(String(32), default=LifecycleStage.ACTIVE)

    __table_args__ = (
        # Only the stages covered by ``ViewType.ALL`` may be stored.
        CheckConstraint(
            lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name='lifecycle_stage'),
        PrimaryKeyConstraint('experiment_id', name='experiment_pk'),
    )

    def __repr__(self):
        template = '<SqlExperiment ({}, {})>'
        return template.format(self.experiment_id, self.name)

    def to_mlflow_entity(self):
        """
        Create an ``Experiment`` entity from this row via ``_create_entity``.
        """
        return _create_entity(Experiment, self)
Пример #14
0
 def _list_run_infos(self, experiment_id, view_type):
     """
     Collect the run infos stored under an experiment directory.

     :param experiment_id: Experiment whose directory is scanned.
     :param view_type: Only runs whose lifecycle stage matches this view type are returned.
     :return: List of run infos; empty when the experiment does not exist.
     """
     self._check_root_dir()
     if not self._has_experiment(experiment_id):
         return []

     experiment_dir = self._get_experiment_path(experiment_id, assert_exists=True)
     # Every sub-directory of the experiment directory is treated as a candidate run.
     candidate_ids = list_all(experiment_dir, os.path.isdir, full_path=False)

     collected = []
     for candidate in candidate_ids:
         try:
             # Known problems are trapped and logged; anything unexpected reaches the caller.
             info = self._get_run_info(candidate)
             if info is not None and LifecycleStage.matches_view_type(
                     view_type, info.lifecycle_stage):
                 collected.append(info)
         except MissingConfigException as rnfe:
             # A malformed run is skipped with a warning.
             logging.warning("Malformed run '%s'. Detailed error %s", candidate, str(rnfe),
                             exc_info=True)
     return collected
Пример #15
0
 def _list_run_infos(self, experiment_id, view_type):
     """
     Collect the run infos stored under an experiment directory.

     Runs whose recorded experiment ID differs from ``experiment_id`` are skipped with a
     warning, as are runs that raise ``MissingConfigException`` while being read.

     :param experiment_id: Experiment whose directory is scanned.
     :param view_type: Only runs whose lifecycle stage matches this view type are returned.
     :return: List of run infos; empty when the experiment does not exist.
     """
     self._check_root_dir()
     if not self._has_experiment(experiment_id):
         return []
     experiment_dir = self._get_experiment_path(experiment_id,
                                                assert_exists=True)
     # Candidate runs are the directories that are not reserved experiment folders.
     run_dirs = list_all(
         experiment_dir,
         filter_func=lambda x: all(
             os.path.basename(os.path.normpath(x)) != reservedFolderName
             for reservedFolderName in FileStore.RESERVED_EXPERIMENT_FOLDERS
         ) and os.path.isdir(x),
         full_path=True,
     )
     run_infos = []
     for r_dir in run_dirs:
         try:
             # trap and warn known issues, will raise unexpected exceptions to caller
             run_info = self._get_run_info_from_dir(r_dir)
             if run_info.experiment_id != experiment_id:
                 # No exception is being handled at this point, so ``exc_info`` is not
                 # passed: it would only append a meaningless "NoneType: None" traceback.
                 logging.warning(
                     "Wrong experiment ID (%s) recorded for run '%s'. "
                     "It should be %s. Run will be ignored.",
                     str(run_info.experiment_id),
                     str(run_info.run_id),
                     str(experiment_id),
                 )
                 continue
             if LifecycleStage.matches_view_type(view_type,
                                                 run_info.lifecycle_stage):
                 run_infos.append(run_info)
         except MissingConfigException as rnfe:
             # trap malformed run exception and log warning
             r_id = os.path.basename(r_dir)
             logging.warning("Malformed run '%s'. Detailed error %s",
                             r_id,
                             str(rnfe),
                             exc_info=True)
     return run_infos
Пример #16
0
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Restricted to ``SourceTypes`` by a check
                 constraint; defaults to the string form of ``SourceType.LOCAL``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Restricted to ``RunStatusTypes`` by a check
                constraint; defaults to the string form of ``RunStatus.SCHEDULED``.
    """
    # The default is a callable so that the clock is read each time the default is applied.
    # A plain ``int(time.time())`` would be evaluated once, while this class body executes,
    # and every run would then share the module import time.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to ``int(time.time())`` taken when the default is
                    applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
                            ``LifecycleStage.ACTIVE``; a check constraint limits it to the
                            stages of ``ViewType.ALL``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship('SqlExperiment', backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name='source_type'),
        CheckConstraint(status.in_(RunStatusTypes), name='status'),
        CheckConstraint(lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
                        name='lifecycle_stage'),
        PrimaryKeyConstraint('run_uuid', name='run_pk')
    )

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        # run has diff parameter names in __init__ than in properties_ so we do this manually
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
Пример #17
0
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """

    __tablename__ = "runs"

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Restricted to ``SourceTypes`` by a check
                 constraint; defaults to the string form of ``SourceType.LOCAL``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Restricted to ``RunStatusTypes`` by a check
                constraint; defaults to the string form of ``RunStatus.SCHEDULED``.
    """
    # The default is a callable so that the clock is read each time the default is applied.
    # A plain ``int(time.time())`` would be evaluated once, while this class body executes,
    # and every run would then share the module import time.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to ``int(time.time())`` taken when the default is
                    applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
                            ``LifecycleStage.ACTIVE``; a check constraint limits it to the
                            stages of ``ViewType.ALL``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey("experiments.experiment_id"))
    """
    Experiment ID to which this run belongs: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship("SqlExperiment",
                              backref=backref("runs", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name="source_type"),
        CheckConstraint(status.in_(RunStatusTypes), name="status"),
        CheckConstraint(
            lifecycle_stage.in_(
                LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name="runs_lifecycle_stage",
        ),
        PrimaryKeyConstraint("run_uuid", name="run_pk"),
    )

    @staticmethod
    def get_attribute_name(mlflow_attribute_name):
        """
        Resolves an MLflow attribute name to a `SqlRun` attribute name.

        :param mlflow_attribute_name: Attribute name as used by MLflow search.
        :return: The same name, unchanged.
        """
        # Currently, MLflow Search attributes defined in `SearchUtils.VALID_SEARCH_ATTRIBUTE_KEYS`
        # share the same names as their corresponding `SqlRun` attributes. Therefore, this function
        # returns the same attribute name
        return mlflow_attribute_name

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        ``run_uuid`` is used for both the ``run_uuid`` and ``run_id`` fields, and the experiment
        ID is passed on as a string. Metrics, params and tags are read from
        ``self.latest_metrics``, ``self.params`` and ``self.tags``, which are not declared in
        this class (presumably relationships added from the related models).

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags],
        )

        return Run(run_info=run_info, run_data=run_data)
Пример #18
0
    RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
    RunStatus.to_string(RunStatus.KILLED)
]

# Some SQL backends (SQLite, for example) drop CHECK constraints while a table is migrated.
# On those backends the constraints have to be supplied again as table arguments, so the
# ones that must survive this migration are collected here. The "status" constraint is left
# out on purpose: the migration's `upgrade()` routine redefines it explicitly.
check_constraint_table_args = [
    CheckConstraint(
        SqlRun.source_type.in_(SourceTypes),
        name='source_type',
    ),
    CheckConstraint(
        SqlRun.lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
        name='runs_lifecycle_stage',
    ),
]


def upgrade():
    """
    Change the type of the ``status`` column of the ``runs`` table to an ``Enum`` over
    ``new_run_statuses``, passing ``check_constraint_table_args`` as the table arguments of
    the batch operation.
    """
    batch_context = op.batch_alter_table(
        "runs", table_args=check_constraint_table_args)
    with batch_context as batch_op:
        # `native_enum=False` together with `create_constraint=True` yields a check
        # constraint instead of a database-backend-dependent enum type (see
        # https://docs.sqlalchemy.org/en/13/core/
        # type_basics.html#sqlalchemy.types.Enum.params.native_enum)
        status_type = Enum(*new_run_statuses,
                           create_constraint=True,
                           native_enum=False)
        batch_op.alter_column("status", type_=status_type)
Пример #19
0
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Restricted to ``SourceTypes`` by a check
                 constraint; defaults to the string form of ``SourceType.LOCAL``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Restricted to ``RunStatusTypes`` by a check
                constraint; defaults to the string form of ``RunStatus.SCHEDULED``.
    """
    # The default is a callable so that the clock is read each time the default is applied.
    # A plain ``int(time.time())`` would be evaluated once, while this class body executes,
    # and every run would then share the module import time.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to ``int(time.time())`` taken when the default is
                    applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
                            ``LifecycleStage.ACTIVE``; a check constraint limits it to the
                            stages of ``ViewType.ALL``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (CheckConstraint(source_type.in_(SourceTypes),
                                      name='source_type'),
                      CheckConstraint(status.in_(RunStatusTypes),
                                      name='status'),
                      CheckConstraint(lifecycle_stage.in_(
                          LifecycleStage.view_type_to_stages(ViewType.ALL)),
                                      name='runs_lifecycle_stage'),
                      PrimaryKeyConstraint('run_uuid', name='run_pk'))

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        ``run_uuid`` is used for both the ``run_uuid`` and ``run_id`` fields, and the experiment
        ID is passed on as a string. For each metric key only one metric is kept: the one with
        the greatest ``(step, timestamp, value)`` tuple, with later entries winning ties.
        Metrics, params and tags are read from ``self.metrics``, ``self.params`` and
        ``self.tags``, which are not declared in this class (presumably relationships added
        from the related models).

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        latest_by_key = {}
        for sql_metric in self.metrics:
            metric = sql_metric.to_mlflow_entity()
            current = latest_by_key.get(metric.key)
            if current is None or (
                    (metric.step, metric.timestamp, metric.value) >=
                    (current.step, current.timestamp, current.value)):
                latest_by_key[metric.key] = metric

        run_data = RunData(metrics=list(latest_by_key.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
Пример #20
0
 def _list_runs(self, experiment_id, run_view_type):
     """
     Return the runs of an experiment that are in a lifecycle stage allowed by the view type.

     The experiment itself is looked up with ``ViewType.ALL``, so the runs of a non-active
     experiment are still listed.

     :param experiment_id: ID of the experiment whose runs are listed.
     :param run_view_type: Translated into the set of lifecycle stages a run may be in.
     :return: List of the experiment's runs in an allowed stage; empty when no experiment
              with that ID is found.
     """
     exp = self._list_experiments(ids=[experiment_id],
                                  view_type=ViewType.ALL).first()
     if exp is None:
         # ``first()`` yields ``None`` when nothing matches; without this guard the
         # ``exp.runs`` access below fails with ``AttributeError``.
         return []
     stages = set(LifecycleStage.view_type_to_stages(run_view_type))
     return [run for run in exp.runs if run.lifecycle_stage in stages]