示例#1
0
class NonInteractiveRunPipelineStep(PipelineRunPipelineStep):
    """Step rows stored in ``non_interactive_run_pipeline_steps``.

    Both columns declared here are flagged as primary-key columns and
    each one carries a foreign key to another table.
    """

    __tablename__ = "non_interactive_run_pipeline_steps"
    __bind_key__ = "persistent_db"

    # Foreign key to experiments.experiment_uuid.
    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey("experiments.experiment_uuid"),
        primary_key=True,
    )
    # Foreign key to non_interactive_runs.run_uuid.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("non_interactive_runs.run_uuid"),
        primary_key=True,
    )
示例#2
0
class Pipeline(BaseModel):
    """Row of the ``pipelines`` table, keyed by (project_uuid, uuid)."""

    __tablename__ = "pipelines"

    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    uuid = db.Column(db.String(36), primary_key=True, nullable=False)
    # JSONB column wrapped in deferred(); the server default is "{}".
    env_variables = deferred(
        db.Column(JSONB, nullable=False, server_default="{}")
    )

    # Every relationship below is declared with lazy="select",
    # passive_deletes=True and cascade="all, delete".
    interactive_sessions = db.relationship(
        "InteractiveSession",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    jobs = db.relationship(
        "Job",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
示例#3
0
class PipelineRun(db.Model):
    """Row of the ``pipelineruns`` table; ``uuid`` is the primary key."""

    __tablename__ = "pipelineruns"

    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    id = db.Column(db.Integer(), unique=False)
    # Foreign key to jobs.uuid, declared with ON DELETE CASCADE.
    job = db.Column(db.ForeignKey("jobs.uuid", ondelete="CASCADE"))
    parameter_json = db.Column(db.JSON, nullable=False)
示例#4
0
class NonInteractivePipelineRun(PipelineRun):
    # Inheritance strategy notes
    # (https://docs.sqlalchemy.org/en/14/orm/inheritance.html).
    # SQLAlchemy offers three kinds of inheritance: joined table,
    # single table and concrete.
    #
    # Concrete inheritance is essentially not recommended unless there
    # is a specific reason to use it, and it leads to FK issues if the
    # base table is abstract.
    #
    # Joined table inheritance has this drawback: "ORM-enabled UPDATEs
    # and DELETEs do not handle joined table inheritance
    # automatically." For example, such an update issued against
    # NonInteractivePipelineRun could not touch the columns that live
    # on the parent table, so the update_status_db function from the
    # utils module would not work when updating the status of a non
    # interactive run.
    # https://docs.sqlalchemy.org/en/14/orm/session_basics.html#update-and-delete-with-arbitrary-where-clause
    #
    # Single table inheritance is therefore the strategy of choice.
    # A tablename of None selects single table inheritance; a string
    # tablename selects joined table inheritance. Single table
    # inheritance does NOT create a new table per "child" class.
    __tablename__ = None

    # TODO: verify why the job_uuid should be part of the primary key
    # (the column below is not declared as a primary key).
    job_uuid = db.Column(
        db.String(36),
        db.ForeignKey("jobs.uuid", ondelete="CASCADE"),
        index=True,
    )

    # Batch of non interactive runs of the job this run belongs to:
    # the first time a job runs produces batch 1, then batch 2, etc.
    job_run_index = db.Column(
        db.Integer,
        nullable=False,
        server_default=text("0"),
    )

    # Identifies the pipeline run within the job and keeps a
    # consistent ordering.
    job_run_pipeline_run_index = db.Column(db.Integer)

    # Pipeline run number across all job runs of a job.
    pipeline_run_index = db.Column(db.Integer)

    # Parameters the run was started with, kept for history.
    parameters = db.Column(
        JSONB,
        nullable=False,
        # Keeps migrated entries that lacked this column valid.
        server_default="{}",
    )

    # Polymorphic identity used because this class inherits from
    # PipelineRun.
    __mapper_args__ = {"polymorphic_identity": "NonInteractivePipelineRun"}
示例#5
0
class EnvironmentBuild(BaseModel):
    """State of environment builds.

    Stores the state of the build task of an environment, i.e. building
    an image from a base image plus optional sh code. It is not related
    to keeping track of environments or images to decide whether a
    project or pipeline can be run.

    """

    __tablename__ = "environment_builds"
    # Composite index over (project_uuid, environment_uuid).
    __table_args__ = (
        Index(
            "uuid_proj_env_index",
            "project_uuid",
            "environment_uuid",
        ),
    )

    # https://stackoverflow.com/questions/63164261/celery-task-id-max-length
    uuid = db.Column(db.String(36), primary_key=True, nullable=False)
    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
        index=True,
    )
    environment_uuid = db.Column(
        db.String(36),
        nullable=False,
        index=True,
    )
    project_path = db.Column(
        db.String(4096),
        nullable=False,
        index=True,
    )
    # Only requested_time is mandatory; the other two are nullable.
    requested_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    finished_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a short debug representation containing the uuid."""
        build_uuid = self.uuid
        return f"<EnvironmentBuildTask: {build_uuid}>"
示例#6
0
class InteractiveRunImageMapping(BaseModel):
    """Mapping between an interactive run and the environment images it uses.

    Used to understand whether an image can be removed from the docker
    environment, i.e. when it is not used by a run which is PENDING or
    STARTED.

    """

    __tablename__ = "interactive_run_image_mapping"
    # Within one run, each environment uuid and each docker image id
    # may appear only once.
    __table_args__ = (
        UniqueConstraint("run_uuid", "orchest_environment_uuid"),
        UniqueConstraint("run_uuid", "docker_img_id"),
    )

    # All three columns are primary-key columns.
    # Foreign key to InteractiveRun.run_uuid with ON DELETE CASCADE.
    run_uuid = db.Column(
        db.ForeignKey(InteractiveRun.run_uuid, ondelete="CASCADE"),
        unique=False,
        nullable=False,
        index=True,
        primary_key=True,
    )
    orchest_environment_uuid = db.Column(
        db.String(36),
        unique=False,
        nullable=False,
        primary_key=True,
    )
    docker_img_id = db.Column(
        db.String(),
        unique=False,
        nullable=False,
        primary_key=True,
    )

    def __repr__(self):
        """Return a debug representation listing the three key columns."""
        head = f"<InteractiveRunImageMapping: {self.run_uuid} | "
        middle = f"{self.orchest_environment_uuid} | "
        tail = f"{self.docker_img_id}>"
        return head + middle + tail
示例#7
0
class PipelineRun(db.Model):
    """Row of the ``pipelineruns`` table; ``uuid`` is the primary key."""

    __tablename__ = "pipelineruns"

    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    id = db.Column(db.Integer(), unique=False)
    # Foreign key to experiments.uuid.
    experiment = db.Column(db.ForeignKey("experiments.uuid"))
    parameter_json = db.Column(db.JSON, nullable=False)
示例#8
0
文件: models.py 项目: wasit7/orchest
class NonInteractiveRun(PipelineRun):
    """Row of ``non_interactive_runs``, keyed by (experiment_uuid, run_uuid)."""

    __tablename__ = "non_interactive_runs"
    __bind_key__ = "persistent_db"

    # Foreign key to experiments.experiment_uuid.
    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey("experiments.experiment_uuid"),
        primary_key=True,
    )
    run_uuid = db.Column(db.String(36), primary_key=True)
    # Identifies the pipeline run within the experiment and keeps a
    # consistent ordering.
    pipeline_run_id = db.Column(db.Integer, unique=False, nullable=False)
    # Both timestamps are nullable.
    started_time = db.Column(db.DateTime, unique=False, nullable=True)
    finished_time = db.Column(db.DateTime, unique=False, nullable=True)

    # Steps of this run, declared with lazy="joined".
    pipeline_steps = db.relationship(
        "NonInteractiveRunPipelineStep",
        lazy="joined",
    )
示例#9
0
class InteractiveRunPipelineStep(PipelineRunPipelineStep):
    """Step rows stored in ``interactive_run_pipeline_steps``."""

    __tablename__ = "interactive_run_pipeline_steps"

    # Primary-key column; foreign key to interactive_runs.run_uuid,
    # declared with ON DELETE CASCADE.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey(
            "interactive_runs.run_uuid",
            ondelete="CASCADE",
        ),
        primary_key=True,
    )
示例#10
0
文件: models.py 项目: wasit7/orchest
class InteractiveRunPipelineStep(PipelineRunPipelineStep):
    """Step rows stored in ``interactive_run_pipeline_steps``."""

    __tablename__ = "interactive_run_pipeline_steps"

    # Primary-key column; foreign key to interactive_runs.run_uuid.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("interactive_runs.run_uuid"),
        primary_key=True,
    )
示例#11
0
class Pipeline(db.Model):
    """Row of the ``pipelines`` table, keyed by (uuid, project_uuid)."""

    __tablename__ = "pipelines"

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    path = db.Column(db.String(255), nullable=False)
示例#12
0
class Pipeline(db.Model):
    """Row of the ``pipeline`` table, keyed by (uuid, project_uuid)."""

    __tablename__ = "pipeline"

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    # Foreign key to project.uuid.
    project_uuid = db.Column(
        db.ForeignKey("project.uuid"),
        primary_key=True,
    )
    path = db.Column(db.String(255), nullable=False)

    # Explicit unique constraint over the same pair of columns that
    # form the primary key.
    __table_args__ = (
        UniqueConstraint("uuid", "project_uuid"),
    )
示例#13
0
class Token(db.Model):
    """Row of the ``tokens`` table; the ``user`` column is the primary key."""

    __tablename__ = "tokens"

    token = db.Column(db.String(255))
    # Foreign key to users.uuid.
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )
    # datetime.datetime.utcnow is passed uncalled as the column default.
    created = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        default=datetime.datetime.utcnow,
    )
示例#14
0
class StepStatus(BaseModel, db.Model):
    """Row of the ``stepstatus`` table, keyed by (run_uuid, step_uuid)."""

    __tablename__ = "stepstatus"

    # Foreign key to runs.run_uuid.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("runs.run_uuid"),
        primary_key=True,
    )
    step_uuid = db.Column(db.String(36), primary_key=True)
    # The remaining columns are all nullable.
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    ended_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a debug representation built from both key columns."""
        run, step = self.run_uuid, self.step_uuid
        return f"<StepStatus {run}.{step}>"
示例#15
0
class Commit(db.Model):
    """Row of the ``commits`` table; ``uuid`` is the primary key."""

    __tablename__ = "commits"

    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    tag = db.Column(db.String(255), unique=False, nullable=False)
    name = db.Column(db.String(255), unique=False, nullable=False)
    # Foreign key to images.name.
    base_image = db.Column(db.ForeignKey("images.name"))
    # datetime.datetime.utcnow is passed uncalled as the column default.
    created = db.Column(
        db.DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
    )
    building = db.Column(db.Boolean, default=False)

    def __repr__(self):
        """Return a debug representation: name, base image and uuid."""
        name, base, uuid = self.name, self.base_image, self.uuid
        return f"<Commit {name}:{base}:{uuid}>"
示例#16
0
class PipelineRunStep(BaseModel):
    """Row of ``pipeline_run_steps``, keyed by (run_uuid, step_uuid)."""

    __tablename__ = "pipeline_run_steps"

    # Foreign key to pipeline_runs.uuid, declared with ON DELETE CASCADE.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("pipeline_runs.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    step_uuid = db.Column(db.String(36), primary_key=True)

    # The remaining columns are all nullable.
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    finished_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return ``<ClassName: run_uuid.step_uuid>`` for debugging."""
        class_name = self.__class__.__name__
        return f"<{class_name}: {self.run_uuid}.{self.step_uuid}>"
示例#17
0
class Token(db.Model):
    """Row of the ``tokens`` table; the ``user`` column is the primary key."""

    __tablename__ = "tokens"

    token = db.Column(db.String(255))
    # Foreign key to users.uuid.
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )
    # The default is a server-side SQL expression.
    created = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        server_default=text("timezone('utc', now())"),
    )
示例#18
0
class PipelineRun(BaseModel):
    """Row of the ``pipeline_runs`` table; polymorphic on ``type``."""

    __tablename__ = "pipeline_runs"
    # Composite index over (project_uuid, pipeline_uuid).
    __table_args__ = (
        Index(
            "ix_pipeline_runs_project_uuid_pipeline_uuid",
            "project_uuid",
            "pipeline_uuid",
        ),
    )

    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    pipeline_uuid = db.Column(
        db.String(36),
        index=True,
        unique=False,
        nullable=False,
    )
    uuid = db.Column(db.String(36), primary_key=True)
    status = db.Column(db.String(15), unique=False, nullable=True)
    started_time = db.Column(db.DateTime, unique=False, nullable=True)
    finished_time = db.Column(db.DateTime, unique=False, nullable=True)
    # Discriminator column, referenced by polymorphic_on below.
    type = db.Column(db.String(50))

    # Both relationships are declared with lazy="joined",
    # passive_deletes=True and cascade="all, delete".
    pipeline_steps = db.relationship(
        "PipelineRunStep",
        lazy="joined",
        passive_deletes=True,
        cascade="all, delete",
    )
    image_mappings = db.relationship(
        "PipelineRunImageMapping",
        lazy="joined",
        passive_deletes=True,
        cascade="all, delete",
    )

    # Inheritance setup: the "type" column differentiates the different
    # classes of entities.
    __mapper_args__ = {
        "polymorphic_identity": "PipelineRun",
        "polymorphic_on": type,
    }

    def __repr__(self):
        """Return ``<ClassName: uuid>`` for debugging."""
        class_name = self.__class__.__name__
        return f"<{class_name}: {self.uuid}>"
示例#19
0
文件: models.py 项目: orchest/orchest
class Pipeline(BaseModel):
    """Row of the ``pipelines`` table, keyed by (uuid, project_uuid)."""

    __tablename__ = "pipelines"

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    path = db.Column(db.String(255), nullable=False)
    # Either READY or MOVING. The status guards against race conditions
    # and inconsistencies while discovering new pipelines, or pipelines
    # deleted through the filesystem, since discovery can run
    # concurrently with a pipeline move.
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=False,
        server_default=text("'READY'"),
    )
示例#20
0
class NonInteractivePipelineRun(PipelineRun):
    # Inheritance strategy notes
    # (https://docs.sqlalchemy.org/en/14/orm/inheritance.html).
    # SQLAlchemy offers three kinds of inheritance: joined table,
    # single table and concrete.
    #
    # Concrete inheritance is essentially not recommended unless there
    # is a specific reason to use it, and it leads to FK issues if the
    # base table is abstract.
    #
    # Joined table inheritance has this drawback: "ORM-enabled UPDATEs
    # and DELETEs do not handle joined table inheritance
    # automatically." For example, such an update issued against
    # NonInteractivePipelineRun could not touch the columns that live
    # on the parent table, so the update_status_db function from the
    # utils module would not work when updating the status of a non
    # interactive run.
    # https://docs.sqlalchemy.org/en/14/orm/session_basics.html#update-and-delete-with-arbitrary-where-clause
    #
    # Single table inheritance is therefore the strategy of choice.
    # A tablename of None selects single table inheritance; a string
    # tablename selects joined table inheritance. Single table
    # inheritance does NOT create a new table per "child" class.
    __tablename__ = None

    # TODO: verify why the job_uuid should be part of the primary key
    # (the column below is not declared as a primary key).
    job_uuid = db.Column(
        db.String(36),
        db.ForeignKey("jobs.job_uuid", ondelete="CASCADE"),
    )
    # Identifies the pipeline run within the job and keeps a
    # consistent ordering.
    pipeline_run_id = db.Column(db.Integer, unique=False)

    # Polymorphic identity used because this class inherits from
    # PipelineRun.
    __mapper_args__ = {"polymorphic_identity": "NonInteractivePipelineRun"}
示例#21
0
class Job(db.Model):
    """Row of the ``jobs`` table; ``uuid`` is the primary key."""

    __tablename__ = "jobs"

    name = db.Column(db.String(255), unique=False, nullable=False)
    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    pipeline_uuid = db.Column(db.String(255), unique=False, nullable=False)
    # Foreign key to project.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.ForeignKey("project.uuid", ondelete="CASCADE"),
        unique=False,
        nullable=False,
    )
    pipeline_name = db.Column(db.String(255), unique=False, nullable=False)
    pipeline_path = db.Column(db.String(255), unique=False, nullable=False)
    # The default is a server-side SQL expression.
    created = db.Column(
        db.DateTime,
        nullable=False,
        server_default=text("timezone('utc', now())"),
    )
    strategy_json = db.Column(db.Text, nullable=False)
    draft = db.Column(db.Boolean())

    # Declared with lazy="joined", passive_deletes=False and
    # cascade="all, delete".
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
示例#22
0
class InteractiveSession(BaseModel):
    """Row of ``interactive_sessions``, keyed by (project_uuid, pipeline_uuid)."""

    __tablename__ = "interactive_sessions"
    # Composite index over (project_uuid, pipeline_uuid).
    __table_args__ = (
        Index(
            "ix_interactive_sessions_project_uuid_pipeline_uuid",
            "project_uuid",
            "pipeline_uuid",
        ),
    )

    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
        index=True,
    )
    pipeline_uuid = db.Column(
        db.String(36),
        primary_key=True,
        index=True,
    )
    status = db.Column(db.String(10), primary_key=False)
    # IPv4 address used to connect to the Jupyter notebook server.
    jupyter_server_ip = db.Column(db.String(15), unique=True, nullable=True)
    # Used to connect to the Jupyter notebook server.
    notebook_server_info = db.Column(JSONB, unique=True, nullable=True)
    # Docker container IDs, used internally to identify the resources
    # of a specific session.
    container_ids = db.Column(JSONB, unique=False, nullable=True)

    def __repr__(self):
        """Return a debug representation containing the pipeline uuid."""
        pipeline = self.pipeline_uuid
        return f"<Launch {pipeline}>"
示例#23
0
class NonInteractiveRun(PipelineRun):
    """Row of ``non_interactive_runs``, keyed by (experiment_uuid, run_uuid)."""

    __tablename__ = "non_interactive_runs"
    __bind_key__ = "persistent_db"

    # TODO: verify why the experiment_uuid should be part of the
    # primary key.
    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey(
            "experiments.experiment_uuid",
            ondelete="CASCADE",
        ),
        primary_key=True,
    )
    # Must be unique so that it can be the target of an FK constraint
    # for image mappings that delete on cascade.
    run_uuid = db.Column(
        db.String(36),
        primary_key=True,
        unique=True,
    )
    # Identifies the pipeline run within the experiment and keeps a
    # consistent ordering.
    pipeline_run_id = db.Column(db.Integer, unique=False, nullable=False)
    # Both timestamps are nullable.
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    finished_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )

    # Both relationships are declared with lazy="joined",
    # passive_deletes=False and cascade="all, delete".
    pipeline_steps = db.relationship(
        "NonInteractiveRunPipelineStep",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
    image_mappings = db.relationship(
        "NonInteractiveRunImageMapping",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
示例#24
0
class Job(BaseModel):
    """Row of the ``jobs`` table; ``uuid`` is the primary key."""

    __tablename__ = "jobs"
    # Composite index over (project_uuid, pipeline_uuid).
    __table_args__ = (
        Index(
            "ix_jobs_project_uuid_pipeline_uuid",
            "project_uuid",
            "pipeline_uuid",
        ),
    )

    name = db.Column(
        db.String(255),
        unique=False,
        nullable=False,
        # Server default for migrating users.
        server_default=text("'job'"),
    )

    pipeline_name = db.Column(
        db.String(255),
        unique=False,
        nullable=False,
        # Server default for migrating users.
        server_default=text("''"),
    )

    uuid = db.Column(db.String(36), primary_key=True)
    # Foreign key to projects.uuid, declared with ON DELETE CASCADE.
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    pipeline_uuid = db.Column(
        db.String(36),
        index=True,
        nullable=False,
    )

    # Jobs that are to be scheduled once (right now) or once in the
    # future have no schedule (null).
    schedule = db.Column(db.String(100), nullable=True)

    # A list of dictionaries. The length of the list is the number of
    # non interactive runs that will be run, one per parameters
    # dictionary. A parameters dictionary maps step uuids to a
    # dictionary containing the parameters of that step for that
    # particular run.
    # [{ <step_uuid>: {"a": 1}, ...}, ...]
    parameters = db.Column(
        JSONB,
        nullable=False,
        # Keeps migrated entries that lacked this column valid. Note
        # that the entries are stored as a list of dicts.
        server_default="[]",
    )

    # This column also contains the parameters that were stored within
    # the pipeline definition file. Those are the original parameters,
    # not the job parameters.
    pipeline_definition = db.Column(
        JSONB,
        nullable=False,
        # Keeps migrated entries that lacked this column valid.
        server_default="{}",
    )

    pipeline_run_spec = db.Column(
        JSONB,
        nullable=False,
        # Keeps migrated entries that lacked this column valid.
        server_default="{}",
    )

    # Indexed so that jobs to run can be looked up efficiently.
    next_scheduled_time = db.Column(
        TIMESTAMP(timezone=True),
        index=True,
    )

    # Lets the user see the last time the job was scheduled/run.
    last_scheduled_time = db.Column(
        TIMESTAMP(timezone=True),
        index=True,
    )

    # Used to "stamp" every non interactive run with the execution
    # number it belongs to: the first time a job runs it is batch 1,
    # then 2, etc.
    total_scheduled_executions = db.Column(
        db.Integer,
        unique=False,
        server_default=text("0"),
    )

    pipeline_runs = db.relationship(
        "NonInteractivePipelineRun",
        lazy="select",
        # Let the db take care of cascading deletions.
        # https://docs.sqlalchemy.org/en/13/orm/relationship_api.html#sqlalchemy.orm.relationship.params.passive_deletes
        # A value of True indicates that unloaded child items should
        # not be loaded during a delete operation on the parent.
        # Normally, when a parent item is deleted, all child items are
        # loaded so that they can either be marked as deleted or have
        # their foreign key to the parent set to NULL. Setting this
        # flag to True usually implies that an
        # ON DELETE <CASCADE|SET NULL> rule is in place which handles
        # updating/deleting child rows on the database side.
        passive_deletes=True,
        # https://docs.sqlalchemy.org/en/14/orm/cascades.html#using-foreign-key-on-delete-cascade-with-orm-relationships
        # To use ON DELETE foreign key cascades together with
        # relationship(), the relationship.cascade setting must still
        # be configured to match the desired "delete" or "set null"
        # behavior. In short: the behaviour specified on the FK column
        # and the one specified on the relationship must match.
        cascade="all, delete",
        # When a job and its runs are queried, the runs are sorted by
        # job schedule number and by the index of the pipeline within
        # that job.
        order_by=(
            "[desc(NonInteractivePipelineRun.job_run_index), "
            "desc(NonInteractivePipelineRun.job_run_pipeline_run_index)]"
        ),
    )

    # The status of a job can be DRAFT, PENDING, STARTED, SUCCESS,
    # ABORTED or FAILURE.
    # - Jobs start as DRAFT: the job has been created but has not been
    #   started by the user. What happens once the user starts it
    #   depends on the type of job.
    # - One time jobs become PENDING, then STARTED once they are run by
    #   the scheduler and their pipeline runs are added to the queue.
    #   Once completed their status is SUCCESS; if aborted, ABORTED.
    # - Recurring jobs, characterized by having a schedule, become
    #   STARTED and can only move to ABORTED when they get cancelled,
    #   which implies that the job will not be scheduled anymore.
    # - One time jobs which fail to run (the scheduling of the related
    #   pipeline runs fails) are set to FAILURE; this is not related to
    #   a failure at the pipeline run level.
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=False,
        # Pre-existing jobs of migrating users are set to SUCCESS.
        server_default=text("'SUCCESS'"),
    )

    strategy_json = db.Column(JSONB, nullable=False, server_default="{}")

    # JSONB column wrapped in deferred(); the server default is "{}".
    env_variables = deferred(
        db.Column(JSONB, nullable=False, server_default="{}")
    )

    created_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        index=True,
        # Server default for migrating users.
        server_default=text("timezone('utc', now())"),
    )

    def __repr__(self):
        """Return a debug representation containing the job uuid."""
        job_uuid = self.uuid
        return f"<Job: {job_uuid}>"