Пример #1
0
    def test_new_stopped_status_after_stopping(self):
        """Check which conditions get recorded once a run is stopping.

        The assertions expect a RUNNING condition reported after STOPPING to
        leave the number of recorded conditions unchanged, and a following
        STOPPED condition to be appended.
        """
        # The run starts stopping: first recorded condition.
        stopping = V1StatusCondition.get_condition(
            type=V1Statuses.STOPPING, status=True, reason="foo"
        )
        new_run_status(self.run, condition=stopping)
        self.run.refresh_from_db()
        assert len(self.run.status_conditions) == 1

        # RUNNING reported while stopping: the count is expected to stay at 1.
        running = V1StatusCondition.get_condition(
            type=V1Statuses.RUNNING, status=True, reason="foo"
        )
        new_run_status(self.run, condition=running)
        self.run.refresh_from_db()
        assert len(self.run.status_conditions) == 1

        # STOPPED is expected to be recorded as a second condition.
        stopped = V1StatusCondition.get_condition(
            type=V1Statuses.STOPPED,
            status=True,
            reason="foo",
            message="New message",
        )
        new_run_status(self.run, condition=stopped)
        self.run.refresh_from_db()
        assert len(self.run.status_conditions) == 2
Пример #2
0
    def test_status_transition(self):
        """Alternate statuses and expect one new condition per transition."""
        scheduled = V1StatusCondition.get_condition(
            type=V1Statuses.SCHEDULED, status=True, reason="foo"
        )
        new_run_status(self.run, condition=scheduled)
        self.run.refresh_from_db()
        assert len(self.run.status_conditions) == 1

        # RUNNING and WARNING alternate; each switch is expected to append a
        # condition, so the count grows by one at every step (2, 3, 4, 5).
        transitions = (
            V1Statuses.RUNNING,
            V1Statuses.WARNING,
            V1Statuses.RUNNING,
            V1Statuses.WARNING,
        )
        for expected_count, status_type in enumerate(transitions, start=2):
            transition = V1StatusCondition.get_condition(
                type=status_type,
                status=True,
                reason="foo",
                message="New message",
            )
            new_run_status(self.run, condition=transition)
            self.run.refresh_from_db()
            assert len(self.run.status_conditions) == expected_count
Пример #3
0
def runs_prepare(run_id: int,
                 run: Optional[BaseRun] = None,
                 eager: bool = False) -> bool:
    """Compile a run and record the resulting status condition.

    Args:
        run_id: Identifier passed to ``get_run`` to look the run up.
        run: Optional already-loaded run, forwarded to ``get_run``. It
            defaults to ``None`` so callers may pass only ``run_id``.
        eager: Forwarded to the resolver; when true the run is started
            immediately through ``runs_start`` after compilation.

    Returns:
        ``True`` only when the run was compiled, is not pending and was not
        started eagerly. ``False`` in every other case (run not found, not
        compilable, compilation failure, pending run, eager start).
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return False

    if not LifeCycle.is_compilable(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.COMPILED,
        )
        return False

    def _fail(message: str) -> bool:
        # Record a FAILED condition carrying `message`; always returns False
        # so handlers can `return _fail(...)`.
        failed = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="SchedulerPrepare",
            message=message,
        )
        new_run_status(run=run, condition=failed)
        return False

    try:
        compiled_at = now()
        _, compiled_operation = resolver.resolve(run=run,
                                                 compiled_at=compiled_at,
                                                 eager=eager)
    except PolyaxonCompilerError as e:
        return _fail(f"Failed to compile.\n{e}")
    except Exception as e:
        # Task boundary: any other error is stored on the run as a FAILED
        # condition instead of propagating.
        return _fail(f"Compiler received an internal error.\n{e}")

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.COMPILED,
        status="True",
        reason="SchedulerPrepare",
        message="Run is compiled",
        last_update_time=compiled_at,
    )
    new_run_status(run=run, condition=condition)

    if run.pending:
        return False

    if eager:
        runs_start(run_id=run.id, run=run)
        return False

    return True
Пример #4
0
def resume_run(
    run: BaseRun,
    user_id: int = None,
    name: str = None,
    description: str = None,
    content: str = None,
    readme: str = None,
    tags: List[str] = None,
) -> BaseRun:
    """Re-initialise ``run`` from its raw content and mark it RESUMING.

    Each optional argument overrides the matching value of ``run`` when it is
    truthy; ``content`` is handed to ``operations.init_run`` as ``override``.
    The fields of the freshly initialised instance are copied onto ``run``,
    which is saved, given a RESUMING condition and returned.
    """
    operation = V1Operation.read(run.raw_content)
    _, instance = operations.init_run(
        project_id=run.project_id,
        user_id=user_id or run.user_id,
        name=name or run.name,
        description=description or run.description,
        readme=readme or run.readme,
        op_spec=operation,
        tags=tags or run.tags,
        override=content,
    )

    # Copy the re-initialised values back onto the existing run.
    copied_fields = (
        "user_id",
        "name",
        "description",
        "readme",
        "content",
        "raw_content",
        "tags",
    )
    for field in copied_fields:
        setattr(run, field, getattr(instance, field))
    run.save()

    resuming = V1StatusCondition.get_condition(
        type=V1Statuses.RESUMING, status=True
    )
    new_run_status(run, condition=resuming)
    return run
Пример #5
0
 def test_new_run_status_created(self, auditor_record):
     """A CREATED condition is expected to produce no auditor record."""
     created = V1StatusCondition.get_condition(
         type=V1Statuses.CREATED, status=True
     )
     new_run_status(self.run, condition=created)
     assert auditor_record.call_count == 0
Пример #6
0
    def test_get(self):
        """GET the object and follow its serialized status conditions."""

        def fetch_and_check(expected_conditions):
            # Query the endpoint and compare the payload with the serializer
            # output for the current state of the object.
            response = self.client.get(self.url)
            assert response.status_code == status.HTTP_200_OK
            payload = response.data
            assert len(payload["status_conditions"]) == expected_conditions
            assert payload == self.serializer_class(self.object).data

        # Before any status update no condition is expected.
        fetch_and_check(0)

        running = V1StatusCondition.get_condition(
            type=V1Statuses.RUNNING, status=True
        )
        new_run_status(self.object, condition=running)
        self.object.refresh_from_db()
        fetch_and_check(1)

        new_run_stop_status(run=self.object, message="foo")
        self.object.refresh_from_db()
        fetch_and_check(2)
Пример #7
0
def create_run(
    project_id: int,
    user_id: int,
    name: str = None,
    description: str = None,
    readme: str = None,
    tags: List[int] = None,
    raw_content: str = None,
) -> BaseRun:
    """Create and return an unmanaged job run.

    The run is created with ``kind=V1RunKind.JOB``, ``is_managed=False`` and
    a single serialized CREATED condition as its initial status conditions.
    """
    # NOTE(review): `tags` is annotated List[int]; confirm whether List[str]
    # was intended.
    run_manager = get_run_model().objects
    created = V1StatusCondition.get_condition(
        type=V1Statuses.CREATED,
        status="True",
        reason="PolyaxonRunCreated",
        message="Run is created",
    )
    return run_manager.create(
        project_id=project_id,
        user_id=user_id,
        name=name,
        description=description,
        readme=readme,
        tags=tags,
        kind=V1RunKind.JOB,
        is_managed=False,
        raw_content=raw_content,
        status_conditions=[created.to_dict()],
    )
Пример #8
0
 def setUp(self):
     """Run the parent setup, then put the test object in STOPPED status."""
     super().setUp()
     stopped = V1StatusCondition.get_condition(
         type=V1Statuses.STOPPED, status=True
     )
     new_run_status(self.object, condition=stopped)
Пример #9
0
def runs_start(run_id: int, run: Optional[BaseRun] = None):
    """Queue a compiled, managed run and hand it to the cluster manager.

    Args:
        run_id: Identifier passed to ``get_run`` to look the run up.
        run: Optional already-loaded run, forwarded to ``get_run``. It
            defaults to ``None`` so callers may pass only ``run_id``.

    Nothing happens when the run cannot be found, is not managed, or its
    status is not compiled. Otherwise a QUEUED condition is recorded and,
    when running in-cluster for a service or job, ``manager.start`` is
    called. A ``PolyaxonK8SError`` is recorded as a FAILED condition.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return

    if not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=condition)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except PolyaxonK8SError as e:
        # Surface the start failure on the run itself.
        condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message="Could not start the job {}".format(e),
        )
        new_run_status(run=run, condition=condition)
Пример #10
0
def runs_start(run_id: int, run: Optional[BaseRun] = None):
    """Queue a compiled, managed run and hand it to the cluster manager.

    Args:
        run_id: Identifier passed to ``get_run`` to look the run up.
        run: Optional already-loaded run, forwarded to ``get_run``. It
            defaults to ``None`` so callers may pass only ``run_id``.

    Nothing happens when the run cannot be found, is not managed, or its
    status is not compiled. Otherwise a QUEUED condition is recorded and,
    when running in-cluster for a service or job, ``manager.start`` is
    called. Any exception raised while starting is recorded on the run as a
    FAILED condition whose message includes the traceback.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return

    if not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=condition)

    def _log_error(exc: Exception, message: str = None):
        # Record a FAILED condition; must be called from an `except` block
        # so that `traceback.format_exc()` sees the active exception.
        message = message or "Could not start the operation.\n"
        message += "error: {}\n{}".format(repr(exc), traceback.format_exc())
        cond = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=message,
        )
        new_run_status(run=run, condition=cond)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except (PolyaxonK8SError, ApiException) as e:
        _log_error(
            exc=e,
            message="Kubernetes manager could not start the operation.\n")
    except PolypodException as e:
        _log_error(exc=e, message="Failed converting the run manifest.\n")
    except Exception as e:
        # Task boundary: unexpected errors are stored on the run rather than
        # propagated.
        _log_error(exc=e, message="Failed with unknown exception.\n")
Пример #11
0
 def test_start_run(self, manager_start):
     """Starting a COMPILED run is expected to call the manager once."""
     compiled_run = RunFactory(project=self.project, user=self.user)
     compiled = V1StatusCondition.get_condition(
         type=V1Statuses.COMPILED, status=True
     )
     new_run_status(run=compiled_run, condition=compiled)
     runs_start(run_id=compiled_run.id)
     assert manager_start.call_count == 1
Пример #12
0
    def test_status_update_results_in_new_updated_at_datetime(self):
        """Each status update is expected to move ``updated_at`` forward."""
        # The same STARTING condition is submitted twice; the timestamp must
        # advance on both submissions.
        for _ in range(2):
            previous_updated_at = self.run.updated_at
            starting = V1StatusCondition.get_condition(
                type=V1Statuses.STARTING, status=True
            )
            new_run_status(self.run, condition=starting)
            assert previous_updated_at < self.run.updated_at
Пример #13
0
def new_run_stop_status(run, message):
    """Record a STOPPED condition on ``run``.

    A truthy ``message`` is appended to the base text after a semicolon;
    otherwise the base text is used on its own.
    """
    if message:
        text = f"Run is stopped; {message}"
    else:
        text = "Run is stopped"

    stopped = V1StatusCondition.get_condition(
        type=V1Statuses.STOPPED,
        status="True",
        reason="PolyaxonRunStopped",
        message=text,
    )
    new_run_status(run=run, condition=stopped)
Пример #14
0
 def _log_error(exc: Exception, message: str = None):
     """Record a FAILED condition describing ``exc``.

     ``message`` (or a default prefix when it is falsy) is followed by the
     exception's repr and the currently formatted traceback. ``run`` is not
     a parameter: it is taken from the enclosing scope.
     """
     prefix = message or "Could not start the operation.\n"
     details = "error: {}\n{}".format(repr(exc), traceback.format_exc())
     failed = V1StatusCondition.get_condition(
         type=V1Statuses.FAILED,
         status="True",
         reason="PolyaxonRunFailed",
         message=prefix + details,
     )
     new_run_status(run=run, condition=failed)
Пример #15
0
    def test_new_status_equality(self):
        """Repeated conditions of one type are expected to be recorded once."""

        def record_and_count(**condition_kwargs):
            # Submit a condition, reload the run and report how many
            # conditions are stored.
            condition = V1StatusCondition.get_condition(**condition_kwargs)
            new_run_status(self.run, condition=condition)
            self.run.refresh_from_db()
            return len(self.run.status_conditions)

        # First SCHEDULED condition.
        assert record_and_count(
            type=V1Statuses.SCHEDULED, status=True, reason="foo"
        ) == 1

        # Identical condition: the count is expected to stay at 1.
        assert record_and_count(
            type=V1Statuses.SCHEDULED, status=True, reason="foo"
        ) == 1

        # Same type with a different message: still expected to be 1.
        assert record_and_count(
            type=V1Statuses.SCHEDULED,
            status=True,
            reason="foo",
            message="New message",
        ) == 1

        # A different type is expected to add a second condition.
        assert record_and_count(
            type=V1Statuses.RUNNING,
            status=True,
            reason="foo",
            message="New message",
        ) == 2
Пример #16
0
 def log_agent_status(self, status: str, reason: str = None, message: str = None):
     """Send a status condition for this agent through the API client.

     The condition is built with ``type=status`` and ``status=True`` and is
     posted with ``async_req=True`` for ``self.owner`` / ``self.agent_uuid``.
     """
     body = {
         "condition": V1StatusCondition.get_condition(
             type=status, status=True, reason=reason, message=message
         )
     }
     self.client.agents_v1.create_agent_status(
         owner=self.owner,
         uuid=self.agent_uuid,
         body=body,
         async_req=True,
     )
Пример #17
0
 def test_new_run_status_scheduled(self, auditor_record):
     """A SCHEDULED condition is expected to record one RUN_NEW_STATUS event."""
     scheduled = V1StatusCondition.get_condition(
         type=V1Statuses.SCHEDULED, status=True
     )
     new_run_status(self.run, condition=scheduled)

     assert auditor_record.call_count == 1
     # The auditor must have been called with keyword arguments only.
     positional, keywords = auditor_record.call_args
     assert positional == ()
     assert keywords["event_type"] == run_events.RUN_NEW_STATUS
Пример #18
0
 def create_one(self):
     """Create a run via the parent, mark it RUNNING, then stop it."""
     created_run = super().create_one()
     running = V1StatusCondition.get_condition(
         type=V1Statuses.RUNNING,
         status="True",
         reason="Run is running",
         message="foo",
     )
     new_run_status(created_run, running)
     new_run_stop_status(created_run, "stopping")
     return created_run
Пример #19
0
def create_status(view, serializer):
    """Record the condition carried by a validated serializer on ``view.run``.

    Nothing is recorded when the validated data is empty, holds no truthy
    ``"condition"`` entry, or the condition built from it is falsy.
    """
    # The validity flag itself is not inspected; only the validated data is.
    serializer.is_valid()
    payload = serializer.validated_data
    if not payload:
        return

    condition_fields = payload.get("condition")
    if not condition_fields:
        return

    condition = V1StatusCondition.get_condition(**condition_fields)
    if not condition:
        return
    new_run_status(run=view.run, condition=condition)
Пример #20
0
def stop_run(view, request, *args, **kwargs):
    """Request a stop for ``view.run`` and always answer with HTTP 200.

    When the run's status is already done nothing is recorded; otherwise a
    STOPPING condition is stored and the request is audited.
    """
    if not LifeCycle.is_done(view.run.status):
        stopping = V1StatusCondition.get_condition(
            type=V1Statuses.STOPPING,
            status="True",
            reason="PolyaxonRunStopping",
            message="User requested to stop the run.",
        )
        new_run_status(run=view.run, condition=stopping)
        view.audit(request, *args, **kwargs)

    return Response(status=status.HTTP_200_OK, data={})
Пример #21
0
def stop_runs(view, request, actor, *args, **kwargs):
    """Stop the runs listed under ``"uuids"`` in the request data.

    Runs of the view's project whose status is in
    ``LifeCycle.SAFE_STOP_VALUES`` receive a STOPPED condition directly. Runs
    whose status is outside ``LifeCycle.DONE_OR_IN_PROGRESS_VALUES`` receive
    a STOPPING condition and one ``RUN_STOPPED_ACTOR`` audit record each.
    The response is always HTTP 200 with an empty body.
    """
    requested_uuids = request.data.get("uuids", [])

    # Immediate stop for the runs that can be stopped safely.
    safe_to_stop = (
        get_run_model()
        .objects.filter(project=view.project, uuid__in=requested_uuids)
        .filter(status__in=LifeCycle.SAFE_STOP_VALUES)
    )
    stopped = V1StatusCondition.get_condition(
        type=V1Statuses.STOPPED,
        status="True",
        reason="EventHandler",
        message="User requested to stop the run.",
    )
    bulk_new_run_status(safe_to_stop, stopped)

    # The remaining candidates are loaded into a list because they are used
    # twice: for the bulk update and for the audit records.
    to_stop = list(
        get_run_model()
        .objects.filter(project=view.project, uuid__in=requested_uuids)
        .exclude(status__in=LifeCycle.DONE_OR_IN_PROGRESS_VALUES)
    )
    stopping = V1StatusCondition.get_condition(
        type=V1Statuses.STOPPING,
        status="True",
        reason="EventHandler",
        message="User requested to stop the run.",
    )
    bulk_new_run_status(to_stop, stopping)

    for stopping_run in to_stop:
        auditor.record(
            event_type=RUN_STOPPED_ACTOR,
            instance=stopping_run,
            actor_id=actor.id,
            actor_name=actor.username,
            owner_id=view.project.owner_id,
            owner_name=view.owner_name,
            project_name=view.project_name,
        )

    return Response(status=status.HTTP_200_OK, data={})
Пример #22
0
def runs_prepare(run_id: int, run: Optional[BaseRun] = None, eager: bool = False):
    """Compile a run and record the resulting status condition.

    Args:
        run_id: Identifier passed to ``get_run`` to look the run up.
        run: Optional already-loaded run, forwarded to ``get_run``. It
            defaults to ``None`` so callers may pass only ``run_id``.
        eager: When true, the run is started through ``runs_start`` right
            after a successful compilation.

    Returns:
        Always ``None``. The outcome is stored on the run as a COMPILED
        condition, or as a FAILED condition on ``PolyaxonCompilerError``.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return None

    if not LifeCycle.is_compilable(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.COMPILED,
        )
        return None

    try:
        compiled_at = now()
        _, compiled_operation = resolver.resolve(run=run,
                                                 compiled_at=compiled_at)
    except PolyaxonCompilerError as e:
        condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=f"Run compilation error: {e}",
        )
        new_run_status(run=run, condition=condition)
        return None

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.COMPILED,
        status="True",
        reason="PolyaxonRunCompiler",
        message="Run is compiled",
        last_update_time=compiled_at,
    )
    new_run_status(run=run, condition=condition)

    if eager:
        runs_start(run_id=run.id, run=run)
    return None
Пример #23
0
 def test_resume_undone_run(self):
     """Resuming a run that is still RUNNING is expected to be rejected."""
     running = V1StatusCondition.get_condition(
         type=V1Statuses.RUNNING, status=True
     )
     new_run_status(self.object, condition=running)
     assert self.queryset.count() == 1

     payload = {}
     with patch("polycommon.workers.send") as workers_send:
         response = self.client.post(self.url + "resume/", payload)

     # Expected: a 400 response, no task sent and no additional run.
     assert response.status_code == status.HTTP_400_BAD_REQUEST
     assert workers_send.call_count == 0
     assert self.queryset.count() == 1
Пример #24
0
def new_run_stopping_status(run, message) -> bool:
    """Record a STOPPING condition on ``run`` unless it is already done.

    Returns ``False`` without recording anything when
    ``LifeCycle.is_done(run.status, progressing=True)`` holds, ``True`` after
    the condition was recorded. A truthy ``message`` is appended to the base
    text after a semicolon.
    """
    if LifeCycle.is_done(run.status, progressing=True):
        return False

    if message:
        text = f"Run is stopping; {message}"
    else:
        text = "Run is stopping"

    stopping = V1StatusCondition.get_condition(
        type=V1Statuses.STOPPING,
        status="True",
        reason="PolyaxonRunStopping",
        message=text,
    )
    new_run_status(run=run, condition=stopping)
    return True
Пример #25
0
 def test_new_run_status_succeeded(self, auditor_record):
     """A SUCCEEDED condition is expected to record three audit events."""
     succeeded = V1StatusCondition.get_condition(
         type=V1Statuses.SUCCEEDED, status=True
     )
     new_run_status(self.run, condition=succeeded)

     assert auditor_record.call_count == 3
     recorded_calls = auditor_record.call_args_list

     # Every call must have used keyword arguments only.
     for position in range(3):
         assert recorded_calls[position][0] == ()

     # The events are expected in this exact order.
     expected_events = (
         run_events.RUN_NEW_STATUS,
         run_events.RUN_SUCCEEDED,
         run_events.RUN_DONE,
     )
     for position, expected_event in enumerate(expected_events):
         assert recorded_calls[position][1]["event_type"] == expected_event
Пример #26
0
    def test_equality_apply(self):
        """Filter runs by status with equality and negated equality."""
        eq_cond = EqualityCondition(op="eq")
        neq_cond = EqualityCondition(op="eq", negation=True)

        scheduled = V1StatusCondition.get_condition(
            type=V1Statuses.SCHEDULED, status=True
        )
        new_run_status(run=self.run, condition=scheduled)

        # (condition, status to filter on, expected number of matching runs)
        expectations = (
            (eq_cond, V1Statuses.SCHEDULED, 1),
            (eq_cond, V1Statuses.SUCCEEDED, 0),
            (neq_cond, V1Statuses.SCHEDULED, 0),
            (neq_cond, V1Statuses.SUCCEEDED, 1),
        )
        for cond, status_value, expected_count in expectations:
            matching = cond.apply(
                queryset=Run.objects,
                name="status",
                params=status_value,
                query_backend=Q,
                timezone=settings.TIME_ZONE,
            )
            assert matching.count() == expected_count
Пример #27
0
    def test_prepare_run_of_already_skipped_run(self, mock_resolve):
        """A new run is expected to compile although another run is SKIPPED."""
        resolved_spec = MagicMock(cache=V1Cache(disable=False))
        mock_resolve.return_value = (None, resolved_spec)

        # An existing run that has been skipped.
        skipped_run = RunFactory(project=self.project, user=self.user)
        skipped = V1StatusCondition.get_condition(
            type=V1Statuses.SKIPPED, status=True
        )
        new_run_status(run=skipped_run, condition=skipped)

        # Preparing a separate, fresh run must leave it COMPILED.
        fresh_run = RunFactory(project=self.project, user=self.user)
        runs_prepare(run_id=fresh_run.id)

        fresh_run.refresh_from_db()
        assert fresh_run.status == V1Statuses.COMPILED
Пример #28
0
    def setUp(self):
        """Create a project with four RUNNING runs and build the stop URL."""
        super().setUp()
        self.project = ProjectFactory()

        # All four runs are created first, then each one is marked RUNNING.
        self.objects = []
        for _ in range(4):
            self.objects.append(
                self.factory_class(project=self.project, user=self.user)
            )
        for created_run in self.objects:
            running = V1StatusCondition.get_condition(
                type=V1Statuses.RUNNING, status=True
            )
            new_run_status(run=created_run, condition=running)

        self.url = "/{}/{}/{}/runs/stop/".format(
            API_V1, self.user.username, self.project.name
        )
Пример #29
0
def notify(kind, owner, project, run_uuid, run_name, condition):
    """Notifier command."""
    # Dependencies are imported inside the command body.
    import ujson
    from polyaxon.lifecycle import V1StatusCondition
    from polyaxon.notifiers import NOTIFIERS, NotificationSpec

    # `condition` is JSON text: decode it and rebuild the condition object
    # from the decoded fields.
    condition_fields = ujson.loads(condition)
    spec = NotificationSpec(
        kind=kind,
        owner=owner,
        project=project,
        uuid=run_uuid,
        name=run_name,
        condition=V1StatusCondition.get_condition(**condition_fields),
    )

    # Dispatch to the notifier registered under `kind`.
    notifier = NOTIFIERS[kind]
    notifier.execute(notification=spec)
Пример #30
0
def set_entity_status(entity, condition: V1StatusCondition):
    """Apply ``condition`` to ``entity`` and return the entity.

    ``entity.status`` is set to ``condition.type`` and the serialized
    condition is stored in ``entity.status_conditions``: it replaces the last
    stored condition when the two compare equal, otherwise it is appended.
    A falsy ``condition`` (e.g. ``None``) leaves the entity untouched.

    Args:
        entity: Object exposing ``status`` and ``status_conditions``.
        condition: Condition to apply; must provide ``type`` and ``to_dict``.

    Returns:
        The same ``entity`` instance.
    """
    # Guard before reading `condition.type`, so that a missing condition is a
    # no-op instead of an AttributeError.
    if not condition:
        return entity

    entity.status = condition.type

    if not entity.status_conditions:
        # No history yet: start it with this condition.
        entity.status_conditions = [condition.to_dict()]
        return entity

    status_conditions = to_list(entity.status_conditions, check_none=True)
    last_condition = V1StatusCondition.get_condition(**status_conditions[-1])
    if last_condition == condition:
        # Equal to the latest entry: overwrite it in place.
        status_conditions[-1] = condition.to_dict()
    else:
        status_conditions.append(condition.to_dict())
    entity.status_conditions = status_conditions

    return entity