def runs_start(run_id: int, run: Optional[BaseRun]):
    """Queue a compiled, managed run and hand it to the Kubernetes manager.

    The run is resolved through ``get_run``. Nothing happens when no run is
    resolved or when the run is not managed. A run whose status does not
    satisfy ``LifeCycle.is_compiled`` is logged and left untouched.

    Otherwise a ``QUEUED`` condition is recorded on the run. When
    ``K8S_IN_CLUSTER`` is truthy and the run is a service or a job,
    ``manager.start`` is then called. Exceptions raised while reading the
    configuration or starting the operation are caught and recorded on the
    run as a ``FAILED`` condition that carries the error and its traceback.

    Args:
        run_id: Identifier forwarded to ``get_run``.
        run: Optional run instance forwarded to ``get_run``.
    """
    # NOTE(review): a second `runs_start` is defined later in this file; if
    # both live at module level, that later definition shadows this one.
    run = get_run(run_id=run_id, run=run)
    if not run or not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    def _record_failure(error: Exception, prefix: Optional[str] = None):
        # Must be called from inside an `except` block so that
        # `traceback.format_exc()` picks up the exception being handled.
        heading = prefix or "Could not start the operation.\n"
        details = "error: {}\n{}".format(repr(error), traceback.format_exc())
        failed_condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=heading + details,
        )
        new_run_status(run=run, condition=failed_condition)

    queued_condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=queued_condition)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        # Only services and jobs are started, and only in-cluster.
        if not in_cluster or not (run.is_service or run.is_job):
            return
        manager.start(
            content=run.content,
            owner_name=run.project.owner.name,
            project_name=run.project.name,
            run_name=run.name,
            run_uuid=run.uuid.hex,
            run_kind=run.kind,
            namespace=conf.get(K8S_NAMESPACE),
            in_cluster=in_cluster,
            default_auth=False,
        )
    except (PolyaxonK8SError, ApiException) as k8s_error:
        _record_failure(
            k8s_error,
            "Kubernetes manager could not start the operation.\n",
        )
    except PolypodException as manifest_error:
        _record_failure(
            manifest_error, "Failed converting the run manifest.\n"
        )
    except Exception as unknown_error:
        # Catch-all so the failure is recorded as a FAILED condition.
        _record_failure(unknown_error, "Failed with unknown exception.\n")
def runs_start(run_id: int, run: Optional[BaseRun]):
    """Queue a compiled, managed run and start it through the manager.

    The run is resolved through ``get_run``. The function returns early when
    no run is resolved, when the run is not managed, or when its status does
    not satisfy ``LifeCycle.is_compiled`` (the last case is logged).

    Otherwise a ``QUEUED`` condition is recorded on the run. When
    ``K8S_IN_CLUSTER`` is truthy and the run is a service or a job,
    ``manager.start`` is then called. A ``PolyaxonK8SError`` raised in that
    phase is recorded on the run as a ``FAILED`` condition.

    Args:
        run_id: Identifier forwarded to ``get_run``.
        run: Optional run instance forwarded to ``get_run``.
    """
    # NOTE(review): another `runs_start` is defined earlier in this file; if
    # both live at module level, this definition is the one that stays bound.
    # NOTE(review): only PolyaxonK8SError is handled below; any other
    # exception raised while starting propagates to the caller.
    run = get_run(run_id=run_id, run=run)
    if not run or not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    queued_condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=queued_condition)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        # Only services and jobs are started, and only in-cluster.
        if not in_cluster or not (run.is_service or run.is_job):
            return
        manager.start(
            content=run.content,
            owner_name=run.project.owner.name,
            project_name=run.project.name,
            run_name=run.name,
            run_uuid=run.uuid.hex,
            run_kind=run.kind,
            namespace=conf.get(K8S_NAMESPACE),
            in_cluster=in_cluster,
            default_auth=False,
        )
    except PolyaxonK8SError as k8s_error:
        failure_message = "Could not start the job {}".format(k8s_error)
        failed_condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=failure_message,
        )
        new_run_status(run=run, condition=failed_condition)