示例#1
0
def list_runtimes(label_selector: str = None):
    runtimes = []
    for kind in RuntimeKinds.runtime_with_handlers():
        runtime_handler = get_runtime_handler(kind)
        resources = runtime_handler.list_resources(label_selector)
        runtimes.append({"kind": kind, "resources": resources})
    return runtimes
示例#2
0
def _build_function(
    db_session,
    auth_info: mlrun.api.schemas.AuthInfo,
    function,
    with_mlrun,
    skip_deployed,
    mlrun_version_specifier,
):
    fn = None
    ready = None
    try:
        fn = new_function(runtime=function)

        run_db = get_run_db_instance(db_session, auth_info.session)
        fn.set_db_connection(run_db)
        fn.save(versioned=False)
        if fn.kind in RuntimeKinds.nuclio_runtimes():
            mlrun.api.api.utils.ensure_function_has_auth_set(fn, auth_info)
            deploy_nuclio_function(fn)
            # deploy only start the process, the get status API is used to check readiness
            ready = False
        else:
            ready = build_runtime(fn, with_mlrun, mlrun_version_specifier,
                                  skip_deployed)
        fn.save(versioned=True)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason=f"runtime error: {err}")
    return fn, ready
示例#3
0
def delete_runtimes(label_selector: str = None,
                    force: bool = False,
                    db_session: Session = Depends(deps.get_db_session)):
    for kind in RuntimeKinds.runtime_with_handlers():
        runtime_handler = get_runtime_handler(kind)
        runtime_handler.delete_resources(get_db(), db_session, label_selector,
                                         force)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
示例#4
0
def _cleanup_runtimes():
    logger.debug('Cleaning runtimes')
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            runtime_handler = get_runtime_handler(kind)
            runtime_handler.delete_resources(get_db(), db_session)
    finally:
        close_session(db_session)
示例#5
0
def delete_runtimes(
        label_selector: str = None,
        force: bool = False,
        grace_period: int = config.runtime_resources_deletion_grace_period,
        db_session: Session = Depends(deps.get_db_session),
):
    for kind in RuntimeKinds.runtime_with_handlers():
        runtime_handler = get_runtime_handler(kind)
        runtime_handler.delete_resources(get_db(), db_session, label_selector,
                                         force, grace_period)
    return Response(status_code=HTTPStatus.NO_CONTENT.value)
示例#6
0
def get_runtime(kind: str, label_selector: str = None):
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      kind=kind,
                      err="Invalid runtime kind")
    runtime_handler = get_runtime_handler(kind)
    resources = runtime_handler.list_resources(label_selector)
    return {
        "kind": kind,
        "resources": resources,
    }
示例#7
0
def get_runtime(kind: str, label_selector: str = None):
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(status.HTTP_400_BAD_REQUEST,
                      kind=kind,
                      err='Invalid runtime kind')
    runtime_handler = get_runtime_handler(kind)
    resources = runtime_handler.list_resources(label_selector)
    return {
        'kind': kind,
        'resources': resources,
    }
示例#8
0
def delete_runtime(kind: str,
                   label_selector: str = None,
                   force: bool = False,
                   db_session: Session = Depends(deps.get_db_session)):
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(status.HTTP_400_BAD_REQUEST,
                      kind=kind,
                      err='Invalid runtime kind')
    runtime_handler = get_runtime_handler(kind)
    runtime_handler.delete_resources(get_db(), db_session, label_selector,
                                     force)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
示例#9
0
def _cleanup_runtimes():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.delete_resources(get_db(), db_session)
            except Exception as exc:
                logger.warning("Failed deleting resources. Ignoring",
                               exc=str(exc),
                               kind=kind)
    finally:
        close_session(db_session)
示例#10
0
def _monitor_runs():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.monitor_runs(get_db(), db_session)
            except Exception as exc:
                logger.warning("Failed monitoring runs. Ignoring",
                               exc=str(exc),
                               kind=kind)
    finally:
        close_session(db_session)
示例#11
0
def delete_runtime(
        kind: str,
        label_selector: str = None,
        force: bool = False,
        grace_period: int = config.runtime_resources_deletion_grace_period,
        db_session: Session = Depends(deps.get_db_session),
):
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      kind=kind,
                      err="Invalid runtime kind")
    runtime_handler = get_runtime_handler(kind)
    runtime_handler.delete_resources(get_db(), db_session, label_selector,
                                     force, grace_period)
    return Response(status_code=HTTPStatus.NO_CONTENT.value)
示例#12
0
def _build_function(db_session, function, with_mlrun):
    fn = None
    ready = None
    try:
        fn = new_function(runtime=function)

        run_db = get_run_db_instance(db_session)
        fn.set_db_connection(run_db)
        fn.save(versioned=False)
        if fn.kind in RuntimeKinds.nuclio_runtimes():
            deploy_nuclio_function(fn)
            # deploy only start the process, the get status API is used to check readiness
            ready = False
        else:
            ready = build_runtime(fn, with_mlrun)
        fn.save(versioned=True)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason="runtime error: {}".format(err))
    return fn, ready
示例#13
0
def build_status(
        name: str = "",
        project: str = "",
        tag: str = "",
        offset: int = 0,
        logs: bool = True,
        last_log_timestamp: float = 0.0,
        verbose: bool = False,
        db_session: Session = Depends(deps.get_db_session),
):
    fn = get_db().get_function(db_session, name, project, tag)
    if not fn:
        log_and_raise(HTTPStatus.NOT_FOUND.value,
                      name=name,
                      project=project,
                      tag=tag)

    # nuclio deploy status
    if fn.get("kind") in RuntimeKinds.nuclio_runtimes():
        (
            state,
            address,
            nuclio_name,
            last_log_timestamp,
            text,
        ) = get_nuclio_deploy_status(name,
                                     project,
                                     tag,
                                     last_log_timestamp=last_log_timestamp,
                                     verbose=verbose)
        if state == "ready":
            logger.info("Nuclio function deployed successfully", name=name)
        if state == "error":
            logger.error(f"Nuclio deploy error, {text}", name=name)
        update_in(fn, "status.nuclio_name", nuclio_name)
        update_in(fn, "status.state", state)
        update_in(fn, "status.address", address)

        versioned = False
        if state == "ready":
            # Versioned means the version will be saved in the DB forever, we don't want to spam
            # the DB with intermediate or unusable versions, only successfully deployed versions
            versioned = True
        get_db().store_function(db_session,
                                fn,
                                name,
                                project,
                                tag,
                                versioned=versioned)
        return Response(
            content=text,
            media_type="text/plain",
            headers={
                "x-mlrun-function-status": state,
                "x-mlrun-last-timestamp": str(last_log_timestamp),
                "x-mlrun-address": address,
                "x-mlrun-name": nuclio_name,
            },
        )

    # job deploy status
    state = get_in(fn, "status.state", "")
    pod = get_in(fn, "status.build_pod", "")
    image = get_in(fn, "spec.build.image", "")
    out = b""
    if not pod:
        if state == "ready":
            image = image or get_in(fn, "spec.image")
        return Response(
            content=out,
            media_type="text/plain",
            headers={
                "function_status": state,
                "function_image": image,
                "builder_pod": pod,
            },
        )

    logger.info("get pod {} status".format(pod))
    state = get_k8s().get_pod_status(pod)
    logger.info("pod state={}".format(state))

    if state == "succeeded":
        logger.info("build completed successfully")
        state = "ready"
    if state in ["failed", "error"]:
        logger.error("build {}, watch the build pod logs: {}".format(
            state, pod))

    if logs and state != "pending":
        resp = get_k8s().logs(pod)
        if resp:
            out = resp.encode()[offset:]

    update_in(fn, "status.state", state)
    if state == "ready":
        update_in(fn, "spec.image", image)

    versioned = False
    if state == "ready":
        versioned = True
    get_db().store_function(db_session,
                            fn,
                            name,
                            project,
                            tag,
                            versioned=versioned)

    return Response(
        content=out,
        media_type="text/plain",
        headers={
            "x-mlrun-function-status": state,
            "function_status": state,
            "function_image": image,
            "builder_pod": pod,
        },
    )
示例#14
0
def _build_function(
    db_session,
    auth_info: mlrun.api.schemas.AuthInfo,
    function,
    with_mlrun=True,
    skip_deployed=False,
    mlrun_version_specifier=None,
    builder_env=None,
):
    fn = None
    ready = None
    try:
        fn = new_function(runtime=function)
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason=f"runtime error: {err}")
    try:
        run_db = get_run_db_instance(db_session)
        fn.set_db_connection(run_db)
        fn.save(versioned=False)
        if fn.kind in RuntimeKinds.nuclio_runtimes():
            mlrun.api.api.utils.ensure_function_has_auth_set(fn, auth_info)
            mlrun.api.api.utils.process_function_service_account(fn)

            if fn.kind == RuntimeKinds.serving:
                # Handle model monitoring
                try:
                    if fn.spec.track_models:
                        logger.info(
                            "Tracking enabled, initializing model monitoring")
                        _init_serving_function_stream_args(fn=fn)
                        model_monitoring_access_key = _process_model_monitoring_secret(
                            db_session,
                            fn.metadata.project,
                            "MODEL_MONITORING_ACCESS_KEY",
                        )

                        _create_model_monitoring_stream(
                            project=fn.metadata.project)
                        mlrun.api.crud.ModelEndpoints(
                        ).deploy_monitoring_functions(
                            project=fn.metadata.project,
                            model_monitoring_access_key=
                            model_monitoring_access_key,
                            db_session=db_session,
                            auth_info=auth_info,
                        )
                except Exception as exc:
                    logger.warning(
                        "Failed deploying model monitoring infrastructure for the project",
                        project=fn.metadata.project,
                        exc=exc,
                        traceback=traceback.format_exc(),
                    )

            deploy_nuclio_function(fn, auth_info=auth_info)
            # deploy only start the process, the get status API is used to check readiness
            ready = False
        else:
            ready = build_runtime(
                auth_info,
                fn,
                with_mlrun,
                mlrun_version_specifier,
                skip_deployed,
                builder_env=builder_env,
            )
        fn.save(versioned=True)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason=f"runtime error: {err}")
    return fn, ready
示例#15
0
def build_status(
        name: str = "",
        project: str = "",
        tag: str = "",
        offset: int = 0,
        logs: bool = True,
        last_log_timestamp: float = 0.0,
        verbose: bool = False,
        auth_info: mlrun.api.schemas.AuthInfo = Depends(
            deps.authenticate_request),
        db_session: Session = Depends(deps.get_db_session),
):
    mlrun.api.utils.auth.verifier.AuthVerifier(
    ).query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.function,
        project or mlrun.mlconf.default_project,
        name,
        # store since with the current mechanism we update the status (and store the function) in the DB when a client
        # query for the status
        mlrun.api.schemas.AuthorizationAction.store,
        auth_info,
    )
    fn = mlrun.api.crud.Functions().get_function(db_session, name, project,
                                                 tag)
    if not fn:
        log_and_raise(HTTPStatus.NOT_FOUND.value,
                      name=name,
                      project=project,
                      tag=tag)

    # nuclio deploy status
    if fn.get("kind") in RuntimeKinds.nuclio_runtimes():
        (
            state,
            address,
            nuclio_name,
            last_log_timestamp,
            text,
            status,
        ) = get_nuclio_deploy_status(
            name,
            project,
            tag,
            last_log_timestamp=last_log_timestamp,
            verbose=verbose,
            auth_info=auth_info,
        )
        if state == "ready":
            logger.info("Nuclio function deployed successfully", name=name)
        if state in ["error", "unhealthy"]:
            logger.error(f"Nuclio deploy error, {text}", name=name)

        internal_invocation_urls = status.get("internalInvocationUrls", [])
        external_invocation_urls = status.get("externalInvocationUrls", [])

        # on earlier versions of mlrun, address used to represent the nodePort external invocation url
        # now that functions can be not exposed (using service_type clusterIP) this no longer relevant
        # and hence, for BC it would be filled with the external invocation url first item
        # or completely empty.
        address = external_invocation_urls[
            0] if external_invocation_urls else ""

        update_in(fn, "status.nuclio_name", nuclio_name)
        update_in(fn, "status.internal_invocation_urls",
                  internal_invocation_urls)
        update_in(fn, "status.external_invocation_urls",
                  external_invocation_urls)
        update_in(fn, "status.state", state)
        update_in(fn, "status.address", address)

        versioned = False
        if state == "ready":
            # Versioned means the version will be saved in the DB forever, we don't want to spam
            # the DB with intermediate or unusable versions, only successfully deployed versions
            versioned = True
        mlrun.api.crud.Functions().store_function(
            db_session,
            fn,
            name,
            project,
            tag,
            versioned=versioned,
        )
        return Response(
            content=text,
            media_type="text/plain",
            headers={
                "x-mlrun-function-status":
                state,
                "x-mlrun-last-timestamp":
                str(last_log_timestamp),
                "x-mlrun-address":
                address,
                "x-mlrun-internal-invocation-urls":
                ",".join(internal_invocation_urls),
                "x-mlrun-external-invocation-urls":
                ",".join(external_invocation_urls),
                "x-mlrun-name":
                nuclio_name,
            },
        )

    # job deploy status
    state = get_in(fn, "status.state", "")
    pod = get_in(fn, "status.build_pod", "")
    image = get_in(fn, "spec.build.image", "")
    out = b""
    if not pod:
        if state == "ready":
            image = image or get_in(fn, "spec.image")
        return Response(
            content=out,
            media_type="text/plain",
            headers={
                "function_status": state,
                "function_image": image,
                "builder_pod": pod,
            },
        )

    logger.info(f"get pod {pod} status")
    state = get_k8s().get_pod_status(pod)
    logger.info(f"pod state={state}")

    if state == "succeeded":
        logger.info("build completed successfully")
        state = mlrun.api.schemas.FunctionState.ready
    if state in ["failed", "error"]:
        logger.error(f"build {state}, watch the build pod logs: {pod}")
        state = mlrun.api.schemas.FunctionState.error

    if logs and state != "pending":
        resp = get_k8s().logs(pod)
        if resp:
            out = resp.encode()[offset:]

    update_in(fn, "status.state", state)
    if state == mlrun.api.schemas.FunctionState.ready:
        update_in(fn, "spec.image", image)

    versioned = False
    if state == mlrun.api.schemas.FunctionState.ready:
        versioned = True
    mlrun.api.crud.Functions().store_function(
        db_session,
        fn,
        name,
        project,
        tag,
        versioned=versioned,
    )

    return Response(
        content=out,
        media_type="text/plain",
        headers={
            "x-mlrun-function-status": state,
            "function_status": state,
            "function_image": image,
            "builder_pod": pod,
        },
    )
示例#16
0
def build_status(
        name: str = "",
        project: str = "",
        tag: str = "",
        offset: int = 0,
        logs: bool = True,
        last_log_timestamp: float = 0.0,
        verbose: bool = False,
        auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
        db_session: Session = Depends(deps.get_db_session),
):
    fn = get_db().get_function(db_session, name, project, tag)
    if not fn:
        log_and_raise(HTTPStatus.NOT_FOUND.value,
                      name=name,
                      project=project,
                      tag=tag)

    # nuclio deploy status
    if fn.get("kind") in RuntimeKinds.nuclio_runtimes():
        (
            state,
            address,
            nuclio_name,
            last_log_timestamp,
            text,
            status,
        ) = get_nuclio_deploy_status(name,
                                     project,
                                     tag,
                                     last_log_timestamp=last_log_timestamp,
                                     verbose=verbose)
        if state == "ready":
            logger.info("Nuclio function deployed successfully", name=name)
        if state in ["error", "unhealthy"]:
            logger.error(f"Nuclio deploy error, {text}", name=name)

        # internal / external invocation urls were added on nuclio 1.6.x
        # and hence, it might be empty
        # to backward compatible with older nuclio versions, we use hard-coded default values
        internal_invocation_urls = status.get(
            "internalInvocationUrls",
            [resolve_function_internal_invocation_url(name)])
        external_invocation_urls = status.get("externalInvocationUrls",
                                              [address] if address else [])

        # on nuclio > 1.6.x we get the external invocation url on the status block
        if external_invocation_urls and not address:
            address = external_invocation_urls[0]

        update_in(fn, "status.nuclio_name", nuclio_name)
        update_in(fn, "status.internal_invocation_urls",
                  internal_invocation_urls)
        update_in(fn, "status.external_invocation_urls",
                  external_invocation_urls)
        update_in(fn, "status.state", state)
        update_in(fn, "status.address", address)

        versioned = False
        if state == "ready":
            # Versioned means the version will be saved in the DB forever, we don't want to spam
            # the DB with intermediate or unusable versions, only successfully deployed versions
            versioned = True
        get_db().store_function(
            db_session,
            fn,
            name,
            project,
            tag,
            versioned=versioned,
            leader_session=auth_verifier.auth_info.session,
        )
        return Response(
            content=text,
            media_type="text/plain",
            headers={
                "x-mlrun-function-status":
                state,
                "x-mlrun-last-timestamp":
                str(last_log_timestamp),
                "x-mlrun-address":
                address,
                "x-mlrun-internal-invocation-urls":
                ",".join(internal_invocation_urls),
                "x-mlrun-external-invocation-urls":
                ",".join(external_invocation_urls),
                "x-mlrun-name":
                nuclio_name,
            },
        )

    # job deploy status
    state = get_in(fn, "status.state", "")
    pod = get_in(fn, "status.build_pod", "")
    image = get_in(fn, "spec.build.image", "")
    out = b""
    if not pod:
        if state == "ready":
            image = image or get_in(fn, "spec.image")
        return Response(
            content=out,
            media_type="text/plain",
            headers={
                "function_status": state,
                "function_image": image,
                "builder_pod": pod,
            },
        )

    logger.info(f"get pod {pod} status")
    state = get_k8s().get_pod_status(pod)
    logger.info(f"pod state={state}")

    if state == "succeeded":
        logger.info("build completed successfully")
        state = mlrun.api.schemas.FunctionState.ready
    if state in ["failed", "error"]:
        logger.error(f"build {state}, watch the build pod logs: {pod}")
        state = mlrun.api.schemas.FunctionState.error

    if logs and state != "pending":
        resp = get_k8s().logs(pod)
        if resp:
            out = resp.encode()[offset:]

    update_in(fn, "status.state", state)
    if state == mlrun.api.schemas.FunctionState.ready:
        update_in(fn, "spec.image", image)

    versioned = False
    if state == mlrun.api.schemas.FunctionState.ready:
        versioned = True
    get_db().store_function(
        db_session,
        fn,
        name,
        project,
        tag,
        versioned=versioned,
        leader_session=auth_verifier.auth_info.session,
    )

    return Response(
        content=out,
        media_type="text/plain",
        headers={
            "x-mlrun-function-status": state,
            "function_status": state,
            "function_image": image,
            "builder_pod": pod,
        },
    )