Exemplo n.º 1
0
async def store_artifact(
        request: Request,
        project: str,
        uid: str,
        key: str,
        tag: str = "",
        iter: int = 0,
        db_session: Session = Depends(deps.get_db_session),
):
    """Persist a JSON-encoded artifact posted in the request body.

    Responds with 400 when the body is not valid JSON.
    """
    try:
        data = await request.json()
    except ValueError:
        data = None
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug("Storing artifact", data=data)
    # DB access is blocking — run it off the event loop
    await run_in_threadpool(
        get_db().store_artifact,
        db_session,
        key,
        data,
        uid,
        iter=iter,
        tag=tag,
        project=project,
    )
    return {}
Exemplo n.º 2
0
 async def invoke_schedule(self, db_session: Session, project: str,
                           name: str):
     """Load the schedule record and invoke its resolved job function once."""
     logger.debug("Invoking schedule", project=project, name=name)
     # NOTE(review): this DB call is synchronous inside an async function and
     # may block the event loop; a sibling implementation off-loads it to a
     # threadpool — confirm whether that is needed here.
     db_schedule = get_db().get_schedule(db_session, project, name)
     function, args, kwargs = self._resolve_job_function(
         db_schedule.kind, db_schedule.scheduled_object, name)
     # the resolved function is awaitable; propagate its result to the caller
     return await function(*args, **kwargs)
Exemplo n.º 3
0
 def patch_project(
     self,
     session: sqlalchemy.orm.Session,
     name: str,
     project: dict,
     patch_mode: mlrun.api.schemas.PatchMode = mlrun.api.schemas.PatchMode.
     replace,
 ):
     """Merge the given patch into the project stored in Nuclio and save it.

     Only labels, annotations and the spec description are merged; missing
     keys in the patch leave the Nuclio-side values untouched.
     """
     logger.debug(
         "Patching project in Nuclio",
         name=name,
         project=project,
         patch_mode=patch_mode,
     )
     nuclio_project = self._get_project_from_nuclio(name).json()
     metadata_patch = project.get("metadata", {})
     # labels and annotations are merged key-by-key into the existing maps
     for field in ("labels", "annotations"):
         patch_values = metadata_patch.get(field)
         if patch_values is not None:
             nuclio_project.setdefault("metadata", {}).setdefault(
                 field, {}).update(patch_values)
     description = project.get("spec", {}).get("description")
     if description is not None:
         nuclio_project.setdefault("spec", {})["description"] = description
     self._put_project_to_nuclio(nuclio_project)
Exemplo n.º 4
0
 def delete_project(
     self,
     session: sqlalchemy.orm.Session,
     name: str,
     deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.
     schemas.DeletionStrategy.default(),
 ):
     """Delete the project from Nuclio; a 404 from Nuclio counts as success."""
     logger.debug("Deleting project in Nuclio",
                  name=name,
                  deletion_strategy=deletion_strategy)
     project_schema = mlrun.api.schemas.Project(
         metadata=mlrun.api.schemas.ProjectMetadata(name=name))
     request_body = self._generate_request_body(project_schema)
     request_headers = {
         "x-nuclio-delete-project-strategy":
         deletion_strategy.to_nuclio_deletion_strategy(),
     }
     try:
         self._send_request_to_api("DELETE",
                                   "projects",
                                   json=request_body,
                                   headers=request_headers)
     except requests.HTTPError as exc:
         # anything other than "not found" is a real failure
         if exc.response.status_code != http.HTTPStatus.NOT_FOUND.value:
             raise
         logger.debug(
             "Project not found in Nuclio. Considering deletion as successful",
             name=name,
             deletion_strategy=deletion_strategy,
         )
Exemplo n.º 5
0
 def delete_project(
     self,
     session: sqlalchemy.orm.Session,
     name: str,
     deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.
     schemas.DeletionStrategy.default(),
     auth_info: mlrun.api.schemas.AuthInfo = mlrun.api.schemas.AuthInfo(),
     # In follower mode the projects are kept in a dict on the follower member
     # class, so the existence check and the final deletion must run against
     # that store instead of the DB. This argument lets the caller override the
     # store for those two operations; a dedicated interface layer for just two
     # methods felt like overkill, so this slightly ugly parameter is accepted.
     projects_store_override=None,
 ):
     """Delete a project according to the requested deletion strategy.

     restricted/check: verify the project has no related resources (DB) and
     no external resources; `check` stops after verification without deleting.
     cascading: delete the project's resources first, then the project itself.
     Unknown strategies raise MLRunInvalidArgumentError.
     """
     logger.debug("Deleting project",
                  name=name,
                  deletion_strategy=deletion_strategy)
     projects_store = (projects_store_override
                       or mlrun.api.utils.singletons.db.get_db())
     if (deletion_strategy.is_restricted() or deletion_strategy
             == mlrun.api.schemas.DeletionStrategy.check):
         # nothing to verify or delete if the project does not exist
         if not projects_store.is_project_exists(
                 session, name, leader_session=auth_info.session):
             return
         mlrun.api.utils.singletons.db.get_db(
         ).verify_project_has_no_related_resources(session, name)
         self._verify_project_has_no_external_resources(name)
         # `check` only validates deletability — do not actually delete
         if deletion_strategy == mlrun.api.schemas.DeletionStrategy.check:
             return
     elif deletion_strategy.is_cascading():
         self.delete_project_resources(session, name)
     else:
         raise mlrun.errors.MLRunInvalidArgumentError(
             f"Unknown deletion strategy: {deletion_strategy}")
     projects_store.delete_project(session, name, deletion_strategy)
Exemplo n.º 6
0
    def create_schedule(
        self,
        db_session: Session,
        project: str,
        name: str,
        kind: schemas.ScheduleKinds,
        scheduled_object: Union[Dict, Callable],
        cron_trigger: Union[str, schemas.ScheduleCronTrigger],
        labels: Dict = None,
    ):
        """Validate the cron trigger, persist the schedule, then register it
        with the in-process scheduler."""
        # crontab strings are accepted for convenience and normalized first
        if isinstance(cron_trigger, str):
            cron_trigger = schemas.ScheduleCronTrigger.from_crontab(
                cron_trigger)

        self._validate_cron_trigger(cron_trigger)

        logger.debug(
            "Creating schedule",
            project=project,
            name=name,
            kind=kind,
            scheduled_object=scheduled_object,
            cron_trigger=cron_trigger,
            labels=labels,
        )
        # make sure the owning project exists before writing the schedule
        get_project_member().ensure_project(db_session, project)
        get_db().create_schedule(
            db_session, project, name, kind, scheduled_object, cron_trigger,
            labels,
        )
        self._create_schedule_in_scheduler(
            project, name, kind, scheduled_object, cron_trigger,
        )
Exemplo n.º 7
0
 def try_get_grafana_service_url(self,
                                 session: str) -> typing.Optional[str]:
     """Return the URL of a ready Grafana app service, or None if none exists.

     When both https and http URLs are exposed, https wins.
     """
     logger.debug("Getting grafana service url from Iguazio")
     response = self._send_request_to_api("GET", "app_services_manifests",
                                          session)
     for manifest in response.json().get("data", []):
         services = manifest.get("attributes", {}).get("app_services", [])
         for service in services:
             is_grafana = service.get("spec", {}).get("kind") == "grafana"
             status = service.get("status", {})
             urls = status.get("urls", [])
             if not (is_grafana and status.get("state") == "ready" and urls):
                 continue
             url_by_kind = {url["kind"]: url["url"] for url in urls}
             # precedence for https
             for scheme in ("https", "http"):
                 if scheme in url_by_kind:
                     return url_by_kind[scheme]
     return None
Exemplo n.º 8
0
async def store_function(
        request: Request,
        project: str,
        name: str,
        tag: str = "",
        versioned: bool = False,
        db_session: Session = Depends(deps.get_db_session),
):
    """Store (create or update) a function object from the JSON request body.

    Returns the hash key of the stored function version. Responds with 400
    when the body is not valid JSON.
    """
    data = None
    try:
        data = await request.json()
    except ValueError:
        # consistency fix: the other handlers pass the numeric status code
        # (HTTPStatus.*.value) to log_and_raise, not the enum member
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug(data)
    logger.info("store function: project=%s, name=%s, tag=%s", project, name,
                tag)
    # DB access is blocking — run it off the event loop
    hash_key = await run_in_threadpool(
        get_db().store_function,
        db_session,
        data,
        name,
        project,
        tag=tag,
        versioned=versioned,
    )
    return {
        'hash_key': hash_key,
    }
Exemplo n.º 9
0
 def _update_schedule_in_scheduler(
     self,
     project: str,
     name: str,
     kind: schemas.ScheduleKinds,
     scheduled_object: Any,
     cron_trigger: schemas.ScheduleCronTrigger,
     concurrency_limit: int,
 ):
     """Re-resolve the job and push the updated definition into the scheduler."""
     job_id = self._resolve_job_id(project, name)
     logger.debug("Updating schedule in scheduler", job_id=job_id)
     job_function, job_args, job_kwargs = self._resolve_job_function(
         kind,
         scheduled_object,
         project,
         name,
         concurrency_limit,
     )
     apscheduler_trigger = (
         self.transform_schemas_cron_trigger_to_apscheduler_cron_trigger(
             cron_trigger))
     # recompute the next fire time from "now" for the (possibly new) trigger
     first_fire_time = apscheduler_trigger.get_next_fire_time(
         None, datetime.now(self._scheduler.timezone))
     self._scheduler.modify_job(
         job_id,
         func=job_function,
         args=job_args,
         kwargs=job_kwargs,
         trigger=apscheduler_trigger,
         next_run_time=first_fire_time,
     )
Exemplo n.º 10
0
 def query_permissions(
     self,
     resource: str,
     action: mlrun.api.schemas.AuthorizationAction,
     auth_info: mlrun.api.schemas.AuthInfo,
     raise_on_forbidden: bool = True,
 ) -> bool:
     """Ask OPA whether `action` on `resource` is allowed for the caller.

     Leader-originated requests and a disabled ("none") authorization mode
     short-circuit to allowed.
     """
     from_leader = self._is_request_from_leader(auth_info.projects_role)
     if from_leader or mlrun.mlconf.httpdb.authorization.mode == "none":
         return True
     body = self._generate_permission_request_body(resource, action,
                                                   auth_info)
     verbose = self._log_level > 5
     if verbose:
         logger.debug("Sending request to OPA", body=body)
     response_body = self._send_request_to_api(
         "POST", self._permission_query_path, json=body).json()
     if verbose:
         logger.debug("Received response from OPA", body=response_body)
     allowed = response_body["result"]
     if raise_on_forbidden and not allowed:
         raise mlrun.errors.MLRunAccessDeniedError(
             f"Not allowed to {action} resource {resource}")
     return allowed
Exemplo n.º 11
0
    def _create_schedule_in_scheduler(
        self,
        project: str,
        name: str,
        kind: schemas.ScheduleKinds,
        scheduled_object: Any,
        cron_trigger: schemas.ScheduleCronTrigger,
        concurrency_limit: int,
    ):
        """Resolve the job callable for the schedule and add it to APScheduler."""
        job_id = self._resolve_job_id(project, name)
        logger.debug("Adding schedule to scheduler", job_id=job_id)
        job_function, job_args, job_kwargs = self._resolve_job_function(
            kind,
            scheduled_object,
            project,
            name,
            concurrency_limit,
        )
        apscheduler_trigger = (
            self.transform_schemas_cron_trigger_to_apscheduler_cron_trigger(
                cron_trigger))

        # max_instances bounds concurrent *triggering* of the job; the run
        # wrapper's own logic bounds concurrency of the jobs themselves
        self._scheduler.add_job(
            job_function,
            apscheduler_trigger,
            job_args,
            job_kwargs,
            job_id,
            max_instances=concurrency_limit,
        )
Exemplo n.º 12
0
def get_filestat(request: Request, schema: str = "", path: str = "", user: str = ""):
    """Return size, modification time and guessed mimetype of the object at
    `path` (resolved via `get_obj_path` with the given schema/user).

    Responds with 404 on an illegal path or a missing object.
    """
    # bug fix: the original started with `_, filename = path.split(path)` —
    # splitting a string on itself, which raises ValueError for the default
    # empty path and yields ['', ''] otherwise — and never used the result.
    path = get_obj_path(schema, path, user=user)
    if not path:
        log_and_raise(
            HTTPStatus.NOT_FOUND.value, path=path, err="illegal path prefix or schema"
        )

    logger.debug("Got get filestat request", path=path)

    secrets = get_secrets(request)
    stat = None
    try:
        stores = store_manager.set(secrets)
        stat = stores.object(url=path).stat()
    except FileNotFoundError as exc:
        log_and_raise(HTTPStatus.NOT_FOUND.value, path=path, err=str(exc))

    # fall back to a generic binary mimetype when none can be guessed
    ctype, _ = mimetypes.guess_type(path)
    if not ctype:
        ctype = "application/octet-stream"

    return {
        "size": stat.size,
        "modified": stat.modified,
        "mimetype": ctype,
    }
Exemplo n.º 13
0
 def delete_project(self, session: sqlalchemy.orm.Session, name: str):
     """Ask Nuclio to delete the project identified by `name`."""
     logger.debug("Deleting project in Nuclio", name=name)
     project_metadata = mlrun.api.schemas.ProjectMetadata(name=name)
     request_body = self._generate_request_body(
         mlrun.api.schemas.Project(metadata=project_metadata))
     self._send_request_to_api("DELETE", "projects", json=request_body)
Exemplo n.º 14
0
 def store_project(
     self,
     session: sqlalchemy.orm.Session,
     name: str,
     project: mlrun.api.schemas.Project,
 ):
     """Persist the full project object via the DB singleton."""
     logger.debug("Storing project", name=name, project=project)
     db = mlrun.api.utils.singletons.db.get_db()
     db.store_project(session, name, project)
Exemplo n.º 15
0
    def _backup_revision_sqlite(db_file_path: str, current_version: str):
        """Copy the SQLite DB file aside as `<current_version>.db` in the
        same directory, preserving file metadata."""
        # pathlib's .parent replaces the os.path.dirname round-trip; for a
        # bare filename both resolve the directory to "."
        backup_path = pathlib.Path(db_file_path).parent / f"{current_version}.db"

        logger.debug(
            "Backing up DB file", db_file_path=db_file_path, backup_path=backup_path
        )
        # copy2 preserves timestamps/metadata in addition to contents
        shutil.copy2(db_file_path, backup_path)
Exemplo n.º 16
0
def run_function_periodically(interval: int, function, *args, **kwargs):
    """Schedule `function` to run every `interval` seconds on the running
    event loop."""
    global tasks
    logger.debug(
        f'Submitting function to run periodically: {function.__name__}')
    wrapped = _periodic_function_wrapper(interval, function, *args, **kwargs)
    # keep a reference in the module-level list so the task is not GC'd
    tasks.append(asyncio.get_running_loop().create_task(wrapped))
Exemplo n.º 17
0
 def delete_schedule(self, db_session: Session, project: str, name: str):
     """Remove a schedule from the scheduler (if present) and from the DB."""
     logger.debug("Deleting schedule", project=project, name=name)
     job_id = self._resolve_job_id(project, name)
     # deletion is idempotent: a missing scheduler job is not an error
     if self._scheduler.get_job(job_id):
         self._scheduler.remove_job(job_id)
     get_db().delete_schedule(db_session, project, name)
Exemplo n.º 18
0
 def create_project(
     self,
     session: str,
     project: mlrun.api.schemas.Project,
     wait_for_completion: bool = True,
 ) -> bool:
     """Create the project in Iguazio and return the creation call's result."""
     logger.debug("Creating project in Iguazio", project=project)
     iguazio_project = self._transform_mlrun_project_to_iguazio_project(
         project)
     return self._create_project_in_iguazio(
         session, iguazio_project, wait_for_completion)
Exemplo n.º 19
0
 def delete_schedule(
     self,
     db_session: Session,
     project: str,
     name: str,
 ):
     """Tear down the schedule's scheduler resources, then drop its DB record."""
     logger.debug("Deleting schedule", project=project, name=name)
     self._remove_schedule_scheduler_resources(project, name)
     get_db().delete_schedule(db_session, project, name)
Exemplo n.º 20
0
def _cleanup_runtimes():
    """Delete leftover runtime resources for every runtime kind that has a
    handler."""
    logger.debug('Cleaning runtimes')
    db_session = create_session()
    try:
        handlers = (get_runtime_handler(kind)
                    for kind in RuntimeKinds.runtime_with_handlers())
        for runtime_handler in handlers:
            runtime_handler.delete_resources(get_db(), db_session)
    finally:
        # always release the session, even when a handler raises
        close_session(db_session)
Exemplo n.º 21
0
 def create_project(
     self,
     session: str,
     project: mlrun.api.schemas.Project,
     wait_for_completion: bool = True,
 ) -> typing.Tuple[mlrun.api.schemas.Project, bool]:
     """Create the project in Iguazio and return the (project, flag) pair
     produced by the creation call."""
     logger.debug("Creating project in Iguazio", project=project)
     request_body = self._generate_request_body(project)
     return self._create_project_in_iguazio(session, request_body,
                                            wait_for_completion)
Exemplo n.º 22
0
def store_function(project, name):
    """Store a JSON-encoded function object under `project`/`name`."""
    tag = request.args.get('tag', '')
    try:
        data = request.get_json(force=True)
    except ValueError:
        return json_error(HTTPStatus.BAD_REQUEST, reason='bad JSON body')

    logger.debug(data)
    _db.store_function(data, name, project, tag)
    return jsonify(ok=True)
Exemplo n.º 23
0
def update_run(project, uid):
    """Apply a partial update (JSON body) to an existing run record."""
    try:
        data = request.get_json(force=True)
    except ValueError:
        return json_error(HTTPStatus.BAD_REQUEST, reason='bad JSON body')

    logger.debug(data)
    # renamed from `iter` to avoid shadowing the builtin
    iteration = int(request.args.get('iter', '0'))
    _db.update_run(data, uid, project, iter=iteration)
    app.logger.info('update run: {}'.format(data))
    return jsonify(ok=True)
Exemplo n.º 24
0
 def get_schedule(
     self,
     db_session: Session,
     project: str,
     name: str,
     include_last_run: bool = False,
 ) -> schemas.ScheduleOutput:
     """Fetch a schedule from the DB and return its enriched API form."""
     logger.debug("Getting schedule", project=project, name=name)
     db_schedule = get_db().get_schedule(db_session, project, name)
     return self._transform_and_enrich_db_schedule(
         db_session, db_schedule, include_last_run)
Exemplo n.º 25
0
def store_artifact(project, uid, key):
    """Store a JSON-encoded artifact under `project`, run `uid` and `key`."""
    try:
        data = request.get_json(force=True)
    except ValueError:
        return json_error(HTTPStatus.BAD_REQUEST, reason='bad JSON body')

    logger.debug(data)
    query = request.args
    tag = query.get('tag', '')
    iteration = int(query.get('iter', '0'))
    _db.store_artifact(key, data, uid, iter=iteration, tag=tag,
                       project=project)
    return jsonify(ok=True)
Exemplo n.º 26
0
    def create_pipeline(
        self,
        experiment_name: str,
        run_name: str,
        content_type: str,
        data: bytes,
        arguments: dict = None,
        namespace: str = mlrun.mlconf.namespace,
    ):
        """Create and start a KFP pipeline run from a yaml/zip payload.

        :param experiment_name: KFP experiment to create/attach the run to
        :param run_name: name for the pipeline run
        :param content_type: request content type; must contain "/yaml" or "/zip"
        :param data: raw pipeline package bytes
        :param arguments: pipeline parameters (defaults to empty dict)
        :param namespace: kubernetes namespace for the KFP client
        :return: the created KFP run object
        """
        if arguments is None:
            arguments = {}
        if "/yaml" in content_type:
            content_type = ".yaml"
        # bug fix: was `" /zip" in content_type` (leading space), which never
        # matched real content types such as "application/zip"
        elif "/zip" in content_type:
            content_type = ".zip"
        else:
            mlrun.api.api.utils.log_and_raise(
                http.HTTPStatus.BAD_REQUEST.value,
                reason=f"unsupported pipeline type {content_type}",
            )

        logger.debug("Writing pipeline to temp file", content_type=content_type)
        # removed a leftover `print(str(data))` that dumped the raw payload

        # write the payload to a temp file with the suffix KFP expects
        pipeline_file = tempfile.NamedTemporaryFile(suffix=content_type)
        with open(pipeline_file.name, "wb") as fp:
            fp.write(data)

        logger.info(
            "Creating pipeline",
            experiment_name=experiment_name,
            run_name=run_name,
            arguments=arguments,
        )

        try:
            kfp_client = kfp.Client(namespace=namespace)
            experiment = kfp_client.create_experiment(name=experiment_name)
            run = kfp_client.run_pipeline(
                experiment.id, run_name, pipeline_file.name, params=arguments
            )
        except Exception as exc:
            logger.warning(
                "Failed creating pipeline",
                traceback=traceback.format_exc(),
                exc=str(exc),
            )
            raise mlrun.errors.MLRunBadRequestError(f"Failed creating pipeline: {exc}")
        finally:
            # closing the NamedTemporaryFile also deletes it
            pipeline_file.close()

        return run
Exemplo n.º 27
0
 def patch_project(
     self,
     session: sqlalchemy.orm.Session,
     name: str,
     project: dict,
     patch_mode: mlrun.api.schemas.PatchMode = mlrun.api.schemas.PatchMode.replace,
 ):
     """Apply a partial update to the stored project via the DB layer."""
     logger.debug(
         "Patching project", name=name, project=project, patch_mode=patch_mode
     )
     db = mlrun.api.utils.singletons.db.get_db()
     db.patch_project(session, name, project, patch_mode)
Exemplo n.º 28
0
 async def invoke_schedule(self, db_session: Session, project: str,
                           name: str):
     """Fetch the schedule record and run its job function once, awaiting
     and returning the result."""
     logger.debug("Invoking schedule", project=project, name=name)
     # the DB call is blocking; off-load it so the event loop stays responsive
     db_schedule = await fastapi.concurrency.run_in_threadpool(
         get_db().get_schedule, db_session, project, name)
     job_function, job_args, job_kwargs = self._resolve_job_function(
         db_schedule.kind,
         db_schedule.scheduled_object,
         project,
         name,
         db_schedule.concurrency_limit,
     )
     return await job_function(*job_args, **job_kwargs)
Exemplo n.º 29
0
    def update_schedule(
        self,
        db_session: Session,
        auth_info: mlrun.api.schemas.AuthInfo,
        project: str,
        name: str,
        scheduled_object: Union[Dict, Callable] = None,
        cron_trigger: Union[str, schemas.ScheduleCronTrigger] = None,
        labels: Dict = None,
        concurrency_limit: int = None,
    ):
        """Update an existing schedule in the DB, refresh its secrets, and
        push the merged state into the in-process scheduler.
        """
        # crontab strings are accepted for convenience and normalized first
        if isinstance(cron_trigger, str):
            cron_trigger = schemas.ScheduleCronTrigger.from_crontab(
                cron_trigger)

        if cron_trigger is not None:
            self._validate_cron_trigger(cron_trigger)

        logger.debug(
            "Updating schedule",
            project=project,
            name=name,
            scheduled_object=scheduled_object,
            cron_trigger=cron_trigger,
            labels=labels,
            concurrency_limit=concurrency_limit,
        )
        get_db().update_schedule(
            db_session,
            project,
            name,
            scheduled_object,
            cron_trigger,
            labels,
            concurrency_limit,
        )
        # re-read the schedule so the scheduler gets the merged, enriched
        # state rather than just the fields passed to this call
        db_schedule = get_db().get_schedule(db_session, project, name)
        updated_schedule = self._transform_and_enrich_db_schedule(
            db_session, db_schedule)

        # secrets must be in place before the scheduler job definition is
        # updated, since the job may fire immediately after the update
        self._ensure_auth_info_has_access_key(auth_info, db_schedule.kind)
        self._store_schedule_secrets(auth_info, project, name)
        self._update_schedule_in_scheduler(
            project,
            name,
            updated_schedule.kind,
            updated_schedule.scheduled_object,
            updated_schedule.cron_trigger,
            updated_schedule.concurrency_limit,
            auth_info,
        )
Exemplo n.º 30
0
    def create_schedule(
        self,
        db_session: Session,
        auth_info: mlrun.api.schemas.AuthInfo,
        project: str,
        name: str,
        kind: schemas.ScheduleKinds,
        scheduled_object: Union[Dict, Callable],
        cron_trigger: Union[str, schemas.ScheduleCronTrigger],
        labels: Dict = None,
        concurrency_limit: int = config.httpdb.scheduling.
        default_concurrency_limit,
    ):
        """Validate the trigger, ensure the owning project exists, persist the
        schedule in the DB, and register it with the in-process scheduler.
        """
        # crontab strings are accepted for convenience and normalized first
        if isinstance(cron_trigger, str):
            cron_trigger = schemas.ScheduleCronTrigger.from_crontab(
                cron_trigger)

        self._validate_cron_trigger(cron_trigger)

        logger.debug(
            "Creating schedule",
            project=project,
            name=name,
            kind=kind,
            scheduled_object=scheduled_object,
            cron_trigger=cron_trigger,
            labels=labels,
            concurrency_limit=concurrency_limit,
        )
        # make sure the project exists before writing the schedule record
        get_project_member().ensure_project(db_session,
                                            project,
                                            leader_session=auth_info.session)
        get_db().create_schedule(
            db_session,
            project,
            name,
            kind,
            scheduled_object,
            cron_trigger,
            concurrency_limit,
            labels,
        )
        # register with the scheduler only after the DB write succeeded
        self._create_schedule_in_scheduler(
            project,
            name,
            kind,
            scheduled_object,
            cron_trigger,
            concurrency_limit,
            auth_info,
        )