Пример #1
0
def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]:
    """
    Delete the tasks that belong to the given projects, along with their events.

    Before the deletion, tasks from other projects whose parent is one of the
    deleted tasks get ``parent=None`` and models from other projects that
    reference one of the deleted tasks get ``task=None``.

    Returns a tuple of:
    - the value returned by the tasks queryset ``delete()``
    - the urls collected by ``collect_debug_image_urls`` and
      ``collect_plot_image_urls`` for every task
    - the uris of the tasks artifacts whose mode is ``ArtifactModes.output``
    """
    project_tasks = Task.objects(project__in=projects).only(
        "id", "execution__artifacts"
    )
    if not project_tasks:
        return 0, set(), set()

    task_ids = {project_task.id for project_task in project_tasks}

    # Detach entities that live in other projects but point to the deleted tasks
    with TimingContext("mongo", "delete_tasks_update_children"):
        Task.objects(parent__in=task_ids, project__nin=projects).update(parent=None)
        Model.objects(task__in=task_ids, project__nin=projects).update(task=None)

    event_urls: Set = set()
    artifact_urls: Set = set()
    for project_task in project_tasks:
        event_urls.update(collect_debug_image_urls(company, project_task.id))
        event_urls.update(collect_plot_image_urls(company, project_task.id))

        if not (project_task.execution and project_task.execution.artifacts):
            continue
        # Only output artifacts that actually have a uri are reported
        for artifact in project_task.execution.artifacts.values():
            if artifact.mode == ArtifactModes.output and artifact.uri:
                artifact_urls.add(artifact.uri)

    event_bll.delete_multi_task_events(company, list(task_ids))
    return project_tasks.delete(), event_urls, artifact_urls
Пример #2
0
def verify_task_children_and_ouptuts(task: Task,
                                     force: bool) -> TaskOutputs[Model]:
    """
    Collect the models related to the task and verify that the task may be deleted.

    Unless ``force`` is set, ``errors.bad_request.TaskCannotBeDeleted`` is raised when:
    - the task has children tasks in the published status
    - models whose ``task`` field is this task end up in the ``published`` group
    - one of the models listed in ``task.models.output`` is ready

    Draft models that are used as an input model by some task are removed
    from the returned ``draft`` group.
    """
    if not force:
        with TimingContext("mongo", "count_published_children"):
            published_children = Task.objects(
                parent=task.id, status=TaskStatus.published
            ).count()
            if published_children:
                raise errors.bad_request.TaskCannotBeDeleted(
                    "has children, use force=True",
                    task=task.id,
                    children=published_children,
                )

    with TimingContext("mongo", "get_task_models"):
        task_models_query = Model.objects(task=task.id).only("id", "task", "ready")
        models = TaskOutputs(attrgetter("ready"), Model, task_models_query)
        if models.published and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    if task.models and task.models.output:
        with TimingContext("mongo", "get_task_output_model"):
            output_ids = [item.model for item in task.models.output]
            for output_model in Model.objects(id__in=output_ids):
                if not output_model.ready:
                    models.draft.append(output_model)
                    continue
                if not force:
                    raise errors.bad_request.TaskCannotBeDeleted(
                        "has output model, use force=True",
                        task=task.id,
                        model=output_model.id,
                    )
                models.published.append(output_model)

    if not models.draft:
        return models

    with TimingContext("mongo", "get_execution_models"):
        draft_ids = models.draft.ids
        dependent_tasks = Task.objects(
            models__input__model__in=draft_ids
        ).only("id", "models")
        # All the input model ids of the tasks that use any of the draft models
        used_as_input = set()
        for dependent in dependent_tasks:
            if dependent.models:
                used_as_input.update(item.model for item in dependent.models.input)
        if used_as_input:
            models.draft = DocumentGroup(
                Model,
                (draft for draft in models.draft if draft.id not in used_as_input),
            )

    return models
Пример #3
0
def delete_metadata(_: APICall, company_id: str,
                    request: DeleteMetadataRequest):
    """
    Delete the requested metadata keys from a company model.

    The model is first looked up through ModelBLL.get_company_model_by_id
    (only the id is fetched). If metadata_delete reports an update then the
    model ``last_update`` is set to the current utc time.

    Returns a dict with the ``updated`` value returned by metadata_delete.
    """
    model_id = request.model
    ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=model_id, only_fields=("id",)
    )

    result = {
        "updated": metadata_delete(cls=Model, _id=model_id, keys=request.keys)
    }
    if result["updated"]:
        Model.objects(id=model_id).update_one(last_update=datetime.utcnow())

    return result
Пример #4
0
def add_or_update_metadata(_: APICall, company_id: str,
                           request: AddOrUpdateMetadataRequest):
    """
    Add or update metadata items on a company model.

    The model is first looked up through ModelBLL.get_company_model_by_id.
    The returned document is not used, so only the id is requested
    (the same way it is done in delete_metadata).
    If metadata_add_or_update reports an update then the model ``last_update``
    is set to the current utc time.

    Returns a dict with the ``updated`` value returned by metadata_add_or_update.
    """
    model_id = request.model
    # Existence/ownership check only: no need to load the whole model document
    ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=model_id, only_fields=("id",)
    )

    updated = metadata_add_or_update(
        cls=Model,
        _id=model_id,
        items=get_metadata_from_api(request.metadata),
    )
    if updated:
        Model.objects(id=model_id).update_one(last_update=datetime.utcnow())

    return {"updated": updated}
Пример #5
0
    def model_set_ready(
        cls,
        model_id: str,
        company_id: str,
        publish_task: bool,
        force_publish_task: bool = False,
    ) -> tuple:
        """
        Mark a company model as ready, optionally publishing the task it references.

        Raises errors.bad_request.InvalidModelId if no such company model is found
        and errors.bad_request.ModelIsReady if the model is already ready.

        When ``publish_task`` is set and the model task exists in the company and
        is not yet published, the task is published through ``cls.publish_task``
        (without publishing the model again).

        Returns a tuple of the model update result and a dict that is either
        empty or holds the ``data`` returned from publishing the task and the task ``id``.
        """
        with translate_errors_context():
            lookup = dict(id=model_id, company=company_id)
            model = Model.objects(**lookup).first()
            if not model:
                raise errors.bad_request.InvalidModelId(**lookup)
            if model.ready:
                raise errors.bad_request.ModelIsReady(**lookup)

            published_task_data = {}
            if model.task and publish_task:
                task_query = Task.objects(id=model.task, company=company_id)
                task = task_query.only("id", "status").first()
                if task and task.status != TaskStatus.published:
                    publish_result = cls.publish_task(
                        task_id=model.task,
                        company_id=company_id,
                        publish_model=False,
                        force=force_publish_task,
                    )
                    published_task_data["data"] = publish_result
                    published_task_data["id"] = model.task

            return model.update(upsert=False, ready=True), published_task_data
Пример #6
0
def get_by_task_id(call: APICall, company_id, _):
    """
    Return the model referenced by the last entry of the task ``models.output``.

    Raises:
    - errors.moved_permanently.NotSupported if the requested endpoint version
      is above ModelsBackwardsCompatibility.max_version
    - errors.bad_request.InvalidTaskId if the task is not found
    - errors.bad_request.MissingTaskFields if the task has no output models
    - errors.bad_request.InvalidModelId if the model is not found under the
      constraint built by get_company_or_none_constraint

    The model dictionary is placed in ``call.result.data`` under the ``model`` key.
    """
    if call.requested_endpoint_version > ModelsBackwardsCompatibility.max_version:
        raise errors.moved_permanently.NotSupported(
            "use models.get_by_id/get_all apis")

    task_id = call.data["task"]

    with translate_errors_context():
        task_lookup = dict(id=task_id, company=company_id)
        task = Task.get(_only=["models"], **task_lookup)
        if not task:
            raise errors.bad_request.InvalidTaskId(**task_lookup)
        if not (task.models and task.models.output):
            raise errors.bad_request.MissingTaskFields(field="models.output")

        # The last output entry is the one that is returned
        model_id = task.models.output[-1].model
        model_filter = Q(id=model_id) & get_company_or_none_constraint(company_id)
        model = Model.objects(model_filter).first()
        if not model:
            raise errors.bad_request.InvalidModelId(
                "no such public or company model",
                id=model_id,
                company=company_id,
            )

        model_dict = model.to_proper_dict()
        conform_output_tags(call, model_dict)
        call.result.data = {"model": model_dict}
Пример #7
0
def get_output_model(task, force=False):
    """
    Fetch the model referenced by ``task.output.model``.

    Returns the model document or None if it was not found.
    Raises errors.bad_request.TaskCannotBeDeleted if the model is ready
    and ``force`` is not set.
    """
    with TimingContext("mongo", "get_task_output_model"):
        output_model = Model.objects(id=task.output.model).first()

    if force or not output_model or not output_model.ready:
        return output_model

    raise errors.bad_request.TaskCannotBeDeleted(
        "has output model, use force=True", task=task.id, model=task.output.model
    )
Пример #8
0
 def get_frameworks(self, company, project_ids: Optional[Sequence]) -> Sequence:
     """
     Return the distinct ``framework`` values of the models that match the
     constraint built by get_company_or_none_constraint for the company.
     If project ids are passed then only models from these projects are considered
     """
     constraint = get_company_or_none_constraint(company)
     if project_ids:
         constraint = constraint & Q(project__in=project_ids)

     matching_models = Model.objects(constraint)
     return matching_models.distinct(field="framework")
Пример #9
0
    def unarchive_model(cls, model_id: str, company_id: str):
        """
        Remove the archived system tag from a company model.

        The model is first looked up through ``cls.get_company_model_by_id``.
        The update pulls ``EntityVisibility.archived.value`` from ``system_tags``
        and sets ``last_update`` to the current utc time.

        Returns the result of the queryset ``update`` call.
        """
        cls.get_company_model_by_id(
            company_id=company_id, model_id=model_id, only_fields=("id",)
        )

        company_model = Model.objects(company=company_id, id=model_id)
        return company_model.update(
            pull__system_tags=EntityVisibility.archived.value,
            last_update=datetime.utcnow(),
        )
Пример #10
0
    def validate_execution_model(task, allow_only_public=False):
        """
        Verify that the task execution model exists and return it.

        Returns None if the task has no execution section or no execution model.
        The model is searched with the constraint built by
        get_company_or_none_constraint, using the task company or None
        when ``allow_only_public`` is set.
        Raises errors.bad_request.InvalidModelId if the model is not found.
        """
        if not (task.execution and task.execution.model):
            return

        model_id = task.execution.model
        company = task.company if not allow_only_public else None
        model_filter = Q(id=model_id) & get_company_or_none_constraint(company)
        model = Model.objects(model_filter).first()
        if model:
            return model

        raise errors.bad_request.InvalidModelId(model=model_id)
Пример #11
0
 def get_company_model_by_id(cls,
                             company_id: str,
                             model_id: str,
                             only_fields=None) -> Model:
     """
     Return the model with the given id that belongs to the given company.

     If ``only_fields`` is passed then only these fields are requested.
     Raises errors.bad_request.InvalidModelId if no such model is found.
     """
     lookup = dict(company=company_id, id=model_id)
     queryset = Model.objects(**lookup)
     if only_fields:
         queryset = queryset.only(*only_fields)

     model = queryset.first()
     if model:
         return model

     raise errors.bad_request.InvalidModelId(**lookup)
Пример #12
0
    def publish_task(
        cls,
        task_id: str,
        company_id: str,
        publish_model: bool,
        force: bool,
        status_reason: str = "",
        status_message: str = "",
    ) -> dict:
        """
        Publish a task and, optionally, its output model.

        The task is obtained through ``cls.get_task_with_access`` with write
        access required. Unless ``force`` is set, the status change is validated
        with validate_status_change.

        While publishing, the task status is set to ``TaskStatus.publishing``.
        If ``publish_model`` is set and the task output references a model that
        is not ready, the model is set ready through ``cls.model_set_ready``.
        If any exception is raised, the previous task status is restored and
        the exception is re-raised.

        Returns the result of executing the ChangeStatusRequest.
        """
        task = cls.get_task_with_access(task_id,
                                        company_id=company_id,
                                        requires_write_access=True)
        if not force:
            validate_status_change(task.status, TaskStatus.published)

        previous_task_status = task.status
        # task.output may be empty: always work with the local "output" below
        output = task.output or Output()

        try:
            # set state to publishing
            task.status = TaskStatus.publishing
            task.save()

            # publish task models
            if output.model and publish_model:
                output_model = (Model.objects(id=output.model).only(
                    "id", "task", "ready").first())
                if output_model and not output_model.ready:
                    cls.model_set_ready(
                        model_id=output.model,
                        company_id=company_id,
                        publish_task=False,
                    )

            # set task status to published, and update (or set) its new output (view and models)
            return ChangeStatusRequest(
                task=task,
                new_status=TaskStatus.published,
                force=force,
                status_reason=status_reason,
                status_message=status_message,
            ).execute(published=datetime.utcnow(), output=output)

        except Exception:
            # publishing failed: restore the original status and propagate the error
            task.status = previous_task_status
            task.save()
            raise
Пример #13
0
def publish_task(
    task_id: str,
    company_id: str,
    force: bool,
    publish_model_func: Callable[[str, str], Any] = None,
    status_message: str = "",
    status_reason: str = "",
) -> dict:
    """
    Publish a task and, if a publish function is passed, its last output model.

    The task is obtained through TaskBLL.get_task_with_access with write
    access required. Unless ``force`` is set, the status change is validated
    with validate_status_change.

    While publishing, the task status is set to ``TaskStatus.publishing``.
    If the last entry of ``task.models.output`` references a company model that
    is not ready, ``publish_model_func(model_id, company_id)`` is called for it.
    If any exception is raised, the previous task status is restored and
    the exception is re-raised.

    Returns the result of executing the ChangeStatusRequest.
    """
    task = TaskBLL.get_task_with_access(
        task_id, company_id=company_id, requires_write_access=True
    )
    if not force:
        validate_status_change(task.status, TaskStatus.published)

    status_before_publish = task.status
    output = task.output or Output()

    try:
        # mark the task as being published
        task.status = TaskStatus.publishing
        task.save()

        # publish the last output model of the task
        if task.models and task.models.output and publish_model_func:
            last_output_id = task.models.output[-1].model
            model_query = Model.objects(id=last_output_id, company=company_id)
            model = model_query.only("id", "ready").first()
            if model and not model.ready:
                publish_model_func(model.id, company_id)

        # move the task to published and update (or set) its output
        status_change = ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.published,
            force=force,
            status_reason=status_reason,
            status_message=status_message,
        )
        return status_change.execute(published=datetime.utcnow(), output=output)

    except Exception as ex:
        # publishing failed: put back the status the task had before
        task.status = status_before_publish
        task.save()
        raise ex
Пример #14
0
    def validate_input_models(task, allow_only_public=False):
        """
        Verify that all the input models of the task exist.

        Nothing is checked if the task has no ``models`` section or no input models.
        The models are searched with the constraint built by
        get_company_or_none_constraint, using the task company or None
        when ``allow_only_public`` is set.
        Raises errors.bad_request.InvalidModelId with the set of the ids that
        were not found.
        """
        # task.models may be empty, so guard it before looking at the inputs
        if not task.models or not task.models.input:
            return

        company = None if allow_only_public else task.company
        model_ids = {m.model for m in task.models.input}
        models = Model.objects(
            Q(id__in=model_ids)
            & get_company_or_none_constraint(company)).only("id")
        missing = model_ids - {m.id for m in models}
        if missing:
            raise errors.bad_request.InvalidModelId(models=missing)
Пример #15
0
def get_outputs_for_deletion(task, force=False):
    """
    Collect the models and the children tasks that are affected by the task deletion.

    Unless ``force`` is set, errors.bad_request.TaskCannotBeDeleted is raised
    if the task has published models, a ready output model (raised from
    get_output_model) or published children tasks.

    Draft models that are used as the execution model of some task are
    removed from the draft group.

    Returns a tuple of the models (TaskOutputs) and the children tasks queryset.
    """
    with TimingContext("mongo", "get_task_models"):
        task_models_query = Model.objects(task=task.id).only("id", "task", "ready")
        models = TaskOutputs(attrgetter("ready"), Model, task_models_query)
        if models.published and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    if task.output.model:
        output_model = get_output_model(task, force)
        if output_model:
            group = models.published if output_model.ready else models.draft
            group.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            draft_ids = [draft.id for draft in models.draft]
            dependent_tasks = Task.objects(execution__model__in=draft_ids).only(
                "id", "execution.model"
            )
            # Draft models that are still used for execution are kept
            busy_models = {dependent.execution.model for dependent in dependent_tasks}
            models.draft[:] = [
                draft for draft in models.draft if draft.id not in busy_models
            ]

    with TimingContext("mongo", "get_task_children"):
        tasks = Task.objects(parent=task.id).only("id", "parent", "status")
        published_tasks = [
            child for child in tasks if child.status == TaskStatus.published
        ]
        if published_tasks and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has children, use force=True", task=task.id, children=published_tasks
            )

    return models, tasks
Пример #16
0
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
    """
    Delete the models of the given projects.

    Tasks from other projects that reference the deleted models in their
    input models get the reference set to None. The same is done for the
    output models of the tasks (from other projects) that the deleted models
    point to through their ``task`` field.

    Returns a tuple of the value returned by the models queryset ``delete()``
    and the set of the non empty uris of the deleted models.
    """
    with TimingContext("mongo", "delete_models"):
        project_models = Model.objects(project__in=projects).only("task", "id", "uri")
        if not project_models:
            return 0, set()

        model_ids = list({model.id for model in project_models})

        def clear_references(
            model_path: str, elem_path: str, extra_filter: dict
        ) -> None:
            # Set to None the matching array elements in tasks outside of the projects
            Task._get_collection().update_many(
                filter={
                    **extra_filter,
                    "project": {"$nin": projects},
                    model_path: {"$in": model_ids},
                },
                update={"$set": {elem_path: None}},
                array_filters=[{"elem.model": {"$in": model_ids}}],
                upsert=False,
            )

        clear_references("models.input.model", "models.input.$[elem].model", {})

        model_tasks = list({model.task for model in project_models if model.task})
        if model_tasks:
            clear_references(
                "models.output.model",
                "models.output.$[elem].model",
                {"_id": {"$in": model_tasks}},
            )

        urls = {model.uri for model in project_models if model.uri}
        return project_models.delete(), urls
Пример #17
0
    def delete_model(cls, model_id: str, company_id: str,
                     force: bool) -> Tuple[int, Model]:
        """
        Delete a company model and mark the references to it as deleted.

        The model is looked up through ``cls.get_company_model_by_id``.

        Without ``force`` the deletion is refused with:
        - errors.bad_request.ModelInUse if any task uses the model as an input model
        - errors.bad_request.ModelCreatingTaskExists if the task referenced by
          the model is published

        With ``force`` the model id in the referencing tasks is replaced by
        the model id prefixed with ``deleted_prefix``. For the published task
        referenced by the model, ``output.error`` and ``last_change`` are also updated.

        Returns a tuple of the delete result and the model document that was
        fetched before the deletion.
        """
        model = cls.get_company_model_by_id(
            company_id=company_id,
            model_id=model_id,
            only_fields=("id", "task", "project", "uri"),
        )
        deleted_model_id = f"{deleted_prefix}{model_id}"

        using_tasks = Task.objects(models__input__model=model_id).only("id")
        if using_tasks:
            if not force:
                raise errors.bad_request.ModelInUse(
                    "as execution model, use force=True to delete",
                    num_tasks=len(using_tasks),
                )
            # update deleted model id in using tasks
            Task._get_collection().update_many(
                filter={"_id": {
                    "$in": [t.id for t in using_tasks]
                }},
                update={
                    "$set": {
                        "models.input.$[elem].model": deleted_model_id
                    }
                },
                array_filters=[{
                    "elem.model": model_id
                }],
                upsert=False,
            )

        if model.task:
            task = Task.objects(id=model.task).first()
            if task and task.status == TaskStatus.published:
                if not force:
                    raise errors.bad_request.ModelCreatingTaskExists(
                        "and published, use force=True to delete",
                        task=model.task)
                # models.output holds items that reference the model through
                # their "model" attribute, so the ids should be compared
                # and not the items themselves
                output_models = task.models.output if task.models else None
                if output_models and any(
                        item.model == model_id for item in output_models):
                    now = datetime.utcnow()
                    Task._get_collection().update_one(
                        filter={
                            "_id": model.task,
                            "models.output.model": model_id
                        },
                        update={
                            # all the modified fields should be under the
                            # update operator: an update document cannot mix
                            # operators with plain field names
                            "$set": {
                                "models.output.$[elem].model":
                                deleted_model_id,
                                "output.error":
                                f"model deleted on {now.isoformat()}",
                                "last_change": now,
                            },
                        },
                        array_filters=[{
                            "elem.model": model_id
                        }],
                        upsert=False,
                    )

        del_count = Model.objects(id=model_id, company=company_id).delete()
        return del_count, model
Пример #18
0
    def get_model_metadata_distinct_values(
        self,
        company_id: str,
        project_ids: Sequence[str],
        key: str,
        include_subprojects: bool,
        allow_public: bool = True,
    ) -> ParamValues:
        """
        Return the distinct values of a metadata key over the matching models.

        The models are matched by the company and project constraints and by
        the existence of the (escaped) metadata key.

        Returns a tuple of (total, values): the values are sorted and limited
        by the ``services.models.metadata_values.max_count`` setting and the
        total is the number of the returned values. ``(0, [])`` is returned
        if no model has the key.

        The result is stored in redis together with the latest ``last_update``
        of the matching models and a cached result is returned when
        ``self._get_cached_param_values`` provides one.
        """
        company_constraint = self._get_company_constraint(
            company_id, allow_public)
        project_constraint = self._get_project_constraint(
            project_ids, include_subprojects)
        key_path = f"metadata.{ParameterKeyEscaper.escape(key)}"

        key_exists_filter = {f"{key_path.replace('.', '__')}__exists": True}
        models_with_key = Model.objects(
            **company_constraint,
            **project_constraint,
            **key_exists_filter,
        )
        last_updated_model = (
            models_with_key.only("last_update")
            .order_by("-last_update")
            .limit(1)
            .first()
        )
        if not last_updated_model:
            return 0, []

        # NOTE(review): include_subprojects is not part of the cache key - confirm
        # that calls differing only by this flag may share a cached result
        redis_key = f"modelmetadata_values_{company_id}_{'_'.join(project_ids)}_{key}_{allow_public}"
        last_update = last_updated_model.last_update or datetime.utcnow()
        cached_res = self._get_cached_param_values(key=redis_key,
                                                   last_update=last_update)
        if cached_res:
            return cached_res

        max_values = config.get("services.models.metadata_values.max_count",
                                100)

        match_stage = {
            "$match": {
                **company_constraint,
                **project_constraint,
                key_path: {"$exists": True},
            }
        }
        project_stage = {"$project": {"value": f"${key_path}.value"}}
        distinct_stage = {"$group": {"_id": "$value"}}
        sort_stage = {"$sort": {"_id": 1}}
        limit_stage = {"$limit": max_values}
        # Fold the (already limited) distinct values into a single document
        collect_stage = {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT._id"},
            }
        }
        pipeline = [
            match_stage,
            project_stage,
            distinct_stage,
            sort_stage,
            limit_stage,
            collect_stage,
        ]

        aggregated = Model.aggregate(pipeline, collation=Model._numeric_locale)
        result = next(aggregated, None)
        if not result:
            return 0, []

        total = int(result.get("total", 0))
        values = result.get("results", [])

        ttl = config.get("services.models.metadata_values.cache_ttl_sec",
                         86400)
        cache_entry = {
            "last_update": last_update.timestamp(),
            "total": total,
            "values": values,
        }
        self.redis.setex(redis_key, ttl, json.dumps(cache_entry))

        return total, values