Example #1
def get_status_counts(project_id, section):
    return {
        "total_runtime": nested_get(runtime, (project_id, section), 0),
        "status_count": nested_get(
            status_count, (project_id, section), default_counts
        ),
    }
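All of the snippets in this listing rely on the nested_get helper, whose implementation is not shown here. A minimal sketch consistent with the call sites above (the path is a tuple or list of keys, and the default may be passed positionally or by keyword) might be:

def nested_get(dictionary, path, default=None):
    # Hypothetical helper, not taken from the listing: walk the nested
    # mapping key by key and fall back to the default on the first miss.
    node = dictionary
    for key in path:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node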
Example #2
def sum_status_count(
    a: Mapping[str, Mapping], b: Mapping[str, Mapping]
) -> Dict[str, dict]:
    return {
        section: {
            status: nested_get(a, (section, status), default=0)
            + nested_get(b, (section, status), default=0)
            for status in set(a.get(section, {})) | set(b.get(section, {}))
        }
        for section in set(a) | set(b)
    }
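For illustration, assuming nested_get returns the supplied default for missing keys (as sketched above), merging two partially overlapping mappings with hypothetical data would behave like this:

a = {"pipelines": {"completed": 2, "failed": 1}}
b = {"pipelines": {"completed": 3}, "datasets": {"queued": 5}}

# Missing statuses count as 0, so the two mappings merge key by key:
# {"pipelines": {"completed": 5, "failed": 1}, "datasets": {"queued": 5}}
print(sum_status_count(a, b))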
Example #3
def _migrate_docker_cmd(db: Database):
    tasks: Collection = db["task"]

    docker_cmd_field = "execution.docker_cmd"
    query = {docker_cmd_field: {"$exists": True}}

    for doc in tasks.find(filter=query, projection=(docker_cmd_field, )):
        set_commands = {}
        docker_cmd = nested_get(doc, docker_cmd_field.split("."))
        if docker_cmd:
            image, _, arguments = docker_cmd.partition(" ")
            set_commands["container"] = {
                "image": image,
                "arguments": arguments
            }

        tasks.update_one(
            {"_id": doc["_id"]},
            {
                "$unset": {
                    docker_cmd_field: 1
                },
                **({
                    "$set": set_commands
                } if set_commands else {}),
            },
        )
Example #4
def unescape_dict_field(fields: dict, path: Union[str, Sequence[str]]):
    if isinstance(path, str):
        path = (path,)

    data = nested_get(fields, path)
    if not data or not isinstance(data, dict):
        return

    nested_set(fields, path, unescape_dict(data))
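nested_set is the writing counterpart used in this and several of the following snippets, and it is likewise not part of the listing. A plausible sketch that matches the observed call shapes (nested_set(fields, path, value) as well as the path= and value= keywords) is:

def nested_set(dictionary, path, value):
    # Hypothetical helper: create intermediate dicts as needed,
    # then assign the value at the last key of the path.
    *parents, last = path
    node = dictionary
    for key in parents:
        node = node.setdefault(key, {})
    node[last] = value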
Example #5
def _migrate_task_models(db: Database):
    """
    Move the execution and output models to the new models.input and models.output lists
    """
    tasks: Collection = db["task"]

    models_field = "models"
    now = datetime.utcnow()

    fields = {
        TaskModelTypes.input: "execution.model",
        TaskModelTypes.output: "output.model",
    }
    query = {"$or": [{field: {"$exists": True}} for field in fields.values()]}
    for doc in tasks.find(filter=query,
                          projection=[*fields.values(), models_field]):
        set_commands = {}
        for mode, field in fields.items():
            value = nested_get(doc, field.split("."))
            if value:
                name = TaskModelNames[mode]
                model_item = {"model": value, "name": name, "updated": now}
                existing_models = nested_get(doc, (models_field, mode),
                                             default=[])
                existing_models = (
                    m for m in existing_models
                    if m.get("name") != name and m.get("model") != value)
                if mode == TaskModelTypes.input:
                    updated_models = [model_item, *existing_models]
                else:
                    updated_models = [*existing_models, model_item]
                set_commands[f"{models_field}.{mode}"] = updated_models

        tasks.update_one(
            {"_id": doc["_id"]},
            {
                "$unset": {field: 1
                           for field in fields.values()},
                **({
                    "$set": set_commands
                } if set_commands else {}),
            },
        )
Example #6
def artifacts_prepare_for_save(fields: dict):
    artifacts_field = ("execution", "artifacts")
    artifacts = nested_get(fields, artifacts_field)
    if artifacts is None:
        return

    nested_set(fields,
               artifacts_field,
               value={get_artifact_id(a): a
                      for a in artifacts})
Example #7
def artifacts_unprepare_from_saved(fields):
    artifacts_field = ("execution", "artifacts")
    artifacts = nested_get(fields, artifacts_field)
    if artifacts is None:
        return

    nested_set(
        fields,
        artifacts_field,
        value=sorted(artifacts.values(), key=itemgetter("key")),
    )
Example #8
    def prepare_for_save(cls, call: APICall, fields: dict):
        if call.requested_endpoint_version >= cls.max_version:
            return

        docker_cmd = nested_get(fields, cls.field)
        if docker_cmd is not None:
            image, _, arguments = docker_cmd.partition(" ")
            nested_set(fields, ("container", "image"), value=image)
            nested_set(fields, ("container", "arguments"), value=arguments)

        nested_delete(fields, cls.field)
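nested_delete, used here to drop the legacy docker_cmd field after copying it into container, is also only referenced in this listing. A minimal sketch that tolerates already-missing paths, as the migration snippets appear to expect, might be:

def nested_delete(dictionary, path):
    # Hypothetical helper: descend to the parent of the leaf key
    # and remove the leaf if it is still there.
    *parents, last = path
    node = dictionary
    for key in parents:
        if not isinstance(node, dict) or key not in node:
            return
        node = node[key]
    if isinstance(node, dict):
        node.pop(last, None)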
Example #9
def params_prepare_for_save(fields: dict, previous_task: Task = None):
    """
    If legacy hyper params or configuration are passed, replace the corresponding section in the new structure.
    Escape all section and param names for hyper params and configuration to make them Mongo-safe.
    """
    for old_params_field, new_params_field, default_section in (
        (("execution", "parameters"), "hyperparams",
         hyperparams_default_section),
        (("execution", "model_desc"), "configuration", None),
    ):
        legacy_params = nested_get(fields, old_params_field)
        if legacy_params is None:
            continue

        if (not fields.get(new_params_field) and previous_task
                and previous_task[new_params_field]):
            previous_data = previous_task.to_proper_dict().get(
                new_params_field)
            removed = _remove_legacy_params(
                previous_data, with_sections=default_section is not None
            )
            if not legacy_params and not removed:
                # if we only need to delete legacy fields from the db
                # but they are not there then there is no point to proceed
                continue

            fields_update = {new_params_field: previous_data}
            params_unprepare_from_saved(fields_update)
            fields.update(fields_update)

        for full_name, value in legacy_params.items():
            section, name = split_param_name(full_name, default_section)
            new_path = list(filter(None, (new_params_field, section, name)))
            new_param = dict(name=name,
                             type=hyperparams_legacy_type,
                             value=str(value))
            if section is not None:
                new_param["section"] = section
            nested_set(fields, new_path, new_param)
        nested_delete(fields, old_params_field)

    for param_field in ("hyperparams", "configuration"):
        params = fields.get(param_field)
        if params:
            escaped_params = {
                ParameterKeyEscaper.escape(key):
                {ParameterKeyEscaper.escape(k): v
                 for k, v in value.items()}
                if isinstance(value, dict) else value
                for key, value in params.items()
            }
            fields[param_field] = escaped_params
Example #10
def migrate_backend(db: Database):
    collection: Collection = db["task"]
    artifacts_field = "execution.artifacts"
    query = {artifacts_field: {"$type": 4}}
    for doc in collection.find(filter=query, projection=(artifacts_field, )):
        artifacts = nested_get(doc, artifacts_field.split("."))
        if not isinstance(artifacts, list):
            continue

        new_artifacts = {get_artifact_id(a): a for a in artifacts}
        collection.update_one({"_id": doc["_id"]},
                              {"$set": {
                                  artifacts_field: new_artifacts
                              }})
Example #11
    def unprepare_from_saved(
        cls, call: APICall, tasks_data: Union[Sequence[dict], dict]
    ):
        if call.requested_endpoint_version >= cls.max_version:
            return

        if isinstance(tasks_data, dict):
            tasks_data = [tasks_data]

        for task in tasks_data:
            for mode, field in cls.mode_to_fields.items():
                models = nested_get(task, (cls.models_field, mode))
                if not models:
                    continue

                model = models[0] if mode == TaskModelTypes.input else models[-1]
                if model:
                    nested_set(task, field, model.get("model"))
Example #12
    def prepare_for_save(cls, call: APICall, fields: dict):
        if call.requested_endpoint_version >= cls.max_version:
            return

        for mode, field in cls.mode_to_fields.items():
            value = nested_get(fields, field)
            if value is None:
                continue
            val = [
                dict(
                    name=TaskModelNames[mode],
                    model=value,
                    updated=datetime.utcnow(),
                )
            ] if value else []
            nested_set(fields, (cls.models_field, mode), value=val)

            nested_delete(fields, field)
Example #13
def _migrate_model_labels(db: Database):
    tasks: Collection = db["task"]

    fields = ("execution.model_labels", "container")
    query = {"$or": [{field: {"$nin": [None, {}]}} for field in fields]}

    for doc in tasks.find(filter=query, projection=fields):
        set_commands = {}
        for field in fields:
            data = nested_get(doc, field.split("."))
            if not data:
                continue
            escaped = escape_dict(data)
            if data == escaped:
                continue
            set_commands[field] = escaped

        if set_commands:
            tasks.update_one({"_id": doc["_id"]}, {"$set": set_commands})
Example #14
        def get_status_counts(project_id, section):
            project_runtime = runtime.get(project_id, {})
            project_section_statuses = nested_get(status_count,
                                                  (project_id, section),
                                                  default=default_counts)

            def get_time_or_none(value):
                return value if value != datetime.min else None

            return {
                "status_count": project_section_statuses,
                "total_tasks": sum(project_section_statuses.values()),
                "total_runtime": project_runtime.get(section, 0),
                "completed_tasks_24h": project_runtime.get(
                    f"{section}_recently_completed", 0
                ),
                "last_task_run": get_time_or_none(
                    project_runtime.get(f"{section}_max_task_started", datetime.min)
                ),
            }
Example #15
    def _upgrade_task_data(task_data: dict) -> dict:
        """
        Migrate from execution/parameters and model_desc to the hyperparams and configuration fields
        Upgrade artifacts list to dict
        Migrate from execution.model and output.model to the new models field
        Move docker_cmd contents into the container field
        :param task_data: Upgraded in place
        :return: The upgraded task data
        """
        for old_param_field, new_param_field, default_section in (
            ("execution.parameters", "hyperparams",
             hyperparams_default_section),
            ("execution.model_desc", "configuration", None),
        ):
            legacy_path = old_param_field.split(".")
            legacy = nested_get(task_data, legacy_path)
            if legacy:
                for full_name, value in legacy.items():
                    section, name = split_param_name(full_name,
                                                     default_section)
                    new_path = list(
                        filter(None, (new_param_field, section, name)))
                    if not nested_get(task_data, new_path):
                        new_param = dict(name=name,
                                         type=hyperparams_legacy_type,
                                         value=str(value))
                        if section is not None:
                            new_param["section"] = section
                        nested_set(task_data, path=new_path, value=new_param)
            nested_delete(task_data, legacy_path)

        artifacts_path = ("execution", "artifacts")
        artifacts = nested_get(task_data, artifacts_path)
        if isinstance(artifacts, list):
            nested_set(
                task_data,
                path=artifacts_path,
                value={get_artifact_id(a): a
                       for a in artifacts},
            )

        models = task_data.get("models", {})
        now = datetime.utcnow()
        for old_field, type_ in (
            ("execution.model", TaskModelTypes.input),
            ("output.model", TaskModelTypes.output),
        ):
            old_path = old_field.split(".")
            old_model = nested_get(task_data, old_path)
            new_models = models.get(type_, [])
            name = TaskModelNames[type_]
            if old_model and not any(
                    m for m in new_models
                    if m.get("model") == old_model or m.get("name") == name):
                model_item = {"model": old_model, "name": name, "updated": now}
                if type_ == TaskModelTypes.input:
                    new_models = [model_item, *new_models]
                else:
                    new_models = [*new_models, model_item]
            models[type_] = new_models
            nested_delete(task_data, old_path)
        task_data["models"] = models

        docker_cmd_path = ("execution", "docker_cmd")
        docker_cmd = nested_get(task_data, docker_cmd_path)
        if docker_cmd and not task_data.get("container"):
            image, _, arguments = docker_cmd.partition(" ")
            task_data["container"] = {"image": image, "arguments": arguments}
        nested_delete(task_data, docker_cmd_path)

        return task_data
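As a rough illustration with hypothetical input, and assuming split_param_name places an unqualified parameter name under hyperparams_default_section and that nested_delete tolerates missing paths:

legacy_task = {
    "execution": {
        "parameters": {"lr": 0.01},
        "docker_cmd": "nvidia/cuda:11.0 --gpus all",
    }
}
upgraded = _upgrade_task_data(legacy_task)
# The legacy parameter should now live under
# upgraded["hyperparams"][<default section>]["lr"] as
# {"name": "lr", "type": hyperparams_legacy_type, "value": "0.01", "section": ...},
# upgraded["container"] should be {"image": "nvidia/cuda:11.0", "arguments": "--gpus all"},
# and execution.parameters / execution.docker_cmd should be gone.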
Example #16
    def get_aggregated_project_parameters(
        cls,
        company_id,
        project_ids: Sequence[str],
        include_subprojects: bool,
        page: int = 0,
        page_size: int = 500,
    ) -> Tuple[int, int, Sequence[dict]]:
        page = max(0, page)
        page_size = max(1, page_size)
        pipeline = [
            {
                "$match": {
                    **cls._get_company_constraint(company_id),
                    **cls._get_project_constraint(project_ids, include_subprojects),
                    "hyperparams": {
                        "$exists": True,
                        "$gt": {}
                    },
                }
            },
            {
                "$project": {
                    "sections": {
                        "$objectToArray": "$hyperparams"
                    }
                }
            },
            {
                "$unwind": "$sections"
            },
            {
                "$project": {
                    "section": "$sections.k",
                    "names": {
                        "$objectToArray": "$sections.v"
                    },
                }
            },
            {
                "$unwind": "$names"
            },
            {
                "$group": {
                    "_id": {
                        "section": "$section",
                        "name": "$names.k"
                    }
                }
            },
            {
                "$sort": OrderedDict({
                    "_id.section": 1,
                    "_id.name": 1
                })
            },
            {
                "$skip": page * page_size
            },
            {
                "$limit": page_size
            },
            {
                "$group": {
                    "_id": 1,
                    "total": {
                        "$sum": 1
                    },
                    "results": {
                        "$push": "$$ROOT"
                    },
                }
            },
        ]

        result = next(Task.aggregate(pipeline), None)

        total = 0
        remaining = 0
        results = []

        if result:
            total = int(result.get("total", -1))
            results = [
                {
                    "section": ParameterKeyEscaper.unescape(
                        nested_get(r, ("_id", "section"))
                    ),
                    "name": ParameterKeyEscaper.unescape(
                        nested_get(r, ("_id", "name"))
                    ),
                }
                for r in result.get("results", [])
            ]
            remaining = max(0, total - (len(results) + page * page_size))

        return total, remaining, results
Example #17
    def _import_entity(
        cls,
        f: IO[bytes],
        full_name: str,
        company_id: str,
        user_id: str,
        metadata: Mapping[str, Any],
    ) -> Optional[Sequence[Task]]:
        cls_ = cls._get_entity_type(full_name)
        print(f"Writing {cls_.__name__.lower()}s into database")
        tasks = []
        override_project_count = 0
        for item in cls.json_lines(f):
            if cls_ == cls.task_cls:
                task_data = json.loads(item)
                artifacts_path = ("execution", "artifacts")
                artifacts = nested_get(task_data, artifacts_path)
                if isinstance(artifacts, list):
                    nested_set(
                        task_data,
                        artifacts_path,
                        value={get_artifact_id(a): a
                               for a in artifacts},
                    )
                    item = json.dumps(task_data)

            doc = cls_.from_json(item, created=True)
            if hasattr(doc, "user"):
                doc.user = user_id
            if hasattr(doc, "company"):
                doc.company = company_id
            if isinstance(doc, cls.project_cls):
                override_project_name = metadata.get("project_name", None)
                if override_project_name:
                    if override_project_count:
                        override_project_name = (
                            f"{override_project_name} {override_project_count + 1}"
                        )
                    override_project_count += 1
                    doc.name = override_project_name

                doc.logo_url = metadata.get("logo_url", None)
                doc.logo_blob = metadata.get("logo_blob", None)

                cls_.objects(
                    company=company_id, name=doc.name, id__ne=doc.id
                ).update(
                    set__name=
                    f"{doc.name}_{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}"
                )

            doc.save()

            if isinstance(doc, cls.task_cls):
                tasks.append(doc)
                cls.event_bll.delete_task_events(company_id,
                                                 doc.id,
                                                 allow_locked=True)

        if tasks:
            return tasks