def params_unprepare_from_saved(fields, copy_to_legacy=False):
    """
    Unescape the section and param names stored under the "hyperparams" and
    "configuration" keys of the passed fields. The fields are updated in place.

    If copy_to_legacy is set then the hyperparams and configuration data are also
    copied to the legacy locations (the ("execution", "parameters") and
    ("execution", "model_desc") paths) for the old clients
    """
    for field_name in ("hyperparams", "configuration"):
        stored = fields.get(field_name)
        if not stored:
            continue

        restored = {}
        for section_key, section in stored.items():
            restored_key = ParameterKeyEscaper.unescape(section_key)
            if isinstance(section, dict):
                # The keys of the nested level are escaped too
                section = {
                    ParameterKeyEscaper.unescape(name): item
                    for name, item in section.items()
                }
            restored[restored_key] = section
        fields[field_name] = restored

    if not copy_to_legacy:
        return

    legacy_layout = (
        ("hyperparams", ("execution", "parameters"), True),
        ("configuration", ("execution", "model_desc"), False),
    )
    for source_field, legacy_path, with_sections in legacy_layout:
        legacy_items = _get_legacy_params(
            fields.get(source_field), with_sections=with_sections
        )
        if not legacy_items:
            continue
        nested_set(
            fields,
            legacy_path,
            {_get_full_param_name(item): item["value"] for item in legacy_items},
        )
def _process_path(path: str):
    """
    Normalize a dotted task field path received from the frontend.

    The frontend does only a partial escaping of the path, so that all the '.'
    characters in section and key names are escaped. Every part of the path is
    therefore unescaped and then escaped again with the full mongo escaping.

    Raises bad_request.ValidationError if the path does not consist of 2 to 4 parts
    """
    segments = path.split(".")
    if not 2 <= len(segments) <= 4:
        raise errors.bad_request.ValidationError("invalid task field", path=path)

    escaped_segments = []
    for segment in segments:
        raw_segment = ParameterKeyEscaper.unescape(segment)
        escaped_segments.append(ParameterKeyEscaper.escape(raw_segment))
    return ".".join(escaped_segments)
def get_configuration_names(cls, company_id: str, task_ids: Sequence[str]) -> Dict[str, list]:
    """
    Collect the names of the configuration items of the passed tasks.

    Only the tasks whose company is None, an empty string or company_id are matched.
    Returns a mapping from task id to a {"names": [...]} dict where the names are
    unescaped and sorted. Tasks that produce no configuration items in the
    aggregation do not appear in the result
    """
    with TimingContext("mongo", "get_configuration_names"):
        task_filter = {
            "company": {"$in": [None, "", company_id]},
            "_id": {"$in": task_ids},
        }
        pipeline = [
            {"$match": task_filter},
            # Turn the configuration dict into a list of {k, v} items
            {"$project": {"items": {"$objectToArray": "$configuration"}}},
            {"$unwind": "$items"},
            {"$group": {"_id": "$_id", "names": {"$addToSet": "$items.k"}}},
        ]

        names_by_task = {}
        for task in Task.aggregate(pipeline):
            unescaped_names = [
                ParameterKeyEscaper.unescape(name) for name in task["names"]
            ]
            unescaped_names.sort()
            names_by_task[task["_id"]] = {"names": unescaped_names}
        return names_by_task
def unescape_metadata(call: APICall, documents: Union[dict, Sequence[dict]]):
    """
    Unescape special characters in the metadata keys of the passed document(s).
    The documents are updated in place.

    For the calls whose requested endpoint version is 2.16 or lower, the "metadata"
    field (when present) is replaced with an empty list instead
    """
    docs = [documents] if isinstance(documents, dict) else documents
    legacy_client = call.requested_endpoint_version <= PartialVersion("2.16")

    for document in docs:
        if legacy_client and "metadata" in document:
            document["metadata"] = []
            continue

        stored = document.get("metadata")
        if stored:
            document["metadata"] = {
                ParameterKeyEscaper.unescape(name): value
                for name, value in stored.items()
            }
def get_aggregated_project_parameters(
    cls,
    company_id,
    project_ids: Sequence[str],
    include_subprojects: bool,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    """
    Return one page of the distinct hyper parameter (section, name) pairs found
    in the tasks matched by the company and project constraints.

    :param company_id: passed to cls._get_company_constraint to build the tasks filter
    :param project_ids: passed, together with include_subprojects,
        to cls._get_project_constraint to build the tasks filter
    :param include_subprojects: see project_ids
    :param page: zero based page number. Negative values are treated as 0
    :param page_size: maximal amount of pairs in a page. Values below 1 are treated as 1
    :return: a tuple of
        - the total amount of the distinct (section, name) pairs
        - the amount of the pairs remaining after the returned page
        - the page itself: a list of {"section": ..., "name": ...} dicts
          with unescaped values, sorted by section and then by name
    """
    page = max(0, page)
    page_size = max(1, page_size)
    offset = page * page_size

    pipeline = [
        {
            "$match": {
                **cls._get_company_constraint(company_id),
                **cls._get_project_constraint(project_ids, include_subprojects),
                "hyperparams": {"$exists": True, "$gt": {}},
            }
        },
        {"$project": {"sections": {"$objectToArray": "$hyperparams"}}},
        {"$unwind": "$sections"},
        {
            "$project": {
                "section": "$sections.k",
                "names": {"$objectToArray": "$sections.v"},
            }
        },
        {"$unwind": "$names"},
        {"$group": {"_id": {"section": "$section", "name": "$names.k"}}},
        {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})},
        # Count all the distinct pairs and cut the requested page in one stage.
        # Counting after $skip/$limit would count only the pairs of the current
        # page, which makes 'total' wrong and 'remaining' always 0
        {
            "$facet": {
                "total": [{"$count": "count"}],
                "results": [{"$skip": offset}, {"$limit": page_size}],
            }
        },
    ]

    result = next(Task.aggregate(pipeline), None)

    total = 0
    remaining = 0
    results = []

    if result:
        # The "total" facet is an empty list when no pairs were found
        counters = result.get("total") or []
        if counters:
            total = int(counters[0].get("count", 0))
        results = [
            {
                "section": ParameterKeyEscaper.unescape(
                    nested_get(r, ("_id", "section"))
                ),
                "name": ParameterKeyEscaper.unescape(nested_get(r, ("_id", "name"))),
            }
            for r in result.get("results") or []
        ]
        remaining = max(0, total - (len(results) + offset))

    return total, remaining, results
def get_model_metadata_keys(
    cls,
    company_id,
    project_ids: Sequence[str],
    include_subprojects: bool,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    """
    Return one page of the distinct metadata keys found in the models matched
    by the company and project constraints.

    :param company_id: passed to cls._get_company_constraint to build the models filter
    :param project_ids: passed, together with include_subprojects,
        to cls._get_project_constraint to build the models filter
    :param include_subprojects: see project_ids
    :param page: zero based page number. Negative values are treated as 0
    :param page_size: maximal amount of keys in a page. Values below 1 are treated as 1
    :return: a tuple of
        - the total amount of the distinct metadata keys
        - the amount of the keys remaining after the returned page
        - the page itself: a sorted list of the unescaped keys
    """
    page = max(0, page)
    page_size = max(1, page_size)
    offset = page * page_size

    pipeline = [
        {
            "$match": {
                **cls._get_company_constraint(company_id),
                **cls._get_project_constraint(project_ids, include_subprojects),
                "metadata": {"$exists": True, "$gt": {}},
            }
        },
        {"$project": {"metadata": {"$objectToArray": "$metadata"}}},
        {"$unwind": "$metadata"},
        {"$group": {"_id": "$metadata.k"}},
        {"$sort": {"_id": 1}},
        # Count all the distinct keys and cut the requested page in one stage.
        # Counting after $skip/$limit would count only the keys of the current
        # page, which makes 'total' wrong and 'remaining' always 0
        {
            "$facet": {
                "total": [{"$count": "count"}],
                "results": [{"$skip": offset}, {"$limit": page_size}],
            }
        },
    ]

    result = next(Model.aggregate(pipeline), None)

    total = 0
    remaining = 0
    results = []

    if result:
        # The "total" facet is an empty list when no keys were found
        counters = result.get("total") or []
        if counters:
            total = int(counters[0].get("count", 0))
        results = [
            ParameterKeyEscaper.unescape(r.get("_id"))
            for r in result.get("results") or []
        ]
        remaining = max(0, total - (len(results) + offset))

    return total, remaining, results
def unescape_dict(data: dict) -> dict:
    """
    Build a new dict from the passed one with all the top level keys unescaped.
    The values are kept as they are.

    A falsy input (None or an empty dict) is returned unchanged
    """
    if not data:
        return data

    unescaped = {}
    for key, value in data.items():
        unescaped[ParameterKeyEscaper.unescape(key)] = value
    return unescaped
def get_aggregated_project_parameters(
    company_id,
    project_ids: Sequence[str] = None,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    """
    Return one page of the distinct hyper parameter (section, name) pairs found
    in the tasks whose company is None, an empty string or company_id.

    :param company_id: the company to match the tasks by
    :param project_ids: if not empty then only the tasks from these projects are matched
    :param page: zero based page number. Negative values are treated as 0
    :param page_size: maximal amount of pairs in a page. Values below 1 are treated as 1
    :return: a tuple of
        - the total amount of the distinct (section, name) pairs
        - the amount of the pairs remaining after the returned page
        - the page itself: a list of {"section": ..., "name": ...} dicts
          with unescaped values, sorted by section and then by name
    """
    page = max(0, page)
    page_size = max(1, page_size)
    offset = page * page_size

    task_filter = {
        "company": {"$in": [None, "", company_id]},
        "hyperparams": {"$exists": True, "$gt": {}},
    }
    if project_ids:
        task_filter["project"] = {"$in": project_ids}

    pipeline = [
        {"$match": task_filter},
        {"$project": {"sections": {"$objectToArray": "$hyperparams"}}},
        {"$unwind": "$sections"},
        {
            "$project": {
                "section": "$sections.k",
                "names": {"$objectToArray": "$sections.v"},
            }
        },
        {"$unwind": "$names"},
        {"$group": {"_id": {"section": "$section", "name": "$names.k"}}},
        {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})},
        # All the pairs are gathered into a single document so that the total
        # is counted before the requested page is sliced out
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT"},
            }
        },
        {
            "$project": {
                "total": 1,
                "results": {"$slice": ["$results", offset, page_size]},
            }
        },
    ]

    with translate_errors_context():
        result = next(Task.aggregate(pipeline), None)

    if not result:
        return 0, 0, []

    total = int(result.get("total", -1))
    results = []
    for entry in result.get("results", []):
        results.append(
            {
                "section": ParameterKeyEscaper.unescape(
                    dpath.get(entry, "_id/section")
                ),
                "name": ParameterKeyEscaper.unescape(dpath.get(entry, "_id/name")),
            }
        )
    remaining = max(0, total - (len(results) + offset))
    return total, remaining, results