def _process_include_subprojects(call_data: dict):
    include_subprojects = call_data.pop("include_subprojects", False)
    project_ids = call_data.get("project")
    if not project_ids or not include_subprojects:
        return

    if not isinstance(project_ids, list):
        project_ids = [project_ids]
    call_data["project"] = project_ids_with_children(project_ids)
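# Hedged usage sketch (not part of the source): _demo_include_subprojects is a
# hypothetical helper showing how _process_include_subprojects rewrites an
# incoming request payload in place. The project id is made up, and
# project_ids_with_children (defined elsewhere in this codebase) is assumed to
# return the given ids plus the ids of all their sub-projects.
def _demo_include_subprojects():
    call_data = {"project": "parent-project-id", "include_subprojects": True}
    _process_include_subprojects(call_data)
    # "include_subprojects" has been popped, and "project" now holds the
    # expanded id list, e.g. ["parent-project-id", "<child ids>..."].
    return call_data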
def _resolve_entities(
    cls,
    experiments: Sequence[str] = None,
    projects: Sequence[str] = None,
    task_statuses: Sequence[str] = None,
) -> Dict[Type[mongoengine.Document], Set[mongoengine.Document]]:
    entities = defaultdict(set)

    if projects:
        print("Reading projects...")
        projects = project_ids_with_children(projects)
        entities[cls.project_cls].update(
            cls._resolve_type(cls.project_cls, projects)
        )
        print("--> Reading project experiments...")
        query = Q(
            project__in=list(
                set(filter(None, (p.id for p in entities[cls.project_cls])))
            ),
            system_tags__nin=[EntityVisibility.archived.value],
        )
        if task_statuses:
            query &= Q(status__in=list(set(task_statuses)))
        objs = cls.task_cls.objects(query)
        entities[cls.task_cls].update(
            o for o in objs if o.id not in (experiments or [])
        )

    if experiments:
        print("Reading experiments...")
        entities[cls.task_cls].update(cls._resolve_type(cls.task_cls, experiments))
        print("--> Reading experiments projects...")
        objs = cls.project_cls.objects(
            id__in=list(
                set(filter(None, (p.project for p in entities[cls.task_cls])))
            )
        )
        project_ids = {p.id for p in entities[cls.project_cls]}
        entities[cls.project_cls].update(o for o in objs if o.id not in project_ids)

    cls._check_projects_hierarchy(entities[cls.project_cls])

    task_models = chain.from_iterable(
        models
        for task in entities[cls.task_cls]
        if task.models
        for models in (task.models.input, task.models.output)
        if models
    )

    model_ids = {tm.model for tm in task_models}
    if model_ids:
        print("Reading models...")
        entities[cls.model_cls] = set(cls.model_cls.objects(id__in=list(model_ids)))

    return entities
def _get_tags_from_db(
    self,
    company_id: str,
    field: str,
    project: str = None,
    filter_: Dict[str, Sequence[str]] = None,
) -> set:
    query = Q(company=company_id)
    if filter_:
        for name, vals in filter_.items():
            if vals:
                query &= GetMixin.get_list_field_query(name, vals)
    if project:
        query &= Q(project__in=project_ids_with_children([project]))

    return self.db_cls.objects(query).distinct(field)
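# Hedged usage sketch (not part of the source): _demo_project_tags is a
# hypothetical helper showing a typical call to _get_tags_from_db. The
# "org_bll" instance, company id, project id and filter values are all made
# up; "field" would usually be something like "tags" or "system_tags", and
# filter_ adds extra list-field conditions before distinct() collects the
# values for the requested field.
def _demo_project_tags(org_bll):
    return org_bll._get_tags_from_db(
        company_id="company-id",
        field="tags",
        project="project-id",
        filter_={"system_tags": ["active"]},
    )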
def get_hyperparam_distinct_values(
    self,
    company_id: str,
    project_ids: Sequence[str],
    section: str,
    name: str,
    include_subprojects: bool,
    allow_public: bool = True,
) -> HyperParamValues:
    if allow_public:
        company_constraint = {"company": {"$in": [None, "", company_id]}}
    else:
        company_constraint = {"company": company_id}

    if project_ids:
        if include_subprojects:
            project_ids = project_ids_with_children(project_ids)
        project_constraint = {"project": {"$in": project_ids}}
    else:
        project_constraint = {}

    key_path = f"hyperparams.{ParameterKeyEscaper.escape(section)}.{ParameterKeyEscaper.escape(name)}"
    # Most recent update among the matching tasks; used to validate the cache entry
    last_updated_task = (
        Task.objects(
            **company_constraint,
            **project_constraint,
            **{f"{key_path.replace('.', '__')}__exists": True},
        )
        .only("last_update")
        .order_by("-last_update")
        .limit(1)
        .first()
    )
    if not last_updated_task:
        return 0, []

    redis_key = f"hyperparam_values_{company_id}_{'_'.join(project_ids)}_{section}_{name}_{allow_public}"
    last_update = last_updated_task.last_update or datetime.utcnow()
    cached_res = self._get_cached_hyperparam_values(
        key=redis_key, last_update=last_update
    )
    if cached_res:
        return cached_res

    # Collect up to max_values distinct values for the requested hyperparameter
    max_values = config.get("services.tasks.hyperparam_values.max_count", 100)
    pipeline = [
        {
            "$match": {
                **company_constraint,
                **project_constraint,
                key_path: {"$exists": True},
            }
        },
        {"$project": {"value": f"${key_path}.value"}},
        {"$group": {"_id": "$value"}},
        {"$sort": {"_id": 1}},
        {"$limit": max_values},
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT._id"},
            }
        },
    ]

    result = next(Task.aggregate(pipeline, collation=Task._numeric_locale), None)
    if not result:
        return 0, []

    total = int(result.get("total", 0))
    values = result.get("results", [])

    # Cache the result in Redis together with the last_update timestamp
    ttl = config.get("services.tasks.hyperparam_values.cache_ttl_sec", 86400)
    cached = dict(last_update=last_update.timestamp(), total=total, values=values)
    self.redis.setex(redis_key, ttl, json.dumps(cached))

    return total, values
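# Hedged sketch (not part of the source): one plausible shape for the
# _get_cached_hyperparam_values helper used above, reconstructed only from the
# payload that setex() writes at the end of get_hyperparam_distinct_values. The
# repo's real helper may differ; this only illustrates the invalidation idea:
# the cached entry is reused only if it was built from a task update at least
# as recent as the newest "last_update" seen in the database. The Redis client
# is passed explicitly here instead of being read from self.
def _get_cached_hyperparam_values_sketch(redis, key: str, last_update: datetime):
    cached = redis.get(key)
    if not cached:
        return None
    data = json.loads(cached)
    if data.get("last_update", 0) < last_update.timestamp():
        # A task changed after the cache entry was written: treat it as stale
        return None
    return data.get("total", 0), data.get("values", [])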
def get_aggregated_project_parameters(
    company_id,
    project_ids: Sequence[str],
    include_subprojects: bool,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    if project_ids:
        if include_subprojects:
            project_ids = project_ids_with_children(project_ids)
        project_constraint = {"project": {"$in": project_ids}}
    else:
        project_constraint = {}

    page = max(0, page)
    page_size = max(1, page_size)
    # Collect the distinct (section, name) hyperparameter keys across matching tasks
    pipeline = [
        {
            "$match": {
                "company": {"$in": [None, "", company_id]},
                "hyperparams": {"$exists": True, "$gt": {}},
                **project_constraint,
            }
        },
        {"$project": {"sections": {"$objectToArray": "$hyperparams"}}},
        {"$unwind": "$sections"},
        {
            "$project": {
                "section": "$sections.k",
                "names": {"$objectToArray": "$sections.v"},
            }
        },
        {"$unwind": "$names"},
        {"$group": {"_id": {"section": "$section", "name": "$names.k"}}},
        {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})},
        {"$skip": page * page_size},
        {"$limit": page_size},
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT"},
            }
        },
    ]

    result = next(Task.aggregate(pipeline), None)

    total = 0
    remaining = 0
    results = []

    if result:
        total = int(result.get("total", -1))
        # Unescape the Mongo-safe keys back to their original form
        results = [
            {
                "section": ParameterKeyEscaper.unescape(dpath.get(r, "_id/section")),
                "name": ParameterKeyEscaper.unescape(dpath.get(r, "_id/name")),
            }
            for r in result.get("results", [])
        ]
        remaining = max(0, total - (len(results) + page * page_size))

    return total, remaining, results
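# Hedged usage sketch (not part of the source): _demo_aggregated_params is a
# hypothetical helper showing the paging contract of
# get_aggregated_project_parameters. The company and project ids are made up
# and a configured MongoDB backend is assumed. "results" is one page of
# {"section": ..., "name": ...} dicts with escaped hyperparameter keys already
# unescaped; "total" and "remaining" come from the aggregation and the
# arithmetic shown above.
def _demo_aggregated_params():
    total, remaining, results = get_aggregated_project_parameters(
        company_id="company-id",
        project_ids=["project-id"],
        include_subprojects=True,
        page=0,
        page_size=500,
    )
    for entry in results:
        print(entry["section"], entry["name"])
    return total, remaining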
def get_unique_metric_variants(
    company_id, project_ids: Sequence[str], include_subprojects: bool
):
    if project_ids:
        if include_subprojects:
            project_ids = project_ids_with_children(project_ids)
        project_constraint = {"project": {"$in": project_ids}}
    else:
        project_constraint = {}

    pipeline = [
        {
            "$match": dict(
                company={"$in": [None, "", company_id]},
                **project_constraint,
            )
        },
        {"$project": {"metrics": {"$objectToArray": "$last_metrics"}}},
        {"$unwind": "$metrics"},
        {
            "$project": {
                "metric": "$metrics.k",
                "variants": {"$objectToArray": "$metrics.v"},
            }
        },
        {"$unwind": "$variants"},
        {
            "$group": {
                "_id": {
                    "metric": "$variants.v.metric",
                    "variant": "$variants.v.variant",
                },
                "metrics": {
                    "$addToSet": {
                        "metric": "$variants.v.metric",
                        "metric_hash": "$metric",
                        "variant": "$variants.v.variant",
                        "variant_hash": "$variants.k",
                    }
                },
            }
        },
        {"$sort": OrderedDict({"_id.metric": 1, "_id.variant": 1})},
    ]

    with translate_errors_context():
        result = Task.aggregate(pipeline)
        return [r["metrics"][0] for r in result]
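# Hedged usage sketch (not part of the source): _demo_metric_variants is a
# hypothetical helper; the company and project ids are made up and a
# configured MongoDB backend is assumed. Each returned entry pairs a metric
# and variant name with the hashed keys under which they are stored in
# last_metrics.
def _demo_metric_variants():
    variants = get_unique_metric_variants(
        company_id="company-id",
        project_ids=["project-id"],
        include_subprojects=True,
    )
    return [(v["metric"], v["variant"]) for v in variants]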