def _filter_request(self, request: request.Request, queryset: QuerySet) -> QuerySet:
    """Narrow ``queryset`` according to the GET parameters of ``request``.

    Recognised parameters (each one is skipped when absent or empty):

    * ``id`` / ``uuid`` -- comma-separated values matched with ``__in``.
    * ``search`` -- space-separated terms. A ``has:<key>`` term requires the
      property ``<key>`` to exist; any other term containing ``:`` is
      ignored; the remaining terms are re-joined with single spaces and
      matched case-insensitively against ``properties`` or a related
      distinct id.
    * ``cohort`` -- matched against ``cohort__id``.
    * ``properties`` -- JSON that is wrapped in a ``Filter`` and converted
      with ``properties_to_Q`` for ``self.team_id``.

    Returns:
        The filtered queryset, always prefetching ``persondistinctid_set``
        into the ``distinct_ids_cache`` attribute.
    """
    params = request.GET

    if params.get("id"):
        queryset = queryset.filter(id__in=params["id"].split(","))

    if params.get("uuid"):
        queryset = queryset.filter(uuid__in=params["uuid"].split(","))

    if params.get("search"):
        contains = []
        for part in params["search"].split(" "):
            if ":" not in part:
                contains.append(part)
                continue
            # partition() never raises, unlike two-target unpacking of
            # split(":"), which blew up on terms with several colons
            # such as "has:a:b".
            matcher, _, key = part.partition(":")
            if matcher == "has":
                # Matches for example has:email or has:name
                queryset = queryset.filter(properties__has_key=key)
        needle = " ".join(contains)
        queryset = queryset.filter(
            Q(properties__icontains=needle)
            | Q(persondistinctid__distinct_id__icontains=needle)
        ).distinct("id")

    if params.get("cohort"):
        queryset = queryset.filter(cohort__id=params["cohort"])

    if params.get("properties"):
        # Named property_filter so the builtin ``filter`` is not shadowed.
        property_filter = Filter(data={"properties": json.loads(params["properties"])})
        queryset = queryset.filter(
            properties_to_Q(property_filter.properties, team_id=self.team_id)
        )

    return queryset.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    )
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Return the actions of one team, highest id first, with steps prefetched.

    When ``params`` carries a truthy ``include_count`` entry, each row is
    annotated with ``count``, a ``Count`` over ``TREND_FILTER_TYPE_EVENTS``.
    The related ``steps`` are prefetched ordered by ``id``.
    """
    wants_count = params.get("include_count")
    if wants_count:
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = ActionStep.objects.order_by("id")
    with_steps = queryset.prefetch_related(Prefetch("steps", queryset=ordered_steps))

    team_actions = with_steps.filter(team_id=team_id)
    return team_actions.order_by("-id")
def _filter_request(self, request: request.Request, queryset: QuerySet, team: Team) -> QuerySet:
    """Apply the request's GET filters to ``queryset``.

    Handles ``id`` and ``uuid`` (comma-separated ``__in`` matches),
    ``search`` (terms containing ``:`` require the property named after the
    first colon to exist; the other terms are matched as one
    case-insensitive phrase against ``properties`` or a related distinct
    id), ``cohort`` and JSON ``properties`` (converted through ``Filter``
    for ``team``). The result always prefetches ``persondistinctid_set``
    into ``distinct_ids_cache``.
    """
    query = request.GET

    if query.get("id"):
        queryset = queryset.filter(id__in=query["id"].split(","))

    if query.get("uuid"):
        queryset = queryset.filter(uuid__in=query["uuid"].split(","))

    if query.get("search"):
        terms = query["search"].split(" ")
        for term in terms:
            if ":" in term:
                wanted_key = term.split(":")[1]
                queryset = queryset.filter(properties__has_key=wanted_key)
        phrase = " ".join(term for term in terms if ":" not in term)
        in_properties = Q(properties__icontains=phrase)
        in_distinct_id = Q(persondistinctid__distinct_id__icontains=phrase)
        queryset = queryset.filter(in_properties | in_distinct_id).distinct("id")

    if query.get("cohort"):
        queryset = queryset.filter(cohort__id=query["cohort"])

    if query.get("properties"):
        raw_properties = json.loads(query["properties"])
        property_filter = Filter(data={"properties": raw_properties})
        queryset = queryset.filter(property_filter.properties_to_Q(team_id=team.pk))

    distinct_ids = Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    return queryset.prefetch_related(distinct_ids)
def prefetch_league_tournament_related(qs: QuerySet) -> QuerySet:
    """Prefetch a league's tournament graph: participants, rounds and results.

    User relations are loaded through ``USER_QUERY_SET``. Decks reached via
    participants and via match seats are restricted with ``only()`` to
    ``id``, ``name``, ``created_at`` and ``pool_id``. The lookups are passed
    to ``prefetch_related`` in the same order as they are listed below.
    """

    def users(path):
        # Prefetch a user relation through the shared user queryset.
        return Prefetch(path, queryset=USER_QUERY_SET)

    def slim_decks(path):
        # Prefetch decks with a fresh queryset limited to a few columns.
        return Prefetch(
            path,
            queryset=PoolDeck.objects.all().only('id', 'name', 'created_at', 'pool_id'),
        )

    participants = 'tournament__participants'
    seats = 'tournament__rounds__matches__seats'
    results = 'tournament__results'

    lookups = (
        participants,
        slim_decks(participants + '__deck'),
        users(participants + '__player'),
        participants + '__deck__pool',
        users(participants + '__deck__pool__user'),
        'tournament__rounds',
        'tournament__rounds__matches',
        seats,
        seats + '__participant',
        users(seats + '__participant__player'),
        slim_decks(seats + '__participant__deck'),
        users(seats + '__participant__deck__pool__user'),
        seats + '__result',
        'tournament__rounds__matches__result',
        results,
        results + '__participant',
        users(results + '__participant__player'),
        results + '__participant__deck',
        results + '__participant__deck__pool',
        users(results + '__participant__deck__pool__user'),
    )
    return qs.prefetch_related(*lookups)
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Return one team's actions, highest id first, with steps prefetched.

    * If ``params[TREND_FILTER_TYPE_ACTIONS]`` is truthy, the queryset is
      limited to the ids of the actions that ``Filter`` parses out of the
      JSON in ``params["actions"]`` (``"[]"`` when that key is missing).
    * If ``params["include_count"]`` is truthy, rows are annotated with
      ``count``, a ``Count`` over ``TREND_FILTER_TYPE_EVENTS``.
    """
    if params.get(TREND_FILTER_TYPE_ACTIONS):
        requested = json.loads(params.get("actions", "[]"))
        parsed = Filter({"actions": requested})
        wanted_ids = [action.id for action in parsed.actions]
        queryset = queryset.filter(pk__in=wanted_ids)

    if params.get("include_count"):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    steps_by_id = Prefetch("steps", queryset=ActionStep.objects.order_by("id"))
    queryset = queryset.prefetch_related(steps_by_id)

    queryset = queryset.filter(team_id=team_id)
    return queryset.order_by("-id")
def optimize_query(query: QuerySet, meta: FieldMeta) -> QuerySet:
    """Apply ``only``, ``select_related`` and ``prefetch_related`` to ``query``.

    The column list comes from ``get_only_cols`` (using ``meta.sections``)
    and the two relation lists from ``get_related_cols`` (using
    ``meta.sub_fields``). Each step is skipped when its list is empty.
    """
    target_model = query.model
    columns = get_only_cols(target_model, meta.sections)
    joined, prefetched = get_related_cols(target_model, meta.sub_fields)

    # Applied strictly in this order; empty argument lists are skipped.
    steps = (
        ("only", columns),
        ("select_related", joined),
        ("prefetch_related", prefetched),
    )
    for method_name, arguments in steps:
        if arguments:
            query = getattr(query, method_name)(*arguments)
    return query
def prefetch_current_user_permissions(self, queryset: models.QuerySet):
    """Prefetch the permissions that apply to the requesting user.

    Permissions of the user named ``settings.ANONYMOUS_USER_NAME`` are
    always included. For a non-anonymous requester, permissions held by the
    user directly or through one of the user's groups are included too.
    The matching rows of ``self.qs_permission_model`` are prefetched on the
    ``permission_group__permissions`` lookup.
    """
    current_user = self.request.user
    anonymous_only = models.Q(user__username=settings.ANONYMOUS_USER_NAME)

    if current_user.is_anonymous:
        condition = anonymous_only
    else:
        own_or_group = models.Q(user=current_user) | models.Q(
            group__in=current_user.groups.all()
        )
        condition = anonymous_only | own_or_group

    permissions = self.qs_permission_model.filter(condition)
    lookup = models.Prefetch("permission_group__permissions", queryset=permissions)
    return queryset.prefetch_related(lookup)
def query_optimization(self, source: QuerySet, fields, prefix=''):
    """Attach the prefetches needed to serve ``fields`` to ``source``.

    Args:
        source: Root queryset; every lookup added here is relative to it.
        fields: Mapping of field name to a dict. For fields that resolve to
            a node, the dict's ``'kwargs'`` (filter arguments) and
            ``'fields'`` (nested selection) entries are read.
        prefix: Lookup path, ending in ``'__'``, from the root queryset down
            to this node. Empty for the root node.

    Returns:
        ``source`` with the additional ``prefetch_related`` calls applied.

    For every requested field:

    * lookups listed in the field's ``prefetch`` attribute are added with
      ``prefix`` prepended;
    * if the field resolves to a node (``self.get_node_for``) and its
      ``prefetch`` attribute is ``None``, the relation is prefetched with
      the node's filtered queryset;
    * the node then adds its own prefetches recursively.
    """
    self.check_if_requesting_missing_fields(fields)

    for field_name, data in fields.items():
        path = prefix + field_name

        # Explicit prefetch
        explicit_lookups = getattr(self, field_name).prefetch
        for lookup in explicit_lookups or ():
            source = source.prefetch_related(prefix + lookup)

        # Related pre-fetch
        node = self.get_node_for(field_name)
        if not node:
            continue
        if explicit_lookups is None:
            related = node.filter(node.Meta.source, **data['kwargs'])
            source = source.prefetch_related(Prefetch(path, queryset=related))

        # The child prefix must keep the whole path from the root queryset.
        # Passing only ``field_name + '__'`` dropped the parent prefix, so
        # lookups three or more levels deep pointed at the wrong relation.
        source = node.query_optimization(source, data['fields'], prefix=path + '__')

    return source
def filter_queryset(self, request: Request, queryset: QuerySet, view: GenericViewSet):
    """Trim and pre-join ``queryset`` to match the serializer's active fields.

    Only ``GET`` requests on views whose serializer class derives from
    ``FlexFieldsSerializerMixin`` are touched; otherwise ``queryset`` is
    returned unchanged. Optional view attributes tune the behaviour:

    * ``auto_remove_fields_from_query`` (default ``True``) -- restrict the
      query with ``only()`` to the serializer fields that map to a model
      field which is either not a relation or is many-to-one.
    * ``auto_select_related_on_query`` (default ``True``) -- for expanded
      fields, ``select_related`` many-to-one relations and
      ``prefetch_related`` all other relations.
    * ``required_query_fields`` (default empty) -- extra names always
      passed to ``only()``.

    Returns:
        The possibly narrowed queryset.
    """
    if not issubclass(view.get_serializer_class(), FlexFieldsSerializerMixin):
        return queryset
    if request.method != "GET":
        return queryset

    auto_remove_fields_from_query = getattr(view, "auto_remove_fields_from_query", True)
    auto_select_related_on_query = getattr(view, "auto_select_related_on_query", True)
    required_query_fields = list(getattr(view, "required_query_fields", []))

    serializer = view.get_serializer(  # type: FlexFieldsSerializerMixin
        context=view.get_serializer_context())
    serializer.apply_flex_fields()

    # Resolve each serializer field to its model field exactly once.
    model_fields = []
    nested_model_fields = []
    for field in serializer.fields.values():
        model_field = self._get_field(field.source, queryset.model)
        if not model_field:
            continue
        model_fields.append(model_field)
        if field.field_name in serializer.expanded_fields:
            nested_model_fields.append(model_field)

    if auto_remove_fields_from_query:
        loadable = [
            model_field.name
            for model_field in model_fields
            if not model_field.is_relation or model_field.many_to_one
        ]
        queryset = queryset.only(*(required_query_fields + loadable))

    if auto_select_related_on_query and nested_model_fields:
        relations = [mf for mf in nested_model_fields if mf.is_relation]
        to_one = [mf.name for mf in relations if mf.many_to_one]
        to_many = [mf.name for mf in relations if not mf.many_to_one]
        # select_related() with no arguments makes Django follow every
        # non-null foreign key, so only call it when there are names.
        if to_one:
            queryset = queryset.select_related(*to_one)
        if to_many:
            queryset = queryset.prefetch_related(*to_many)

    return queryset
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Build the action listing for ``team_id``, ordered by descending id.

    A truthy ``params[TREND_FILTER_TYPE_ACTIONS]`` limits the result to the
    ids of the actions ``Filter`` extracts from the JSON in
    ``params['actions']`` (``'[]'`` when missing). A truthy
    ``params['include_count']`` adds a ``count`` annotation, a ``Count``
    over ``TREND_FILTER_TYPE_EVENTS``. ``steps`` are always prefetched,
    ordered by ``id``.
    """
    filter_by_actions = params.get(TREND_FILTER_TYPE_ACTIONS)
    if filter_by_actions:
        action_filter = Filter({'actions': json.loads(params.get('actions', '[]'))})
        action_ids = []
        for action in action_filter.actions:
            action_ids.append(action.id)
        queryset = queryset.filter(pk__in=action_ids)

    if params.get('include_count'):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = ActionStep.objects.order_by('id')
    queryset = queryset.prefetch_related(Prefetch('steps', queryset=ordered_steps))
    return queryset.filter(team_id=team_id).order_by('-id')
def identifiers_dataframe(self, qs: QuerySet) -> pd.DataFrame:
    """
    Build a table of external identifier values for each row of a queryset.

    Only ``constants.HERO`` and ``constants.PUBMED`` identifiers are
    exported; a warning is logged if the queryset references any other
    identifier database.

    Args:
        qs (QuerySet): A queryset with an ``identifiers`` relation

    Returns:
        pd.DataFrame: One row per id that has a HERO or PubMed identifier,
            with columns ``reference_id``, ``hero_id`` and ``pubmed_id``
    """
    qs = qs.prefetch_related("identifiers")

    # warn about identifier databases this export does not capture
    captured = {None, constants.HERO, constants.PUBMED}
    databases = set(qs.values_list("identifiers__database", flat=True).distinct())
    diff = databases - captured
    if diff:
        logging.warning(f"Missing some identifier IDs from id export: {diff}")

    # collect the integer ids, HERO first and PUBMED second
    data = defaultdict(dict)
    exported = ((constants.HERO, "hero_id"), (constants.PUBMED, "pubmed_id"))
    for database, column in exported:
        pairs = qs.filter(identifiers__database=database).values_list(
            "id", "identifiers__unique_id"
        )
        for hawc_id, unique_id in pairs:
            data[hawc_id][column] = int(unique_id)

    # one row per id; the index becomes the reference_id column
    df = pd.DataFrame.from_dict(data, orient="index")
    df = df.reset_index()
    df = df.rename(columns={"index": "reference_id"})

    # make sure both id columns exist even when nothing was captured
    for col in ("hero_id", "pubmed_id"):
        if col not in df.columns:
            df[col] = None
    return df
def _filter_request(self, request: request.Request, queryset: QuerySet, team: Team) -> QuerySet:
    """Apply the ``id``, ``search`` and ``cohort`` GET filters to ``queryset``.

    ``id`` is a comma-separated list matched with ``__in``. In ``search``,
    a term containing ``:`` requires the property named after the first
    colon to exist; the other terms are matched as one case-insensitive
    phrase against ``properties``. ``cohort`` is delegated to
    ``self._filter_cohort``. The result always prefetches
    ``persondistinctid_set`` into ``distinct_ids_cache``.
    """
    query = request.GET

    if query.get('id'):
        queryset = queryset.filter(id__in=query['id'].split(','))

    if query.get('search'):
        free_text = []
        for term in query['search'].split(' '):
            if ':' not in term:
                free_text.append(term)
                continue
            queryset = queryset.filter(properties__has_key=term.split(':')[1])
        queryset = queryset.filter(properties__icontains=' '.join(free_text))

    if query.get('cohort'):
        queryset = self._filter_cohort(request, queryset, team)

    distinct_ids = Prefetch('persondistinctid_set', to_attr='distinct_ids_cache')
    return queryset.prefetch_related(distinct_ids)
def decorate_queryset(
    cls,
    feature_type: FeatureType,
    queryset: models.QuerySet,
    output_crs: CRS,
    **params,
) -> models.QuerySet:
    """Prepare the queryset for rendering.

    Relations reported by ``cls._get_prefetch_related`` are prefetched, and
    the query is limited with ``only()`` to ``pk`` plus the ORM field of
    every element of the feature's XSD type that is not multi-valued, or
    that is an array. Extra keyword arguments are accepted and ignored.
    """
    # Load related objects up front instead of once per feature.
    prefetches = cls._get_prefetch_related(feature_type, output_crs)
    if prefetches:
        queryset = queryset.prefetch_related(*prefetches)

    # Only request the columns that are actually rendered.
    shown_fields = []
    for element in feature_type.xsd_type.elements:
        if element.is_many and not element.is_array:
            # Multi-valued relation (e.g. M2M); ArrayField stays included.
            continue
        shown_fields.append(element.orm_field)

    return queryset.only("pk", *shown_fields)
def _filter_request(self, request: request.Request, queryset: QuerySet, team: Team) -> QuerySet:
    """Apply the request's filters to ``queryset``.

    GET parameters: ``id`` (comma-separated ``__in`` match), ``search``
    (terms containing ``:`` require the property named after the first
    colon to exist; the other terms are matched as one case-insensitive
    phrase against ``properties`` or a related distinct id), ``cohort`` and
    JSON ``properties`` (converted through ``Filter`` for ``team``).

    The ``category`` query parameter keeps only rows with
    ``is_identified=True`` when it is ``"identified"`` and excludes those
    rows when it is ``"anonymous"``; other values are ignored.

    The result always prefetches ``persondistinctid_set`` into
    ``distinct_ids_cache``.
    """
    query = request.GET

    if query.get("id"):
        queryset = queryset.filter(id__in=query["id"].split(","))

    if query.get("search"):
        terms = query["search"].split(" ")
        for term in terms:
            if ":" in term:
                queryset = queryset.filter(properties__has_key=term.split(":")[1])
        phrase = " ".join(term for term in terms if ":" not in term)
        queryset = queryset.filter(
            Q(properties__icontains=phrase)
            | Q(persondistinctid__distinct_id__icontains=phrase)
        ).distinct("id")

    if query.get("cohort"):
        queryset = queryset.filter(cohort__id=query["cohort"])

    if query.get("properties"):
        property_filter = Filter(data={"properties": json.loads(query["properties"])})
        queryset = queryset.filter(property_filter.properties_to_Q(team_id=team.pk))

    category = request.query_params.get("category")
    if category == "identified":
        queryset = queryset.filter(is_identified=True)
    elif category == "anonymous":
        queryset = queryset.exclude(is_identified=True)

    distinct_ids = Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    return queryset.prefetch_related(distinct_ids)
def prefetch(self, queryset: models.QuerySet) -> models.QuerySet:
    """Prefetch this join, and every nested join, onto ``queryset``.

    The related rows come from ``self.model``, narrowed by the AND of all
    ``self.filters`` and ordered by ``self.sort`` when those are set.
    Fields in ``self._one_fields`` / ``self._many_fields`` that are not keys
    of ``self.joins`` are select-related / prefetched on that inner query.
    Each entry of ``self.joins`` then adds its own prefetch in turn.
    """
    related_rows = self.model.objects.all()

    if self.filters:
        conditions = (f.get() for f in self.filters)
        related_rows = related_rows.filter(reduce(operator.and_, conditions))

    if self.sort:
        related_rows = related_rows.order_by(*self.sort)

    # Fields that have their own join are handled by the recursion below.
    to_one = [name for name in self._one_fields if name not in self.joins.keys()]
    related_rows = related_rows.select_related(*to_one)
    to_many = [name for name in self._many_fields if name not in self.joins.keys()]
    related_rows = related_rows.prefetch_related(*to_many)

    result = queryset.prefetch_related(
        models.Prefetch(self.field, queryset=related_rows)
    )

    # Recursively prefetch inner joins
    for inner_join in self.joins.values():
        result = inner_join.prefetch(result)
    return result
def filter_user_not_in(self, queryset: QuerySet, name, value):
    """Prefetch ``users`` without the user whose primary key is ``value``.

    The rows of ``queryset`` are not filtered; only the prefetched
    ``users`` of each row leave out that user. ``name`` is unused.
    """
    other_users = models.User.objects.exclude(pk=value)
    users_lookup = Prefetch('users', queryset=other_users)
    return queryset.prefetch_related(users_lookup)
def prefetch_related_fields(qs: QuerySet):
    """Return ``qs`` with its commonly used relations prefetched.

    The lookups are ``source``, ``object__type``, ``parent_object`` and
    ``problem_type``.
    """
    lookups = ("source", "object__type", "parent_object", "problem_type")
    return qs.prefetch_related(*lookups)
def bulk_updater(self, queryset: QuerySet,
                 update_fields: Optional[Set[str]] = None,
                 return_pks: bool = False,
                 local_only: bool = False,
                 querysize: Optional[int] = None) -> Optional[Set[Any]]:
    """
    Update local computed fields and descent in the dependency tree by calling
    ``update_dependent`` for dependent models.

    This method does the local field updates on `queryset`:

        - eval local `MRO` of computed fields
        - expand `update_fields`
        - apply optional `select_related` and `prefetch_related` rules to `queryset`
        - walk all records and recalculate fields in `update_fields`
        - aggregate changeset and save as batched `bulk_update` to the database

    By default this method triggers the update of dependent models by calling
    ``update_dependent`` with `update_fields` (next level of tree traversal).
    This can be suppressed by setting `local_only=True`.

    If `return_pks` is set, the method returns a set of altered pks of `queryset`.

    Args:
        queryset: Records whose computed fields should be recalculated.
        update_fields: Field names used to select the local MRO. A non-empty
            set is extended IN PLACE with the computed fields from that MRO.
        return_pks: Return the set of changed pks instead of ``None``.
        local_only: Skip the ``update_dependent`` call.
        querysize: Forwarded to ``get_querysize``, whose result sets the
            slice size used while iterating `queryset`.

    Returns:
        The set of pks whose computed values changed if `return_pks` is set,
        otherwise ``None``.
    """
    model: Type[Model] = queryset.model

    # distinct issue workaround
    # the workaround is needed for already sliced/distinct querysets coming from outside
    # TODO: distinct is a major query perf smell, and is in fact only needed on back relations
    #       may need some rework in _querysets_for_update
    #       ideally we find a way to avoid it for forward relations
    #       also see #101
    if queryset.query.can_filter() and not queryset.query.distinct_fields:
        queryset = queryset.distinct()
    else:
        # cannot apply distinct() directly here, so re-select the same rows by pk
        queryset = model.objects.filter(
            pk__in=subquery_pk(queryset, queryset.db))

    # correct update_fields by local mro
    mro = self.get_local_mro(model, update_fields)
    fields: Any = set(
        mro)  # FIXME: narrow type once issue in django-stubs is resolved
    if update_fields:
        # mutates the caller's set: the expanded field list is visible outside
        update_fields.update(fields)

    select = self.get_select_related(model, fields)
    prefetch = self.get_prefetch_related(model, fields)
    if select:
        queryset = queryset.select_related(*select)
    if prefetch:
        queryset = queryset.prefetch_related(*prefetch)

    # pks of all records whose computed values actually changed
    pks = []
    if fields:
        q_size = self.get_querysize(model, fields, querysize)
        # pending records, flushed whenever self._batchsize is reached
        change: List[Model] = []
        for elem in slice_iterator(queryset, q_size):
            # note on the loop: while it is technically not needed to batch things here,
            # we still prebatch to not cause memory issues for very big querysets
            has_changed = False
            # recompute in mro order; new values are set on the instance right away
            for comp_field in mro:
                new_value = self._compute(elem, model, comp_field)
                if new_value != getattr(elem, comp_field):
                    has_changed = True
                    setattr(elem, comp_field, new_value)
            if has_changed:
                change.append(elem)
                pks.append(elem.pk)
            if len(change) >= self._batchsize:
                self._update(queryset, change, fields)
                change = []
        # flush the last, partially filled batch
        if change:
            self._update(queryset, change, fields)

    # trigger dependent comp field updates from changed records
    # other than before we exit the update tree early, if we have no changes at all
    # also cuts the update tree for recursive deps (tree-like)
    if not local_only and pks:
        self.update_dependent(model.objects.filter(pk__in=pks),
                              model,
                              fields,
                              update_local=False)
    return set(pks) if return_pks else None
def __prefetch(query: QuerySet, prefetch: bool) -> QuerySet:
    """Return ``query``, prefetching categories only when ``prefetch`` is truthy.

    The prefetched lookups are ``categories`` and ``leaf_category``.
    """
    if not prefetch:
        return query
    return query.prefetch_related("categories", "leaf_category")