def _location_queryset_helper(domain, location_pks):
    fixture_ids = With(raw_cte_sql(
        """
        SELECT "id", "path", "depth"
        FROM get_location_fixture_ids(%s::TEXT, %s)
        """,
        [domain, location_pks],
        {"id": int_field, "path": int_array, "depth": int_field},
    ))
    return fixture_ids.join(
        SQLLocation.objects.all(),
        id=fixture_ids.col.id,
    ).annotate(
        path=fixture_ids.col.path,
        depth=fixture_ids.col.depth,
    ).with_cte(fixture_ids).prefetch_related('location_type', 'parent')
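# Hypothetical usage sketch (not from the original source): the helper takes a
# domain name and a list of location primary keys, and yields SQLLocation rows
# annotated with the `path` and `depth` columns computed by the
# get_location_fixture_ids() database function.
def example_location_fixture_usage():
    locations = _location_queryset_helper("example-domain", [1001, 1002])
    for loc in locations:
        print(loc.id, loc.depth, loc.path)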
def get_ancestors(self, node, ascending=False, include_self=False):
    """Query node ancestors

    :param node: A model instance or a QuerySet or Q object querying
    the adjacency list model. If a QuerySet, it should query a single
    value with something like `.values('id')`. If Q the `include_self`
    argument will be ignored.
    :param ascending: Order of results. The default (`False`) gets
    results in descending order (root ancestor first, immediate
    parent last).
    :param include_self: Include the node itself in the results.
    :returns: A `QuerySet` instance.
    """
    if isinstance(node, Q):
        where = node
    elif include_self:
        if isinstance(node, QuerySet):
            if _is_empty(node):
                return self.none()
            where = Q(id__in=node.order_by())
        else:
            where = Q(id=node.id)
    elif isinstance(node, QuerySet):
        if _is_empty(node):
            return self.none()
        where = Q(id__in=node.order_by().values("parent_id"))
    else:
        where = Q(id=node.parent_id)

    def make_cte_query(cte):
        return self.filter(where).order_by().annotate(
            _depth=Value(0, output_field=field),
        ).union(
            cte.join(
                self.all().order_by(),
                id=cte.col.parent_id,
            ).annotate(
                _depth=cte.col._depth + Value(1, output_field=field),
            ),
        )

    cte = With.recursive(make_cte_query)
    return (
        cte.queryset()
        .with_cte(cte)
        .order_by(("" if ascending else "-") + "_depth")
    )
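# A minimal usage sketch, assuming this manager is attached to an
# adjacency-list model (here a hypothetical `Location` with a `parent`
# foreign key). Ancestors come back root-first by default; pass
# ascending=True to get the immediate parent first.
def example_get_ancestors(node):
    lineage = Location.objects.get_ancestors(node, include_self=True)
    return [loc.name for loc in lineage]  # e.g. ['root', 'region', 'site']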
def get_descendants(self, node, include_self=False):
    """Query node descendants

    :param node: A model instance or a QuerySet or Q object querying
    the adjacency list model. If a QuerySet, it should query a single
    value with something like `.values('id')`. If Q the `include_self`
    argument will be ignored.
    :returns: A `QuerySet` instance.
    """
    ordering_col = self.model.ordering_col_attr

    discard_dups = False
    if isinstance(node, Q):
        where = node
        discard_dups = True
    elif include_self:
        if isinstance(node, QuerySet):
            if _is_empty(node):
                return self.none()
            where = Q(id__in=node.order_by())
            discard_dups = True
        else:
            where = Q(id=node.id)
    elif isinstance(node, QuerySet):
        if _is_empty(node):
            return self.none()
        where = Q(parent_id__in=node.order_by())
        discard_dups = True
    else:
        where = Q(parent_id=node.id)

    def make_cte_query(cte):
        return self.filter(where).order_by().annotate(
            _cte_ordering=str_array(ordering_col),
        ).union(
            cte.join(
                self.all().order_by(),
                parent_id=cte.col.id,
            ).annotate(
                _cte_ordering=array_append(
                    cte.col._cte_ordering,
                    F(ordering_col),
                )
            ),
            all=True,
        )

    cte = With.recursive(make_cte_query)
    query = cte.queryset().with_cte(cte)

    if discard_dups:
        # Remove duplicates when the supplied QuerySet or Q object
        # may contain/match both parents and children. For a given
        # id, retain the row with the longest path. TODO remove this
        # and ensure duplicates do not matter or the criteria never
        # matches both parents and children in all calling code.
        xdups = With(
            cte.queryset().annotate(
                max_len=array_length(
                    F("_cte_ordering"), output_field=field
                ),
            ).distinct("id").order_by(
                "id",
                "-max_len",
            ).values(
                "id",
                "_cte_ordering",
            ),
            name="xdups",
        )
        query = query.annotate(
            _exclude_dups=Exists(SubQueryset(xdups.queryset().filter(
                id=OuterRef("id"),
                _cte_ordering=OuterRef("_cte_ordering"),
            )))
        ).filter(_exclude_dups=True).with_cte(xdups)

    return query.order_by(cte.col._cte_ordering)
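# A minimal usage sketch for the descendants query above. `Location` is a
# hypothetical adjacency-list model; results are ordered by the accumulated
# `_cte_ordering` path, so siblings sort by the model's ordering column.
def example_get_descendants(node):
    subtree = Location.objects.get_descendants(node, include_self=True)
    return list(subtree.values_list("id", flat=True))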
def build(self):
    tree_ids = self.query.values('tree_id')
    return With(
        ContentNode.objects.filter(tree_id__in=tree_ids).values(*set(self.columns)),
        name='tree_cte',
    )
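# Hypothetical usage sketch: the CTE built above can be joined back to
# ContentNode to restrict a query to the matched trees. It assumes
# 'tree_id' is among `self.columns`; `builder` is illustrative only.
def example_tree_cte_usage(builder):
    cte = builder.build()
    return cte.join(ContentNode, tree_id=cte.col.tree_id).with_cte(cte)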
def get_recursive_subgroups(user_group: UserGroup) -> "QuerySet[UserGroup]":
    cte = With.recursive(
        lambda cte: UserGroup.objects.filter(id=user_group.id)
        .values("id")
        .union(cte.join(UserGroup, direct_supergroups=cte.col.id).values("id"))
    )
    return cte.join(UserGroup, id=cte.col.id).with_cte(cte)
def get_recursive_membership_groups(
    user_profile: UserProfile,
) -> "QuerySet[UserGroup]":
    cte = With.recursive(
        lambda cte: user_profile.direct_groups.values("id")
        .union(cte.join(UserGroup, direct_subgroups=cte.col.id).values("id"))
    )
    return cte.join(UserGroup, id=cte.col.id).with_cte(cte)
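# A usage sketch for the two recursive group helpers above. Both return lazy
# QuerySets, so membership checks compose without extra round trips;
# `some_group` and `some_profile` are hypothetical arguments.
def example_group_membership(some_group, some_profile):
    is_member = get_recursive_membership_groups(some_profile).filter(id=some_group.id).exists()
    subgroup_ids = list(get_recursive_subgroups(some_group).values_list("id", flat=True))
    return is_member, subgroup_ids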
def handle(self, *args, **options):
    start = time.time()

    # Mark invalid titles
    titlestart = time.time()
    logging.info('Marking blank titles...')
    count = ContentNode.objects.exclude(complete=False).filter(title='').order_by().update(complete=False)
    logging.info('Marked {} invalid titles (finished in {})'.format(count, time.time() - titlestart))

    # Mark invalid licenses
    licensestart = time.time()
    logging.info('Marking blank licenses...')
    count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC).filter(license__isnull=True).order_by().update(complete=False)
    logging.info('Marked {} invalid licenses (finished in {})'.format(count, time.time() - licensestart))

    licensestart = time.time()
    logging.info('Marking blank license descriptions...')
    custom_licenses = list(License.objects.filter(is_custom=True).values_list("pk", flat=True))
    count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC)\
        .filter(license_id__in=custom_licenses).filter(Q(license_description__isnull=True) | Q(license_description=''))\
        .order_by().update(complete=False)
    logging.info('Marked {} invalid license descriptions (finished in {})'.format(count, time.time() - licensestart))

    licensestart = time.time()
    logging.info('Marking blank copyright holders...')
    copyright_licenses = list(License.objects.filter(copyright_holder_required=True).values_list("pk", flat=True))
    count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC)\
        .filter(license_id__in=copyright_licenses).filter(Q(copyright_holder__isnull=True) | Q(copyright_holder=''))\
        .order_by().update(complete=False)
    logging.info('Marked {} invalid copyright holders (finished in {})'.format(count, time.time() - licensestart))

    # Mark invalid file resources
    resourcestart = time.time()
    logging.info('Marking file resources...')
    file_check_query = With(File.objects.filter(preset__supplementary=False).values("contentnode_id").order_by(), name="t_file")
    query = file_check_query.join(ContentNode, id=file_check_query.col.contentnode_id, _join_type=LOUTER)\
        .with_cte(file_check_query) \
        .annotate(t_contentnode_id=file_check_query.col.contentnode_id) \
        .exclude(kind_id=content_kinds.TOPIC) \
        .exclude(kind_id=content_kinds.EXERCISE) \
        .exclude(complete=False) \
        .filter(t_contentnode_id__isnull=True)\
        .order_by()
    count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)
    logging.info('Marked {} invalid file resources (finished in {})'.format(count, time.time() - resourcestart))

    # Mark invalid exercises
    exercisestart = time.time()
    logging.info('Marking exercises...')
    has_questions_query = With(AssessmentItem.objects.all().values("contentnode_id").order_by(), name="t_assessmentitem")
    query = has_questions_query.join(ContentNode, id=has_questions_query.col.contentnode_id, _join_type=LOUTER)\
        .with_cte(has_questions_query) \
        .annotate(t_contentnode_id=has_questions_query.col.contentnode_id) \
        .filter(kind_id=content_kinds.EXERCISE) \
        .exclude(complete=False) \
        .filter(t_contentnode_id__isnull=True)\
        .order_by()

    exercisestart = time.time()
    count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)
    logging.info('Marked {} questionless exercises (finished in {})'.format(count, time.time() - exercisestart))

    exercisestart = time.time()
    exercise_check_query = With(
        AssessmentItem.objects.exclude(type=exercises.PERSEUS_QUESTION)
        .filter(
            Q(question='') |
            Q(answers='[]') |
            # hack to check if no correct answers
            (~Q(type=exercises.INPUT_QUESTION) & ~Q(answers__iregex=r'"correct":\s*true'))
        ).order_by(),
        name="t_assessmentitem",
    )
    query = exercise_check_query.join(ContentNode, id=exercise_check_query.col.contentnode_id)\
        .with_cte(exercise_check_query) \
        .annotate(t_contentnode_id=exercise_check_query.col.contentnode_id) \
        .filter(kind_id=content_kinds.EXERCISE) \
        .exclude(complete=False) \
        .order_by()
    count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)
    logging.info('Marked {} invalid exercises (finished in {})'.format(count, time.time() - exercisestart))

    exercisestart = time.time()
    logging.info('Marking mastery_model-less exercises...')
    count = ContentNode.objects.exclude(complete=False).filter(kind_id=content_kinds.EXERCISE).filter(~Q(extra_fields__has_key='mastery_model'))\
        .order_by().update(complete=False)
    logging.info('Marked {} mastery_model-less exercises (finished in {})'.format(count, time.time() - exercisestart))

    exercisestart = time.time()
    logging.info('Marking bad mastery model exercises...')
    count = ContentNode.objects.exclude(complete=False).filter(kind_id=content_kinds.EXERCISE)\
        .filter(Q(extra_fields__mastery_model=exercises.M_OF_N) & (~Q(extra_fields__has_key='m') | ~Q(extra_fields__has_key='n')))\
        .order_by().update(complete=False)
    logging.info('Marked {} bad mastery model exercises (finished in {})'.format(count, time.time() - exercisestart))

    logging.info('Mark incomplete command completed in {}s'.format(time.time() - start))
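# The file and exercise checks above share one pattern: build a CTE of
# child-row keys, LEFT-OUTER-join it to ContentNode, and keep only rows where
# the CTE column is NULL (an anti-join finding nodes with no matching
# children). A minimal standalone sketch of that pattern with the same models:
def example_nodes_without_files():
    files = With(File.objects.values("contentnode_id").order_by(), name="t_file")
    return (
        files.join(ContentNode, id=files.col.contentnode_id, _join_type=LOUTER)
        .with_cte(files)
        .annotate(t_contentnode_id=files.col.contentnode_id)
        .filter(t_contentnode_id__isnull=True)
        .order_by()
    )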
def test_named_ctes(self):
    def make_paths_cte(paths):
        return Region.objects.filter(parent__isnull=True).values(
            "name",
            path=F("name"),
        ).union(
            paths.join(Region, parent=paths.col.name).values(
                "name",
                path=Concat(
                    paths.col.path, Value(" "), F("name"),
                    output_field=text_field,
                ),
            ),
            all=True,
        )
    paths = With.recursive(make_paths_cte, name="region_paths")

    def make_groups_cte(groups):
        return paths.join(Region, name=paths.col.name).values(
            "name",
            parent_path=paths.col.path,
            parent_name=F("name"),
        ).union(
            groups.join(Region, parent=groups.col.name).values(
                "name",
                parent_path=groups.col.parent_path,
                parent_name=groups.col.parent_name,
            ),
            all=True,
        )
    groups = With.recursive(make_groups_cte, name="region_groups")

    region_totals = With(
        groups.join(Order, region_id=groups.col.name).values(
            name=groups.col.parent_name,
            path=groups.col.parent_path,
        ).annotate(
            orders_count=Count("id"),
            region_total=Sum("amount"),
        ),
        name="region_totals",
    )

    regions = (
        region_totals.join(Region, name=region_totals.col.name)
        .with_cte(paths)
        .with_cte(groups)
        .with_cte(region_totals)
        .annotate(
            path=region_totals.col.path,
            # count of orders in this region and all subregions
            orders_count=region_totals.col.orders_count,
            # sum of order amounts in this region and all subregions
            region_total=region_totals.col.region_total,
        )
        .order_by("path")
    )
    print(regions.query)

    data = [(r.name, r.orders_count, r.region_total) for r in regions]
    self.assertEqual(data, [
        ('proxima centauri', 4, 2033),
        ('proxima centauri b', 3, 33),
        ('sun', 18, 1374),
        ('earth', 7, 132),
        ('moon', 3, 6),
        ('mars', 3, 123),
        ('mercury', 3, 33),
        ('venus', 4, 86),
    ])
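# A minimal sketch of the naming rule the test exercises: when several CTEs
# appear in one query, each `With` needs a distinct name (the default is
# "cte"), and those names become the WITH-clause aliases in the emitted SQL.
# The thresholds and models reuse the test fixtures above; the helper itself
# is illustrative, not from the original source.
def example_two_named_ctes():
    big = With(Order.objects.filter(amount__gt=100).values("region_id"), name="big_orders")
    small = With(Order.objects.filter(amount__lte=100).values("region_id"), name="small_orders")
    # regions that have at least one big and one small order
    qs = big.join(Region, name=big.col.region_id)
    qs = small.join(qs, name=small.col.region_id)
    return qs.with_cte(big).with_cte(small)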
def merge_m2m(self, data, field, prefetch):
    # Strategy: pull out all my IDs, do a reverse filter on remote object.
    # e.g.: If prefetching User.groups, do
    #       Groups.filter(users__in=<user_ids>)
    ids = self._get_ids(data)

    base_qs = prefetch.query.queryset  # base queryset on remote model
    remote_pk_field = base_qs.model._meta.pk.attname  # get pk field name
    reverse_field = get_reverse_m2m_field_name(field)

    if reverse_field is None:
        # Note: We can't just reuse self.queryset here because it's
        # been sliced already.
        filters = {field.attname + '__isnull': False}
        qs = self.queryset.model.objects.filter(pk__in=ids, **filters)
        joins = list(qs.values_list(field.attname, self.pk_field))
    else:
        # Get reverse mapping (for User.groups, get Group.users)
        # Note: `qs` already has base filter applied on remote model.
        filters = {f'{reverse_field}__in': ids}
        if has_limits(base_qs):
            # remove limits, then use CTE + RowNumber
            # to re-introduce them using window functions
            base_qs = base_qs._clone()
            low, high = get_limits(base_qs)
            clear_limits(base_qs)
            order_by = base_qs.query.order_by
            if not order_by:
                # if there is no order, we need to use pk
                order_by = ['pk']
            cte = With(
                base_qs.annotate(**{
                    '..row': Window(
                        expression=RowNumber(),
                        partition_by=[reverse_field],
                        order_by=order_by,
                    ),
                }).filter(**filters)
            )
            joins = cte.queryset().with_cte(cte).filter(**{
                '..row__lte': high,
                '..row__gt': low,
            }).order_by(*order_by).distinct()
        else:
            # no limits, use simple filtering
            joins = base_qs.filter(**filters)
        joins = list(joins.values_list(remote_pk_field, reverse_field))

    # Fetch remote objects, as values.
    remote_ids = set([o[0] for o in joins])

    query = prefetch.query._clone()
    # remove limits to get IDs without extra filtering issues
    if has_limits(query.queryset):
        clear_limits(query.queryset)

    remote_objects = query.get_ids(remote_ids).execute()
    id_map = self._make_id_map(remote_objects, pk_field=remote_pk_field)

    # Create mapping of local ID -> remote objects
    to_attr = prefetch.to_attr or prefetch.field
    object_map = defaultdict(list)
    for remote_id, local_id in joins:
        if remote_id in id_map:
            object_map[local_id].append(id_map[remote_id])

    # Merge into working data set.
    for row in data:
        row[to_attr] = object_map[row.get(self.pk_field, row['pk'])]

    return data
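# A standalone sketch of the windowed-limit trick used above: plain Django
# cannot filter on a Window annotation directly, so the rows are numbered
# with ROW_NUMBER() inside a CTE and the slice is applied by filtering the
# CTE's materialized column. `Membership` and its fields are hypothetical.
def example_per_group_limit(group_ids, low=0, high=3):
    cte = With(
        Membership.objects.filter(group_id__in=group_ids).annotate(
            row=Window(
                expression=RowNumber(),
                partition_by=['group_id'],
                order_by=['pk'],
            ),
        )
    )
    # keep at most (high - low) members per group
    return cte.queryset().with_cte(cte).filter(row__lte=high, row__gt=low)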
def disaster_filter_function(filters: dict, download_type: str, values: List[str]):
    aggregation_mapping = {"disaster_recipient": _disaster_recipient_aggregations}

    def_codes = filters["def_codes"]
    query = filters.get("query")
    award_type_codes = filters.get("award_type_codes")

    award_filters = [~Q(total_loan_value=0) | ~Q(total_obligation_by_award=0) | ~Q(total_outlay_by_award=0)]
    if query:
        query_text = query["text"]
        q = Q()
        for field in query["fields"]:
            q |= Q(**{f"{field}__icontains": query_text})
        award_filters.append(q)
    if award_type_codes:
        award_filters.append(Q(type__in=award_type_codes))

    faba_filters = [
        filter_by_defc_closed_periods(),
        Q(disaster_emergency_fund__code__in=def_codes),
    ]

    dollar_annotations = {
        "inner_obligation": Coalesce(Sum("transaction_obligated_amount"), 0),
        "inner_outlay": Coalesce(
            Sum(
                Case(
                    When(
                        filter_by_latest_closed_periods(),
                        then=Coalesce(F("gross_outlay_amount_by_award_cpe"), 0)
                        + Coalesce(F("ussgl487200_down_adj_pri_ppaid_undel_orders_oblig_refund_cpe"), 0)
                        + Coalesce(F("ussgl497200_down_adj_pri_paid_deliv_orders_oblig_refund_cpe"), 0),
                    ),
                    default=Value(0),
                )
            ),
            0,
        ),
    }

    cte = With(
        FinancialAccountsByAwards.objects.filter(*faba_filters)
        .values("award_id")
        .annotate(**dollar_annotations)
        .exclude(inner_obligation=0, inner_outlay=0)
    )

    return (
        cte.join(AwardSearchView, award_id=cte.col.award_id)
        .with_cte(cte)
        .annotate(
            total_obligation_by_award=cte.col.inner_obligation,
            total_outlay_by_award=cte.col.inner_outlay,
        )
        .filter(*award_filters)
        .values(*values)
        .annotate(**aggregation_mapping[download_type]())
    )
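# Hypothetical invocation sketch: `filters` mirrors the shape unpacked above
# (def_codes is required; query and award_type_codes are optional), and the
# download_type must be a key of aggregation_mapping. The DEF codes, award
# type codes, and values list here are illustrative only.
def example_disaster_download():
    filters = {
        "def_codes": ["L", "M"],
        "query": {"text": "relief", "fields": ["recipient_name"]},
        "award_type_codes": ["07", "08"],
    }
    return disaster_filter_function(filters, "disaster_recipient", ["recipient_name"])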
def construct_loan_queryset(self, faba_grouping_column, base_model, base_model_column):
    grouping_key = F(faba_grouping_column) if isinstance(faba_grouping_column, str) else faba_grouping_column

    base_values = With(
        FinancialAccountsByAwards.objects.filter(
            Q(award__type__in=loan_type_mapping),
            self.all_closed_defc_submissions,
            self.is_in_provided_def_codes,
        )
        .annotate(
            grouping_key=grouping_key,
            total_loan_value=F("award__total_loan_value"),
            reporting_fiscal_year=F("submission__reporting_fiscal_year"),
            reporting_fiscal_period=F("submission__reporting_fiscal_period"),
            quarter_format_flag=F("submission__quarter_format_flag"),
        )
        .filter(grouping_key__isnull=False)
        .values(
            "grouping_key",
            "financial_accounts_by_awards_id",
            "award_id",
            "transaction_obligated_amount",
            "gross_outlay_amount_by_award_cpe",
            "reporting_fiscal_year",
            "reporting_fiscal_period",
            "quarter_format_flag",
            "total_loan_value",
        ),
        "base_values",
    )

    q = Q()
    for sub in final_submissions_for_all_fy():
        q |= (
            Q(reporting_fiscal_year=sub.fiscal_year)
            & Q(quarter_format_flag=sub.is_quarter)
            & Q(reporting_fiscal_period=sub.fiscal_period)
        )

    aggregate_faba = With(
        base_values.queryset()
        .values("grouping_key")
        .annotate(
            obligation=Coalesce(Sum("transaction_obligated_amount"), 0),
            outlay=Coalesce(
                Sum(Case(When(q, then=F("gross_outlay_amount_by_award_cpe")), default=Value(0))),
                0,
            ),
        )
        .values("grouping_key", "obligation", "outlay"),
        "aggregate_faba",
    )

    distinct_awards = With(
        base_values.queryset().values("grouping_key", "award_id", "total_loan_value").distinct(),
        "distinct_awards",
    )

    aggregate_awards = With(
        distinct_awards.queryset()
        .values("grouping_key")
        .annotate(award_count=Count("award_id"), face_value_of_loan=Coalesce(Sum("total_loan_value"), 0))
        .values("grouping_key", "award_count", "face_value_of_loan"),
        "aggregate_awards",
    )

    return Bunch(
        award_count_column=aggregate_awards.col.award_count,
        obligation_column=aggregate_faba.col.obligation,
        outlay_column=aggregate_faba.col.outlay,
        face_value_of_loan_column=aggregate_awards.col.face_value_of_loan,
        queryset=aggregate_awards.join(
            aggregate_faba.join(base_model, **{base_model_column: aggregate_faba.col.grouping_key}),
            **{base_model_column: aggregate_awards.col.grouping_key},
        )
        .with_cte(base_values)
        .with_cte(aggregate_faba)
        .with_cte(distinct_awards)
        .with_cte(aggregate_awards),
    )
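# A usage sketch for the Bunch returned above: the caller takes the joined
# queryset and surfaces the exposed CTE columns as annotations. The grouping
# column, base model, and annotation names here are assumptions chosen for
# illustration, not calls from the original source.
def example_loan_totals(self):
    loans = self.construct_loan_queryset(
        "treasury_account_id", TreasuryAppropriationAccount, "treasury_account_identifier"
    )
    return loans.queryset.annotate(
        award_count=loans.award_count_column,
        obligation=loans.obligation_column,
        outlay=loans.outlay_column,
        face_value_of_loan=loans.face_value_of_loan_column,
    )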