Example #1
    def query_elasticsearch(self, time_periods: list) -> list:
        filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
            self.filters)
        search = TransactionSearch().filter(filter_query)
        self.apply_elasticsearch_aggregations(search)
        response = search.handle_execute()
        return self.build_elasticsearch_result(response.aggs, time_periods)

    def build_elasticsearch_search_with_aggregation(
            self, filter_query: ES_Q) -> Optional[TransactionSearch]:
        # Create the initial search using filters
        search = TransactionSearch().filter(filter_query)

        # Check number of unique terms (buckets) for performance and restrictions on maximum buckets allowed
        bucket_count = get_number_of_unique_terms(filter_query,
                                                  f"{self.agg_key}.hash")

        if bucket_count == 0:
            return None

        # Add 100 to make sure that we consider enough records in each shard for accurate results
        group_by_agg_key = A("terms",
                             field=self.agg_key,
                             size=bucket_count,
                             shard_size=bucket_count + 100)
        sum_aggregations = get_scaled_sum_aggregations(self.obligation_column)
        sum_field = sum_aggregations["sum_field"]

        search.aggs.bucket("group_by_agg_key",
                           group_by_agg_key).metric("sum_field", sum_field)

        # Set size to 0 since we don't care about documents returned
        search.update_from_dict({"size": 0})

        return search
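
For reference, the search built by build_elasticsearch_search_with_aggregation translates to roughly the request body below. This is an illustrative sketch only: the filter clause and the exact shape of sum_field depend on QueryWithFilters and get_scaled_sum_aggregations, which are not shown here, so a plain "sum" aggregation is assumed.

# Illustrative request body; field names and sizes are example values, and sum_field is
# assumed to be a plain "sum" aggregation (the real helper is not shown in this example).
illustrative_body = {
    "size": 0,  # no documents returned, aggregations only
    "query": {"bool": {"filter": []}},  # placeholder for the QueryWithFilters output
    "aggs": {
        "group_by_agg_key": {
            "terms": {
                "field": "recipient_agg_key",  # example agg_key
                "size": 500,         # bucket_count
                "shard_size": 600,   # bucket_count + 100 for per-shard accuracy
            },
            "aggs": {"sum_field": {"sum": {"field": "generated_pragmatic_obligation"}}},
        }
    },
}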
Example #3
def get_total_results(keyword):
    group_by_agg_key_values = {
        "filters": {
            category: {
                "terms": {
                    "type": types
                }
            }
            for category, types in INDEX_ALIASES_TO_AWARD_TYPES.items()
        }
    }
    aggs = A("filters", **group_by_agg_key_values)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]})
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("types", aggs)
    response = search.handle_execute()

    if response is not None:
        try:
            return response["aggregations"]["types"]["buckets"]
        except KeyError:
            logger.error("Unexpected Response")
    else:
        logger.error("No Response")
        return None


def get_number_of_unique_terms(filter_query: ES_Q, field: str) -> int:
    search = TransactionSearch().filter(filter_query)
    cardinality_aggregation = A("cardinality", field=field)
    search.aggs.metric("field_count", cardinality_aggregation)
    response = search.handle_execute()
    response_dict = response.aggs.to_dict()
    return response_dict.get("field_count", {"value": 0})["value"]
Example #5
def get_sum_and_count_aggregation_results(keyword):
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]})
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("prime_awards_obligation_amount",
                       {"sum": {
                           "field": "transaction_amount"
                       }})
    search.aggs.bucket("prime_awards_count",
                       {"value_count": {
                           "field": "transaction_id"
                       }})
    response = search.handle_execute()

    if response is not None:
        try:
            results = {}
            results["prime_awards_count"] = response["aggregations"][
                "prime_awards_count"]["value"]
            results["prime_awards_obligation_amount"] = round(
                response["aggregations"]["prime_awards_obligation_amount"]
                ["value"], 2)
            return results
        except KeyError:
            logger.exception("Unexpected Response")
    else:
        return None
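
A quick usage sketch of the helper above (the keyword is an arbitrary example); the returned dictionary carries the two keys built in the try block:

# Usage sketch; "education" is an illustrative keyword only.
results = get_sum_and_count_aggregation_results("education")
if results is not None:
    print(results["prime_awards_count"])              # e.g. 1234
    print(results["prime_awards_obligation_amount"])  # e.g. 56789.01 (rounded to two decimals)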
Example #6
    def build_elasticsearch_search_with_aggregations(
            self, filter_query: ES_Q) -> Optional[TransactionSearch]:
        """
        Using the provided ES_Q object creates a TransactionSearch object with the necessary applied aggregations.
        """
        # Create the filtered Search Object
        search = TransactionSearch().filter(filter_query)

        sum_aggregations = get_scaled_sum_aggregations(
            "generated_pragmatic_obligation", self.pagination)

        # Need to handle high-cardinality categories differently; this assumes the Elasticsearch index
        # routes documents to shards using "self.category.agg_key" as the routing key
        if self.category.name in self.high_cardinality_categories:
            # 10k is the maximum number of allowed buckets
            size = self.pagination.upper_limit
            shard_size = size
            sum_bucket_sort = sum_aggregations["sum_bucket_truncate"]
            group_by_agg_key_values = {"order": {"sum_field": "desc"}}
        else:
            # Get count of unique buckets; terminate early if there are no buckets matching criteria
            bucket_count = get_number_of_unique_terms(
                filter_query, f"{self.category.agg_key}.hash")
            if bucket_count == 0:
                return None
            else:
                # Add 100 to make sure that we consider enough records in each shard for accurate results;
                # Only needed for non high-cardinality fields since those are being routed
                size = bucket_count
                shard_size = bucket_count + 100
                sum_bucket_sort = sum_aggregations["sum_bucket_sort"]
                group_by_agg_key_values = {}

        if shard_size > 10000:
            logger.warning(
                f"Max number of buckets reached for aggregation key: {self.category.agg_key}."
            )
            raise ElasticsearchConnectionException(
                "Current filters return too many unique items. Narrow filters to return results."
            )

        # Define all aggregations needed to build the response
        group_by_agg_key_values.update({
            "field": self.category.agg_key,
            "size": size,
            "shard_size": shard_size
        })
        group_by_agg_key = A("terms", **group_by_agg_key_values)

        sum_field = sum_aggregations["sum_field"]

        # Apply the aggregations to the TransactionSearch object
        search.aggs.bucket("group_by_agg_key", group_by_agg_key).metric(
            "sum_field", sum_field).pipeline("sum_bucket_sort",
                                             sum_bucket_sort)

        # Set size to 0 since we don't care about documents returned
        search.update_from_dict({"size": 0})

        return search
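
Several of these examples call get_scaled_sum_aggregations without showing it. Below is a minimal sketch of what such a helper could return, inferred only from how its keys ("sum_field", "sum_bucket_sort", "sum_bucket_truncate") are used above; it is an assumption, not the project's actual implementation.

from elasticsearch_dsl import A


def scaled_sum_aggregations_sketch(field, page_from=0, page_size=10):
    """Hypothetical stand-in for get_scaled_sum_aggregations; not the real helper."""
    # Metric aggregation summing the (integer-scaled) monetary field.
    sum_field = A("sum", field=field)
    # bucket_sort pipeline aggregations order the parent terms buckets by that sum;
    # the "truncate" variant additionally pages the buckets.
    sum_bucket_sort = A("bucket_sort", sort=[{"sum_field": {"order": "desc"}}])
    sum_bucket_truncate = A(
        "bucket_sort",
        sort=[{"sum_field": {"order": "desc"}}],
        **{"from": page_from, "size": page_size},
    )
    return {
        "sum_field": sum_field,
        "sum_bucket_sort": sum_bucket_sort,
        "sum_bucket_truncate": sum_bucket_truncate,
    }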
Example #7
def get_sum_aggregation_results(keyword, field="transaction_amount"):
    group_by_agg_key_values = {"field": field}
    aggs = A("sum", **group_by_agg_key_values)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keywords": es_minimal_sanitize(keyword)})
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("transaction_sum", aggs)
    response = search.handle_execute()

    if response:
        return response["aggregations"]
    else:
        return None
Example #8
def get_number_of_unique_terms(filter_query: ES_Q, field: str) -> int:
    """
    Returns the count for a specific filter_query.
    NOTE: This will only work when the number of unique values is 40k or less. This is captured in the Elasticsearch
    documentation for the cardinality aggregation:
        "The maximum supported value is 40000, thresholds above this number will
        have the same effect as a threshold of 40000"
    """
    search = TransactionSearch().filter(filter_query)
    cardinality_aggregation = A("cardinality", field=field)
    search.aggs.metric("field_count", cardinality_aggregation)
    response = search.handle_execute()
    response_dict = response.aggs.to_dict()
    return response_dict.get("field_count", {"value": 0})["value"]


def get_number_of_unique_terms_for_transactions(filter_query: ES_Q, field: str) -> int:
    """
    Returns the count for a specific filter_query.
    NOTE: Counts below the precision_threshold are expected to be close to accurate (per the Elasticsearch
          documentation). Since aggregations do not support more than 10k buckets this value is hard coded to
          11k to ensure that endpoints using Elasticsearch do not cross the 10k threshold. Elasticsearch endpoints
          should be implemented with a safeguard in case this count is above 10k.
    """
    return _get_number_of_unique_terms(TransactionSearch().filter(filter_query), field)
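
The docstring above mentions a precision threshold hard coded to 11k; a hypothetical sketch of the underlying helper follows (the name `_get_number_of_unique_terms` comes from the call above, the body is an assumption):

from elasticsearch_dsl import A


def _get_number_of_unique_terms_sketch(search, field):
    # Hypothetical sketch only. precision_threshold=11000 keeps the cardinality estimate
    # accurate past the 10k bucket ceiling mentioned in the docstring above.
    cardinality_aggregation = A("cardinality", field=field, precision_threshold=11000)
    search.aggs.metric("field_count", cardinality_aggregation)
    response = search.handle_execute()
    return response.aggs.to_dict().get("field_count", {"value": 0})["value"]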
Example #10
    def post(self, request):
        """Returns boolean of whether a download request is greater than the max limit. """
        models = [{
            "name": "subawards",
            "key": "subawards",
            "type": "boolean",
            "default": False
        }]
        models.extend(copy.deepcopy(AWARD_FILTER))
        self.original_filters = request.data.get("filters")
        json_request = TinyShield(models).block(request.data)

        # If no filters are provided, use an empty dict so that all transactions are counted
        filters = json_request.get("filters", {})

        if json_request["subawards"]:
            total_count = subaward_filter(filters).count()
        else:
            filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
                filters)
            search = TransactionSearch().filter(filter_query)
            total_count = search.handle_count()

        if total_count is None:
            total_count = 0

        result = {
            "calculated_transaction_count":
            total_count,
            "maximum_transaction_limit":
            settings.MAX_DOWNLOAD_LIMIT,
            "transaction_rows_gt_limit":
            total_count > settings.MAX_DOWNLOAD_LIMIT,
            "messages": [
                get_generic_filters_message(
                    self.original_filters.keys(),
                    [elem["name"] for elem in AWARD_FILTER])
            ],
        }

        return Response(result)
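
An illustrative response payload for this endpoint (all values are examples only):

# Illustrative response body; the numbers are examples, not real limits.
example_response = {
    "calculated_transaction_count": 150000,
    "maximum_transaction_limit": 500000,  # settings.MAX_DOWNLOAD_LIMIT
    "transaction_rows_gt_limit": False,
    "messages": [],  # generic filter messages, if any
}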
Example #11
def get_download_ids(keyword, field, size=10000):
    """
    returns a generator that
    yields list of transaction ids in chunksize SIZE

    Note: this only works for fields in ES of integer type.
    """
    n_iter = DOWNLOAD_QUERY_SIZE // size

    results = get_total_results(keyword)
    if results is None:
        logger.error(
            "Error retrieving total results. Max number of attempts reached")
        return
    total = sum(results[category]["doc_count"]
                for category in INDEX_ALIASES_TO_AWARD_TYPES.keys())
    required_iter = (total // size) + 1
    n_iter = min(max(1, required_iter), n_iter)
    for i in range(n_iter):
        filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
            {"keyword_search": [es_minimal_sanitize(keyword)]})
        search = TransactionSearch().filter(filter_query)
        group_by_agg_key_values = {
            "field": field,
            "include": {
                "partition": i,
                "num_partitions": n_iter
            },
            "size": size,
            "shard_size": size,
        }
        aggs = A("terms", **group_by_agg_key_values)
        search.aggs.bucket("results", aggs)
        response = search.handle_execute()
        if response is None:
            raise Exception("Breaking generator, unable to reach cluster")
        results = []
        for result in response["aggregations"]["results"]["buckets"]:
            results.append(result["key"])
        yield results
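
A usage sketch for the generator above (the keyword, field, and downstream handler are illustrative only):

# Usage sketch: iterate the id chunks yielded by the generator.
for id_chunk in get_download_ids("education", field="transaction_id", size=10000):
    # each id_chunk is a list of up to `size` transaction ids from one terms partition
    process_chunk(id_chunk)  # process_chunk is a hypothetical downstream handler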
Example #12
def create_es_search(scope, search_text, country=None, state=None):
    """
    Given the parameters, create a dictionary representing the bool-query conditional clauses for Elasticsearch.

    Args:
        scope: which city field to search, either `pop` (place of performance) or `recipient_location`
        search_text: the text the user is typing in and sent to the backend
        country: optional country selected by user
        state: optional state selected by user
    """
    # The base query that will do a wildcard term-level query
    query = {"must": [{"wildcard": {"{}_city_name.keyword".format(scope): search_text + "*"}}]}
    if country != "USA":
        # A non-USA selected country
        if country != ALL_FOREIGN_COUNTRIES:
            query["must"].append({"match": {"{scope}_country_code".format(scope=scope): country}})
        # Create a "Should Not" query with a nested bool, to get everything non-USA
        query["should"] = [
            {
                "bool": {
                    "must": {"exists": {"field": "{}_country_code".format(scope)}},
                    "must_not": [
                        {"match": {"{}_country_code".format(scope): "USA"}},
                        {"match_phrase": {"{}_country_code".format(scope): "UNITED STATES"}},
                    ],
                }
            }
        ]
        query["minimum_should_match"] = 1
    else:
        # USA is selected as country
        query["should"] = [build_country_match(scope, "USA"), build_country_match(scope, "UNITED STATES")]
        query["should"].append({"bool": {"must_not": {"exists": {"field": "{}_country_code".format(scope)}}}})
        query["minimum_should_match"] = 1
        # null country codes are being considered as USA country codes

    if state:
        # If a state was provided, include it in the filter to limit hits
        query["must"].append({"match": {"{}_state_code".format(scope): es_sanitize(state).upper()}})

    search = TransactionSearch().filter(ES_Q("bool", **query))
    return search
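
For example, calling create_es_search("pop", "denver", country="USA", state="CO") builds roughly the bool clauses below. build_country_match is not shown in the source, so its output is assumed to be a match query on the country code field:

# Illustrative bool clauses for scope="pop", search_text="denver", country="USA", state="CO".
example_query = {
    "must": [
        {"wildcard": {"pop_city_name.keyword": "denver*"}},
        {"match": {"pop_state_code": "CO"}},
    ],
    "should": [
        {"match": {"pop_country_code": "USA"}},            # assumed build_country_match output
        {"match": {"pop_country_code": "UNITED STATES"}},  # assumed build_country_match output
        {"bool": {"must_not": {"exists": {"field": "pop_country_code"}}}},  # null codes treated as USA
    ],
    "minimum_should_match": 1,
}
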
    def post(self, request):

        models = [{
            "name": "fields",
            "key": "fields",
            "type": "array",
            "array_type": "text",
            "text_type": "search",
            "optional": False,
        }]
        models.extend(copy.deepcopy(AWARD_FILTER))
        models.extend(copy.deepcopy(PAGINATION))
        for m in models:
            if m["name"] in ("keywords", "award_type_codes", "sort"):
                m["optional"] = False
        validated_payload = TinyShield(models).block(request.data)

        record_num = (validated_payload["page"] -
                      1) * validated_payload["limit"]
        if record_num >= settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW:
            raise UnprocessableEntityException(
                "Page #{page} of size {limit} is over the maximum result limit ({es_limit}). Consider using custom data downloads to obtain large data sets."
                .format(
                    page=validated_payload["page"],
                    limit=validated_payload["limit"],
                    es_limit=settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW,
                ))

        if validated_payload["sort"] not in validated_payload["fields"]:
            raise InvalidParameterException(
                "Sort value not found in fields: {}".format(
                    validated_payload["sort"]))

        if "filters" in validated_payload and "no intersection" in validated_payload[
                "filters"]["award_type_codes"]:
            # "Special case": there will never be results when the website provides this value
            return Response({
                "limit": validated_payload["limit"],
                "results": [],
                "page_metadata": {
                    "page": validated_payload["page"],
                    "next": None,
                    "previous": None,
                    "hasNext": False,
                    "hasPrevious": False,
                },
            })
        sorts = {
            TRANSACTIONS_LOOKUP[validated_payload["sort"]]:
            validated_payload["order"]
        }
        lower_limit = (validated_payload["page"] -
                       1) * validated_payload["limit"]
        upper_limit = (
            validated_payload["page"]) * validated_payload["limit"] + 1
        validated_payload["filters"]["keyword_search"] = [
            es_minimal_sanitize(x)
            for x in validated_payload["filters"]["keywords"]
        ]
        validated_payload["filters"].pop("keywords")
        filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
            validated_payload["filters"])
        search = TransactionSearch().filter(filter_query).sort(
            sorts)[lower_limit:upper_limit]
        response = search.handle_execute()
        return Response(
            self.build_elasticsearch_result(validated_payload, response))
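
The slice bounds above amount to standard offset pagination with one extra record, presumably so the result builder can tell whether a next page exists. A worked example for page=3, limit=10:

# Worked example of the slice arithmetic for page=3, limit=10.
page, limit = 3, 10
lower_limit = (page - 1) * limit  # 20 -> skip the first two pages
upper_limit = page * limit + 1    # 31 -> fetch one extra record beyond the page
assert (lower_limit, upper_limit) == (20, 31)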
Example #14
def test_top_1_fails_with_es_transactions_routed_dangerously(
        client, monkeypatch, elasticsearch_transaction_index, db):
    """
    This confirms the vulnerability of high-cardinality aggregations documented in DEV-4685, which leads to inaccurate
    summing and ordering of sums when taking fewer buckets than the term cardinality.

    This is shown by manually applying a routing key (using a key value stuck in the ``awards.piid`` field here as the
    routing key value) on the index so that documents are distributed as below

    NOTE: This requires an ES cluster with at least 3 shards for the transaction index. Ours should be defaulted to 5.

    Recipient shard0   shard1   shard2   shard3   shard4
    Biz 1      $2.00
    Biz 1                       $ 7.00
    Biz 1                       $ 3.00
    Biz 1              $ 2.00
    Biz 1              $ 3.00
    Biz 1              $ 5.00
    Biz 2              $ 6.00
    Biz 2              $ 3.00
    Biz 2                       $ 2.00
    Biz 2                       $ 3.00
    Biz 2                       $ 4.00
    Biz 2     $13.00

    **IF THIS TEST FAILS**
        - Did our cluster structure change to not be 5 shards per the transaction index?
        - Did the transaction<->award DB linkage change?
        - Did we change ES version or config?
            - Investigate if Elasticsearch has changed the way they do routing or hash routing key values
    """

    # Setup data for this test
    recipient1 = uuid.uuid4()
    recipient2 = uuid.uuid4()

    # Recipient Lookup
    mommy.make("recipient.RecipientLookup",
               id=1,
               recipient_hash=recipient1,
               legal_business_name="Biz 1",
               duns="111")
    mommy.make("recipient.RecipientLookup",
               id=2,
               recipient_hash=recipient2,
               legal_business_name="Biz 2",
               duns="222")

    # Transaction FPDS
    _make_fpds_transaction(1, 1, 2.00, "2020-01-01", "111", "Biz 1")
    _make_fpds_transaction(2, 3, 7.00, "2020-02-02", "111", "Biz 1")
    _make_fpds_transaction(3, 3, 3.00, "2020-03-03", "111", "Biz 1")
    _make_fpds_transaction(4, 2, 2.00, "2020-01-02", "111", "Biz 1")
    _make_fpds_transaction(5, 2, 3.00, "2020-02-03", "111", "Biz 1")
    _make_fpds_transaction(6, 2, 5.00, "2020-03-04", "111", "Biz 1")
    _make_fpds_transaction(7, 2, 6.00, "2020-01-03", "222", "Biz 2")
    _make_fpds_transaction(8, 2, 3.00, "2020-02-04", "222", "Biz 2")
    _make_fpds_transaction(9, 3, 2.00, "2020-03-05", "222", "Biz 2")
    _make_fpds_transaction(10, 3, 3.00, "2020-01-04", "222", "Biz 2")
    _make_fpds_transaction(11, 3, 4.00, "2020-02-05", "222", "Biz 2")
    _make_fpds_transaction(12, 1, 13.00, "2020-03-06", "222", "Biz 2")

    # Awards
    # Jam a routing key value into the piid field, and use the derived piid value for routing documents to shards later
    mommy.make("awards.Award",
               id=1,
               latest_transaction_id=12,
               piid="shard_zero")
    mommy.make("awards.Award", id=2, latest_transaction_id=6, piid="shard_one")
    mommy.make("awards.Award", id=3, latest_transaction_id=9, piid="shard_two")

    # Push DB data into the test ES cluster
    # NOTE: Force routing of documents by the piid field, which will separate them into 3 groups, leading to an
    # inaccurate sum and ordering of sums

    # Using piid (derived from the transaction's award) to route transaction documents to shards
    setup_elasticsearch_test(monkeypatch,
                             elasticsearch_transaction_index,
                             routing="piid")

    search = TransactionSearch()
    total = search.handle_count()
    assert total == 12, "Should have seen 12 documents indexed for this test"

    group_by_agg = A("terms",
                     field="recipient_hash",
                     size=1,
                     shard_size=1,
                     order={"sum_agg": "desc"})
    sum_agg = A("sum", field="generated_pragmatic_obligation")
    search.aggs.bucket("results", group_by_agg).metric("sum_agg", sum_agg)

    logging.getLogger("console").debug(
        f"=>->=>->=>-> WILL RUN THIS ES QUERY: \n {search.extra(size=0).to_dict()}"
    )
    response = search.extra(size=0).handle_execute().to_dict()
    results = []
    for bucket in response["aggregations"]["results"]["buckets"]:
        results.append({
            "key": bucket["key"],
            "sum": bucket["sum_agg"]["value"]
        })
    print(results)
    assert len(results) == 1
    assert results[0]["key"] == str(
        recipient1
    ), "This botched 'Top 1' sum agg should have incorrectly chosen the lesser recipient"
    assert results[0][
        "sum"] == 20.0, "The botched 'Top 1' sum agg should have incorrectly summed up recipient totals"
Example #15
def test_top_1_with_es_transactions_routed_by_recipient(
        client, monkeypatch, elasticsearch_transaction_index, db):
    """
    This tests the approach to compensating for high-cardinality aggregations
    documented in DEV-4685, to ensure accuracy and completeness of aggregations
    and sorting even when taking fewer buckets than the term cardinality.

    Without the code to route indexing of transaction documents in elasticsearch
    to shards by the `recipient_agg_key`, which was added to
    :meth:`usaspending_api.etl.elasticsearch_loader_helpers.csv_chunk_gen`,
    the below agg queries would lead to inaccurate results, as shown in DEV-4538.

    With routing by recipient, documents will be allocated to shards as below

    Recipient shard0   shard1   shard2   shard3   shard4
    Biz 1      $2.00
    Biz 1      $ 7.00
    Biz 1      $ 3.00
    Biz 1      $ 2.00
    Biz 1      $ 3.00
    Biz 1      $ 5.00
    Biz 2              $ 6.00
    Biz 2              $ 3.00
    Biz 2              $ 2.00
    Biz 2              $ 3.00
    Biz 2              $ 4.00
    Biz 2              $13.00

    **IF THIS TEST FAILS**
        - Are we still using the TestElasticSearchIndex fixture to help with pushing test data to ES?
        - Did TestElasticSearchIndex indexing / routing behavior change?
        - Did our cluster structure change to not be 5 shards per the transaction index?
        - Did the transaction<->recipient DB linkage change?
        - Did we change ES version or config?
            - Investigate if Elasticsearch has changed the way they do routing or hash routing key values
    """

    # Setup data for this test

    recipient1 = uuid.uuid4()
    recipient2 = uuid.uuid4()

    # Recipient Lookup
    mommy.make("recipient.RecipientLookup",
               id=1,
               recipient_hash=recipient1,
               legal_business_name="Biz 1",
               duns="111")
    mommy.make("recipient.RecipientLookup",
               id=2,
               recipient_hash=recipient2,
               legal_business_name="Biz 2",
               duns="222")

    # Transaction FPDS
    _make_fpds_transaction(1, 1, 2.00, "2020-01-01", "111", "Biz 1")
    _make_fpds_transaction(2, 3, 7.00, "2020-02-02", "111", "Biz 1")
    _make_fpds_transaction(3, 3, 3.00, "2020-03-03", "111", "Biz 1")
    _make_fpds_transaction(4, 2, 2.00, "2020-01-02", "111", "Biz 1")
    _make_fpds_transaction(5, 2, 3.00, "2020-02-03", "111", "Biz 1")
    _make_fpds_transaction(6, 2, 5.00, "2020-03-04", "111", "Biz 1")
    _make_fpds_transaction(7, 2, 6.00, "2020-01-03", "222", "Biz 2")
    _make_fpds_transaction(8, 2, 3.00, "2020-02-04", "222", "Biz 2")
    _make_fpds_transaction(9, 3, 2.00, "2020-03-05", "222", "Biz 2")
    _make_fpds_transaction(10, 3, 3.00, "2020-01-04", "222", "Biz 2")
    _make_fpds_transaction(11, 3, 4.00, "2020-02-05", "222", "Biz 2")
    _make_fpds_transaction(12, 1, 13.00, "2020-03-06", "222", "Biz 2")

    # Awards
    mommy.make("awards.Award", id=1, latest_transaction_id=12)
    mommy.make("awards.Award", id=2, latest_transaction_id=6)
    mommy.make("awards.Award", id=3, latest_transaction_id=9)

    # Push DB data into the test ES cluster
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index)

    search = TransactionSearch()
    total = search.handle_count()
    assert total == 12, "Should have seen 12 documents indexed for this test"

    group_by_agg = A("terms",
                     field="recipient_hash",
                     size=1,
                     shard_size=1,
                     order={"sum_agg": "desc"})
    sum_agg = A("sum", field="generated_pragmatic_obligation")
    search.aggs.bucket("results", group_by_agg).metric("sum_agg", sum_agg)

    logging.getLogger("console").debug(
        f"=>->=>->=>-> WILL RUN THIS ES QUERY: \n {search.extra(size=0).to_dict()}"
    )
    response = search.extra(size=0).handle_execute().to_dict()
    results = []
    for bucket in response["aggregations"]["results"]["buckets"]:
        results.append({
            "key": bucket["key"],
            "sum": bucket["sum_agg"]["value"]
        })
    assert len(results) == 1
    assert results[0]["key"] == str(
        recipient2
    ), "The 'Top 1' sum agg incorrectly chose the recipient with a lesser total sum"
    assert results[0][
        "sum"] == 31.0, "The 'Top 1' sum agg incorrectly summed up recipient totals"
Example #16
def obtain_recipient_totals(recipient_id, children=False, year="latest"):
    """Extract the total amount and transaction count for the recipient_hash given the time frame

    Args:
        recipient_id: string of hash(duns, name)-[recipient-level]
        children: whether or not to group by children
        year: the year the totals/counts are based on
    Returns:
        list of dictionaries representing hashes and their totals/counts
    """
    filters = reshape_filters(recipient_id=recipient_id, year=year)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        filters)

    search = TransactionSearch().filter(filter_query)

    if children:
        group_by_field = "recipient_agg_key"
    elif recipient_id[-2:] == "-P":
        group_by_field = "parent_recipient_hash"
    else:
        group_by_field = "recipient_hash"

    bucket_count = get_number_of_unique_terms_for_transactions(
        filter_query, f"{group_by_field}.hash")

    if bucket_count == 0:
        return []

    # Not setting the shard_size since the number of child recipients under a
    # parent recipient will not exceed 10k
    group_by_recipient = A("terms", field=group_by_field, size=bucket_count)

    sum_obligation = get_scaled_sum_aggregations(
        "generated_pragmatic_obligation")["sum_field"]

    filter_loans = A("filter", terms={"type": list(loan_type_mapping.keys())})
    sum_face_value_loan = get_scaled_sum_aggregations(
        "face_value_loan_guarantee")["sum_field"]

    search.aggs.bucket("group_by_recipient", group_by_recipient)
    search.aggs["group_by_recipient"].metric("sum_obligation", sum_obligation)
    search.aggs["group_by_recipient"].bucket("filter_loans", filter_loans)
    search.aggs["group_by_recipient"]["filter_loans"].metric(
        "sum_face_value_loan", sum_face_value_loan)

    response = search.handle_execute()
    response_as_dict = response.aggs.to_dict()
    recipient_info_buckets = response_as_dict.get("group_by_recipient",
                                                  {}).get("buckets", [])

    result_list = []

    for bucket in recipient_info_buckets:
        result = {}
        if children:
            recipient_info = json.loads(bucket.get("key"))
            hash_with_level = recipient_info.get("hash_with_level") or None
            result = {
                "recipient_hash":
                hash_with_level[:-2] if hash_with_level else None,
                "recipient_unique_id": recipient_info.get("unique_id"),
                "recipient_name": recipient_info.get("name"),
            }
        loan_info = bucket.get("filter_loans", {})
        result.update({
            "total_obligation_amount":
            int(bucket.get("sum_obligation", {"value": 0})["value"]) /
            Decimal("100"),
            "total_obligation_count":
            bucket.get("doc_count", 0),
            "total_face_value_loan_amount":
            int(loan_info.get("sum_face_value_loan", {"value": 0})["value"]) /
            Decimal("100"),
            "total_face_value_loan_count":
            loan_info.get("doc_count", 0),
        })
        result_list.append(result)

    return result_list
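
An illustrative element of the returned list when children=True (all values are examples; the keys follow the dictionary built above):

from decimal import Decimal

# Illustrative result_list entry; every value is an example only.
example_entry = {
    "recipient_hash": "00000000-0000-0000-0000-000000000000",  # hash_with_level minus the level suffix
    "recipient_unique_id": "123456789",
    "recipient_name": "EXAMPLE RECIPIENT",
    "total_obligation_amount": Decimal("12345.67"),  # scaled sum divided by 100
    "total_obligation_count": 42,
    "total_face_value_loan_amount": Decimal("0"),
    "total_face_value_loan_count": 0,
}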