def get_total_results(keyword):
    """Return the per-category buckets for transactions matching ``keyword``.

    A ``filters`` aggregation is built with one ``terms`` filter on ``type``
    for every entry of ``INDEX_ALIASES_TO_AWARD_TYPES`` and attached, under
    the name "types", to a transaction search restricted by the sanitized
    keyword.

    Returns:
        The ``buckets`` entry of the "types" aggregation, or ``None`` when
        the search produced no response or the response lacks the expected
        keys (an error is logged in both cases).
    """
    category_filters = {}
    for category, types in INDEX_ALIASES_TO_AWARD_TYPES.items():
        category_filters[category] = {"terms": {"type": types}}
    type_aggregation = A("filters", filters=category_filters)

    keyword_filter = {"keyword_search": [es_minimal_sanitize(keyword)]}
    query = QueryWithFilters.generate_transactions_elasticsearch_query(keyword_filter)
    search = TransactionSearch().filter(query)
    search.aggs.bucket("types", type_aggregation)

    response = search.handle_execute()
    if response is None:
        logger.error("No Response")
        return None

    try:
        return response["aggregations"]["types"]["buckets"]
    except KeyError:
        logger.error("Unexpected Response")
    return None
def get_sum_and_count_aggregation_results(keyword):
    """Return the count and summed amount of transactions matching ``keyword``.

    Two aggregations are attached to the keyword-filtered transaction search:
    a ``sum`` over ``transaction_amount`` and a ``value_count`` over
    ``transaction_id``.

    Returns:
        A dict with ``prime_awards_count`` and
        ``prime_awards_obligation_amount`` (rounded to 2 decimal places), or
        ``None`` when there is no response or an expected key is missing
        (the latter is logged with its traceback).
    """
    keyword_filter = {"keyword_search": [es_minimal_sanitize(keyword)]}
    query = QueryWithFilters.generate_transactions_elasticsearch_query(keyword_filter)
    search = TransactionSearch().filter(query)
    search.aggs.bucket(
        "prime_awards_obligation_amount",
        {"sum": {"field": "transaction_amount"}},
    )
    search.aggs.bucket(
        "prime_awards_count",
        {"value_count": {"field": "transaction_id"}},
    )

    response = search.handle_execute()
    if response is None:
        return None

    try:
        aggregations = response["aggregations"]
        award_count = aggregations["prime_awards_count"]["value"]
        obligation_total = round(
            aggregations["prime_awards_obligation_amount"]["value"], 2
        )
    except KeyError:
        logger.exception("Unexpected Response")
        return None

    return {
        "prime_awards_count": award_count,
        "prime_awards_obligation_amount": obligation_total,
    }
def get_sum_aggregation_results(keyword, field="transaction_amount"):
    """Return the aggregations of a ``sum`` over ``field`` for ``keyword``.

    Args:
        keyword: Search term; it is passed through ``es_minimal_sanitize``
            before being used in the filter.
        field: Name of the field to sum. Defaults to ``"transaction_amount"``.

    Returns:
        The ``aggregations`` entry of the response (the sum is registered
        under the name "transaction_sum"), or ``None`` when the response is
        falsy.
    """
    sum_aggregation = A("sum", field=field)

    # NOTE(review): the sibling helpers in this file filter with
    # {"keyword_search": [<sanitized keyword>]}, whereas this one passes the
    # bare sanitized string under "keywords" -- confirm this is intentional.
    query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keywords": es_minimal_sanitize(keyword)}
    )
    search = TransactionSearch().filter(query)
    search.aggs.bucket("transaction_sum", sum_aggregation)

    response = search.handle_execute()
    if not response:
        return None
    return response["aggregations"]
def get_download_ids(keyword, field, size=10000):
    """Yield lists of ``field`` values for transactions matching ``keyword``.

    The values are fetched with a partitioned ``terms`` aggregation, one
    partition per query, each query asking for up to ``size`` terms.

    Note: per the original author, this only works for fields in ES of
    integer type.

    Args:
        keyword: Search term, sanitized before use.
        field: Name of the field whose terms are collected.
        size: Number of terms requested per partition.

    Yields:
        One list of bucket keys per partition.

    Raises:
        Exception: if a partition query returns no response.
    """
    # Upper bound on the number of partitions, derived from the download cap.
    max_partitions = DOWNLOAD_QUERY_SIZE // size

    totals_by_category = get_total_results(keyword)
    if totals_by_category is None:
        logger.error("Error retrieving total results. Max number of attempts reached")
        return

    total = 0
    for category in INDEX_ALIASES_TO_AWARD_TYPES.keys():
        total += totals_by_category[category]["doc_count"]

    # At least one partition, but never more than the cap computed above.
    needed_partitions = (total // size) + 1
    partition_count = min(max(1, needed_partitions), max_partitions)

    for partition in range(partition_count):
        query = QueryWithFilters.generate_transactions_elasticsearch_query(
            {"keyword_search": [es_minimal_sanitize(keyword)]}
        )
        search = TransactionSearch().filter(query)
        terms_aggregation = A(
            "terms",
            field=field,
            include={"partition": partition, "num_partitions": partition_count},
            size=size,
            shard_size=size,
        )
        search.aggs.bucket("results", terms_aggregation)

        response = search.handle_execute()
        if response is None:
            raise Exception("Breaking generator, unable to reach cluster")

        yield [
            bucket["key"]
            for bucket in response["aggregations"]["results"]["buckets"]
        ]
def post(self, request):
    """Validate the request payload and return a page of transaction results.

    The payload is validated with TinyShield against a "fields" model plus
    copies of ``AWARD_FILTER`` and ``PAGINATION``, with "keywords",
    "award_type_codes" and "sort" forced to be required.

    Raises:
        UnprocessableEntityException: if the first record of the requested
            page is at or beyond ``settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW``.
        InvalidParameterException: if the sort value is not one of the
            requested fields.

    Returns:
        A ``Response`` wrapping either an empty result page (for the
        "no intersection" award type code) or the output of
        ``self.build_elasticsearch_result``.
    """
    models = [
        {
            "name": "fields",
            "key": "fields",
            "type": "array",
            "array_type": "text",
            "text_type": "search",
            "optional": False,
        }
    ]
    models.extend(copy.deepcopy(AWARD_FILTER))
    models.extend(copy.deepcopy(PAGINATION))
    required_names = ("keywords", "award_type_codes", "sort")
    for model in models:
        if model["name"] in required_names:
            model["optional"] = False

    validated_payload = TinyShield(models).block(request.data)
    page = validated_payload["page"]
    limit = validated_payload["limit"]

    first_record = (page - 1) * limit
    if first_record >= settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW:
        message = "Page #{page} of size {limit} is over the maximum result limit ({es_limit}). Consider using custom data downloads to obtain large data sets."
        raise UnprocessableEntityException(
            message.format(
                page=page,
                limit=limit,
                es_limit=settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW,
            )
        )

    sort_field = validated_payload["sort"]
    if sort_field not in validated_payload["fields"]:
        raise InvalidParameterException(
            "Sort value not found in fields: {}".format(sort_field)
        )

    has_filters = "filters" in validated_payload
    if has_filters and "no intersection" in validated_payload["filters"]["award_type_codes"]:
        # "Special case": this value, as sent by the website, can never
        # match anything, so answer with an empty page without querying.
        empty_page = {
            "limit": limit,
            "results": [],
            "page_metadata": {
                "page": page,
                "next": None,
                "previous": None,
                "hasNext": False,
                "hasPrevious": False,
            },
        }
        return Response(empty_page)

    sorts = {TRANSACTIONS_LOOKUP[sort_field]: validated_payload["order"]}

    # The slice asks for one record more than a full page.
    lower_limit = first_record
    upper_limit = page * limit + 1

    # Replace the raw "keywords" filter with its sanitized "keyword_search" form.
    filters = validated_payload["filters"]
    filters["keyword_search"] = [
        es_minimal_sanitize(raw_keyword) for raw_keyword in filters["keywords"]
    ]
    filters.pop("keywords")

    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(filters)
    search = TransactionSearch().filter(filter_query).sort(sorts)[lower_limit:upper_limit]
    response = search.handle_execute()

    return Response(self.build_elasticsearch_result(validated_payload, response))