def test_execute_sql_to_ordered_dictionary():
    """The helper must accept a raw SQL string, a Composable SQL object, and an optional model."""
    for call_args in ((RAW_SQL,), (SQL(RAW_SQL),), (RAW_SQL, User)):
        assert execute_sql_to_ordered_dictionary(*call_args) == EXPECTED_RESPONSE_ORDERED_DICTIONARY
def _business_logic(request_data: dict) -> tuple:
    """Return (activity page rows, overall rollup contract count), optionally hiding edge cases.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    award_id_column = 'award_id' if type(award_id) is int else 'generated_unique_award_id'
    id_kwargs = {'award_id_column': Identifier(award_id_column), 'award_id': Literal(award_id)}

    if request_data.get('hide_edge_cases'):
        # Extra predicates that drop awards with non-positive amounts or no potential end date.
        hide_edges_awarded_amount = "and ca.base_and_all_options_value > 0 and ca.total_obligation > 0"
        hide_edges_end_date = "where tf.period_of_perf_potential_e is not null"
        count_sql = COUNT_ACTIVITY_HIDDEN_SQL.format(
            hide_edges_awarded_amount=SQL(hide_edges_awarded_amount),
            hide_edges_end_date=SQL(hide_edges_end_date),
            **id_kwargs,
        )
    else:
        hide_edges_awarded_amount = ''
        hide_edges_end_date = ''
        count_sql = COUNT_ACTIVITY_SQL.format(**id_kwargs)

    count_rows = execute_sql_to_ordered_dictionary(count_sql)
    overall_count = count_rows[0]['rollup_contract_count'] if count_rows else 0

    activity_sql = ACTIVITY_SQL.format(
        limit=Literal(request_data['limit'] + 1),  # extra row lets the caller detect a next page
        offset=Literal((request_data['page'] - 1) * request_data['limit']),
        hide_edges_awarded_amount=SQL(hide_edges_awarded_amount),
        hide_edges_end_date=SQL(hide_edges_end_date),
        **id_kwargs,
    )
    return execute_sql_to_ordered_dictionary(activity_sql), overall_count
def fetch_account_details_award(award_id: int) -> dict:
    """Summarize File C outlays/obligations for one award, restricted to COVID-19 DEFCs.

    Returns totals plus per-DEFC breakdown lists keyed for the API response.
    """
    rows = execute_sql_to_ordered_dictionary(
        defc_sql.format(award_id_sql="faba.award_id = {award_id}".format(award_id=award_id))
    )
    covid_defcs = DisasterEmergencyFundCode.objects.filter(group_name="covid_19").values_list("code", flat=True)
    total_outlay = 0
    total_obligations = 0
    outlay_by_code = []
    obligation_by_code = []
    for row in rows:
        defc = row["disaster_emergency_fund_code"]
        # Only DEFCs in the covid_19 group contribute to totals and breakdowns.
        if defc in covid_defcs:
            total_outlay += row["total_outlay"]
            total_obligations += row["obligated_amount"]
            outlay_by_code.append({"code": defc, "amount": row["total_outlay"]})
            obligation_by_code.append({"code": defc, "amount": row["obligated_amount"]})
    return {
        "total_account_outlay": total_outlay,
        "total_account_obligation": total_obligations,
        "account_outlays_by_defc": outlay_by_code,
        "account_obligations_by_defc": obligation_by_code,
    }
def _business_logic(request_data: dict) -> OrderedDict:
    """Run the rollup query for one award and return its first (and expected only) row.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data["award_id"]
    if type(award_id) is int:
        award_id_column = "award_id"
    else:
        award_id_column = "generated_unique_award_id"
    rollup_sql = ROLLUP_SQL.format(award_id_column=Identifier(award_id_column), award_id=Literal(award_id))
    return execute_sql_to_ordered_dictionary(rollup_sql)[0]
def check_awards_for_deletes(id_list):
    """Return rows for each generated_unique_award_id in `id_list` missing from the awards table.

    Each returned row has a single "generated_unique_award_id" key. An empty `id_list`
    returns [] immediately (an empty VALUES list would be a SQL syntax error, and with
    nothing to check, nothing can be missing).
    """
    if not id_list:
        return []
    # Build one quoted VALUES row per id with a join instead of quadratic string +=.
    # NOTE(review): ids are interpolated as quoted literals; an id containing a single
    # quote would break the statement — confirm upstream guarantees for these values.
    formatted_value_ids = ",".join("('{}')".format(award_id) for award_id in id_list)
    sql = """
        SELECT x.generated_unique_award_id
        FROM (values {ids}) AS x(generated_unique_award_id)
        LEFT JOIN awards a ON a.generated_unique_award_id = x.generated_unique_award_id
        WHERE a.generated_unique_award_id is null"""
    results = execute_sql_to_ordered_dictionary(sql.format(ids=formatted_value_ids))
    return results
def _get_whatever(table_name: str, schema_name: str, dblink_name: str, sql: str, data_types: DataTypes):
    """ The common bits of subsequent functions. """
    qualified_table = make_composed_qualified_table_name(table_name, schema_name)
    composed = SQL(sql).format(table=qualified_table)
    if dblink_name is not None:
        composed = wrap_dblink_query(dblink_name, composed, "r", list(data_types), data_types)
    # IMPORTANT: Even though this is a read only operation, since this is being run in support of
    # a writable operation, we need to run it against the writable connection else we will be
    # unable to see objects living in our transaction if there is one.
    return sql_helpers.execute_sql_to_ordered_dictionary(composed, read_only=False)
def _business_logic(request_data: dict) -> tuple:
    """Return (activity page rows, overall rollup contract count) for the requested award.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    award_id_column = 'generated_unique_award_id'
    if type(award_id) is int:
        award_id_column = 'award_id'
    id_kwargs = {
        'award_id_column': Identifier(award_id_column),
        'award_id': Literal(award_id),
    }
    count_rows = execute_sql_to_ordered_dictionary(COUNT_ACTIVITY_SQL.format(**id_kwargs))
    overall_count = count_rows[0]['rollup_contract_count'] if count_rows else 0
    activity_sql = ACTIVITY_SQL.format(
        limit=Literal(request_data['limit'] + 1),  # extra row lets the caller detect a next page
        offset=Literal((request_data['page'] - 1) * request_data['limit']),
        **id_kwargs,
    )
    return execute_sql_to_ordered_dictionary(activity_sql), overall_count
def _business_logic(request_data: dict, columns: str, group_by: str) -> list:
    """Run the funding treemap query with the supplied column and grouping SQL fragments.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    is_internal_id = type(award_id) is int
    column = 'award_id' if is_internal_id else 'generated_unique_award_id'
    treemap_sql = FUNDING_TREEMAP_SQL.format(
        columns=SQL(columns),
        group_by=SQL(group_by),
        award_id_column=Identifier(column),
        award_id=Literal(award_id),
    )
    return execute_sql_to_ordered_dictionary(treemap_sql)
def _business_logic(request_data: dict) -> list:
    """Fetch federal-account rows for one award, ordered per the request's sort/order.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data["award_id"]
    if type(award_id) is int:
        award_id_column = "award_id"
    else:
        award_id_column = "generated_unique_award_id"
    accounts_sql = ACCOUNTS_SQL.format(
        award_id=Literal(award_id),
        award_id_column=Identifier(award_id_column),
        order_by=SQL(SORTABLE_COLUMNS[request_data["sort"]]),
        order_direction=SQL(request_data["order"]),
    )
    return execute_sql_to_ordered_dictionary(accounts_sql)
def _business_logic(request_data: dict) -> list:
    """Return one page (limit + 1 rows) of funding records for the requested award.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data["award_id"]
    column = "id" if type(award_id) is int else "generated_unique_award_id"
    page = request_data["page"]
    limit = request_data["limit"]
    funding_sql = FUNDING_SQL.format(
        award_id_column=Identifier(column),
        award_id=Literal(award_id),
        order_by=build_composable_order_by(SORTABLE_COLUMNS[request_data["sort"]], request_data["order"]),
        limit=Literal(limit + 1),  # extra row lets the caller detect a next page
        offset=Literal((page - 1) * limit),
    )
    return execute_sql_to_ordered_dictionary(funding_sql)
def _business_logic(request_data: dict, columns: object, group_by: object, order_by: object) -> list:
    """Run the funding treemap query with caller-supplied column/grouping/ordering fragments.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    uses_internal_id = type(award_id) is int
    treemap_sql = FUNDING_TREEMAP_SQL.format(
        columns=columns,
        group_by=group_by,
        order_by=order_by,
        award_id_column=Identifier('award_id' if uses_internal_id else 'generated_unique_award_id'),
        award_id=Literal(award_id),
    )
    return execute_sql_to_ordered_dictionary(treemap_sql)
def _business_logic(request_data: dict) -> list:
    """Return one page (limit + 1 rows) of funding records, optionally narrowed by piid.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    id_column = 'award_id' if type(award_id) is int else 'generated_unique_award_id'
    paging = {
        'limit': Literal(request_data['limit'] + 1),  # extra row signals a next page
        'offset': Literal((request_data['page'] - 1) * request_data['limit']),
    }
    funding_sql = GET_FUNDING_SQL.format(
        award_id_column=Identifier(id_column),
        award_id=Literal(award_id),
        piid=Literal(request_data.get('piid')),
        order_by=build_composable_order_by(SORTABLE_COLUMNS[request_data['sort']], request_data['order']),
        **paging,
    )
    return execute_sql_to_ordered_dictionary(funding_sql)
def _business_logic(request_data: dict) -> list:
    """Page through funding rows for an award; fetches limit + 1 rows to flag a next page.

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    if type(award_id) is int:
        column = 'award_id'
    else:
        column = 'generated_unique_award_id'
    ordering = build_composable_order_by(SORTABLE_COLUMNS[request_data['sort']], request_data['order'])
    offset = (request_data['page'] - 1) * request_data['limit']
    statement = GET_FUNDING_SQL.format(
        award_id_column=Identifier(column),
        award_id=Literal(award_id),
        piid=Literal(request_data.get('piid')),
        order_by=ordering,
        limit=Literal(request_data['limit'] + 1),
        offset=Literal(offset),
    )
    return execute_sql_to_ordered_dictionary(statement)
def _business_logic(request_data: dict) -> list:
    """Run the SQL template mapped to request_data['type'] and return one page (limit + 1 rows).

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data["award_id"]
    id_column = "award_id" if type(award_id) is int else "generated_unique_award_id"
    template = TYPE_TO_SQL_MAPPING[request_data["type"]]
    query = template.format(
        award_id_column=Identifier(id_column),
        award_id=Literal(award_id),
        sort_column=Identifier(request_data["sort"]),
        sort_direction=SQL(request_data["order"]),
        limit=Literal(request_data["limit"] + 1),  # extra row lets the caller detect a next page
        offset=Literal((request_data["page"] - 1) * request_data["limit"]),
    )
    return execute_sql_to_ordered_dictionary(query)
def _business_logic(request_data: dict) -> list:
    """Return one page of IDV or contract rows depending on request_data['idv'].

    By this point, our award_id has been validated and cleaned up by TinyShield. We will either
    have an internal award id that is an integer or a generated award id that is a string.
    """
    award_id = request_data['award_id']
    column = 'award_id' if type(award_id) is int else 'generated_unique_award_id'
    template = GET_IDVS_SQL if request_data['idv'] else GET_CONTRACTS_SQL
    return execute_sql_to_ordered_dictionary(
        template.format(
            award_id_column=Identifier(column),
            award_id=Literal(award_id),
            sort_column=Identifier(request_data['sort']),
            sort_direction=SQL(request_data['order']),
            limit=Literal(request_data['limit'] + 1),  # extra row lets the caller detect a next page
            offset=Literal((request_data['page'] - 1) * request_data['limit']),
        )
    )
def _fetch_defc_rows(award_ids):
    """Fetch File C DEFC rows for the given internal award ids; empty input returns {} (iterates as empty)."""
    if award_ids == []:
        return {}
    award_id_sql = "faba.award_id in {award_id}".format(award_id="(" + str(award_ids).strip("[]") + ")")
    return execute_sql_to_ordered_dictionary(defc_sql.format(award_id_sql=award_id_sql))


def _aggregate_covid_spending(rows, covid_defcs):
    """Sum outlays/obligations and build per-DEFC breakdowns, counting only COVID-19 DEFCs.

    Returns (total_outlay, total_obligations, outlay_by_code, obligation_by_code).
    """
    outlay_by_code = []
    obligation_by_code = []
    total_outlay = 0
    total_obligations = 0
    for row in rows:
        defc = row["disaster_emergency_fund_code"]
        if defc in covid_defcs:
            total_outlay += row["total_outlay"]
            total_obligations += row["obligated_amount"]
            outlay_by_code.append({"code": defc, "amount": row["total_outlay"]})
            obligation_by_code.append({"code": defc, "amount": row["obligated_amount"]})
    return total_outlay, total_obligations, outlay_by_code, obligation_by_code


def fetch_account_details_idv(award_id, award_id_column) -> dict:
    """Roll up COVID-19 File C outlays/obligations for an IDV's child and grandchild awards.

    The identical child/grandchild aggregation is factored into _fetch_defc_rows and
    _aggregate_covid_spending rather than being duplicated inline.
    """
    if award_id_column != "award_id":
        # Generated award ids are strings: double any single quotes for the SQL literal.
        award_id = re.sub(r"[']", r"''", award_id)
    children = execute_sql_to_ordered_dictionary(
        child_award_sql.format(award_id=award_id, award_id_column=award_id_column))
    grandchildren = execute_sql_to_ordered_dictionary(
        grandchild_award_sql.format(award_id=award_id, award_id_column=award_id_column))
    covid_defcs = DisasterEmergencyFundCode.objects.filter(
        group_name="covid_19").values_list("code", flat=True)

    child_rows = _fetch_defc_rows([x["award_id"] for x in children])
    grandchild_rows = _fetch_defc_rows([x["award_id"] for x in grandchildren])

    c_outlay, c_obligations, c_outlay_by_code, c_obligation_by_code = _aggregate_covid_spending(
        child_rows, covid_defcs)
    g_outlay, g_obligations, g_outlay_by_code, g_obligation_by_code = _aggregate_covid_spending(
        grandchild_rows, covid_defcs)

    return {
        "child_total_account_outlay": c_outlay,
        "child_total_account_obligation": c_obligations,
        "child_account_outlays_by_defc": c_outlay_by_code,
        "child_account_obligations_by_defc": c_obligation_by_code,
        "grandchild_total_account_outlay": g_outlay,
        "grandchild_total_account_obligation": g_obligations,
        "grandchild_account_outlays_by_defc": g_outlay_by_code,
        "grandchild_account_obligations_by_defc": g_obligation_by_code,
    }
class SpendingByAwardVisualizationViewSet(APIView):
    """
    This route takes award filters and fields, and returns the fields of the filtered awards.
    """

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_award.md"

    @cache_response()
    def post(self, request):
        """Return all awards matching the provided filters and limits"""
        json_request = self.validate_request_data(request.data)
        self.is_subaward = json_request["subawards"]
        # Column/constant mappings differ between prime awards and subawards.
        self.constants = GLOBAL_MAP["subaward"] if self.is_subaward else GLOBAL_MAP["award"]
        self.filters = add_date_range_comparison_types(
            json_request.get("filters"),
            self.is_subaward,
            gte_date_type="action_date",
            lte_date_type="date_signed",
        )
        self.fields = json_request["fields"]
        self.pagination = {
            "limit": json_request["limit"],
            "lower_bound": (json_request["page"] - 1) * json_request["limit"],
            "page": json_request["page"],
            "sort_key": json_request.get("sort") or self.fields[0],
            "sort_order": json_request["order"],
            # One record past the page end so has_next can be detected.
            "upper_bound": json_request["page"] * json_request["limit"] + 1,
        }
        self.elasticsearch = is_experimental_elasticsearch_api(request)
        if self.if_no_intersection():  # Like an exception, but API response is a HTTP 200 with a JSON payload
            return Response(self.populate_response(results=[], has_next=False))
        raise_if_award_types_not_valid_subset(self.filters["award_type_codes"], self.is_subaward)
        raise_if_sort_key_not_valid(self.pagination["sort_key"], self.fields, self.is_subaward)
        if self.elasticsearch and not self.is_subaward:
            # search_after cursor fields for deep ES pagination.
            self.last_id = json_request.get("last_id")
            self.last_value = json_request.get("last_value")
            logger.info("Using experimental Elasticsearch functionality for 'spending_by_award'")
            results = self.query_elasticsearch()
            return Response(self.construct_es_reponse(results))
        return Response(self.create_response(self.construct_queryset()))

    @staticmethod
    def validate_request_data(request_data):
        """Validate and coerce the request payload with TinyShield; raises on invalid input."""
        models = [
            {"name": "fields", "key": "fields", "type": "array", "array_type": "text", "text_type": "search", "min": 1},
            {"name": "subawards", "key": "subawards", "type": "boolean", "default": False},
            {
                "name": "object_class",
                "key": "filter|object_class",
                "type": "array",
                "array_type": "text",
                "text_type": "search",
            },
            {
                "name": "program_activity",
                "key": "filter|program_activity",
                "type": "array",
                "array_type": "integer",
                "array_max": maxsize,
            },
            {
                "name": "last_id",
                "key": "last_id",
                "type": "text",
                "text_type": "search",
                "required": False,
                "allow_nulls": True,
            },
            {"name": "last_value", "key": "last_value", "type": "float", "required": False, "allow_nulls": True},
        ]
        models.extend(copy.deepcopy(AWARD_FILTER))
        models.extend(copy.deepcopy(PAGINATION))
        # These two models must always be supplied by the caller.
        for m in models:
            if m["name"] in ("award_type_codes", "fields"):
                m["optional"] = False
        return TinyShield(models).block(request_data)

    def if_no_intersection(self):
        # "Special case" behavior: there will never be results when the website provides this value
        return "no intersection" in self.filters["award_type_codes"]

    def construct_queryset(self):
        """Build the filtered, annotated, sorted queryset sliced to the requested page (+1 row)."""
        sort_by_fields = self.get_sort_by_fields()
        database_fields = self.get_database_fields()
        base_queryset = self.constants["filter_queryset_func"](self.filters)
        queryset = self.annotate_queryset(base_queryset)
        queryset = self.custom_queryset_order_by(queryset, sort_by_fields, self.pagination["sort_order"])
        return queryset.values(*list(database_fields))[self.pagination["lower_bound"]:self.pagination["upper_bound"]]

    def create_response(self, queryset):
        """Map database rows to API field names and assemble the paged response payload."""
        results = []
        for record in queryset[:self.pagination["limit"]]:
            row = {k: record[v] for k, v in self.constants["internal_id_fields"].items()}
            for field in self.fields:
                # The award's type code ("award_semaphore" column) selects the proper field mapping.
                row[field] = record.get(
                    self.constants["type_code_to_field_map"][record[self.constants["award_semaphore"]]].get(field)
                )
            if "Award ID" in self.fields:
                # Use the first populated id column for this record.
                for id_type in self.constants["award_id_fields"]:
                    if record[id_type]:
                        row["Award ID"] = record[id_type]
                        break
            results.append(row)
        results = self.add_award_generated_id_field(results)
        return self.populate_response(results=results, has_next=len(queryset) > self.pagination["limit"])

    def add_award_generated_id_field(self, records):
        """Obtain the generated_unique_award_id and add to response"""
        dest, source = self.constants["generated_award_field"]
        internal_ids = [record[source] for record in records]
        award_ids = Award.objects.filter(id__in=internal_ids).values_list("id", "generated_unique_award_id")
        award_ids = {internal_id: guai for internal_id, guai in award_ids}
        for record in records:
            record[dest] = award_ids.get(record[source])  # defensive, in case there is a discrepancy
        return records

    def get_sort_by_fields(self):
        """Translate the API sort key into database column name(s) for the request's award types.

        NOTE(review): if the award_type_codes are not a subset of any mapping below,
        sort_by_fields is never assigned and this raises UnboundLocalError — confirm
        that upstream validation guarantees one branch always matches.
        """
        if self.pagination["sort_key"] == "Award ID":
            sort_by_fields = self.constants["award_id_fields"]
        elif self.is_subaward:
            if set(self.filters["award_type_codes"]) <= set(procurement_type_mapping):
                sort_by_fields = [contract_subaward_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(assistance_type_mapping):
                sort_by_fields = [grant_subaward_mapping[self.pagination["sort_key"]]]
        else:
            if set(self.filters["award_type_codes"]) <= set(contract_type_mapping):
                sort_by_fields = [award_contracts_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(loan_type_mapping):
                sort_by_fields = [loan_award_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(idv_type_mapping):
                sort_by_fields = [award_idv_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(non_loan_assistance_type_mapping):
                sort_by_fields = [non_loan_assistance_award_mapping[self.pagination["sort_key"]]]
        return sort_by_fields

    def get_elastic_sort_by_fields(self):
        """Translate the API sort key into Elasticsearch sort field(s).

        NOTE(review): when last_id is set, a dict is appended to sort_by_fields; the
        comprehension in query_elasticsearch then uses each entry as a dict key, which
        would fail for a dict entry — confirm the intended search_after sort spec.
        """
        if self.pagination["sort_key"] == "Award ID":
            sort_by_fields = ["display_award_id"]
        else:
            if set(self.filters["award_type_codes"]) <= set(contract_type_mapping):
                sort_by_fields = [contracts_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(loan_type_mapping):
                sort_by_fields = [loan_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(idv_type_mapping):
                sort_by_fields = [idv_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(non_loan_assistance_type_mapping):
                sort_by_fields = [non_loan_assist_mapping[self.pagination["sort_key"]]]
        if self.last_id:
            sort_by_fields.append({"generated_unique_award_id.keyword": "desc"})
        return sort_by_fields

    def get_database_fields(self):
        """Return the set of database columns needed for the requested API fields."""
        values = copy.copy(self.constants["minimum_db_fields"])
        for field in self.fields:
            for mapping in self.constants["api_to_db_mapping_list"]:
                if mapping.get(field):
                    values.add(mapping.get(field))
        return values

    def annotate_queryset(self, queryset):
        """Apply each configured annotation function to the queryset."""
        for field, function in self.constants["annotations"].items():
            queryset = function(field, queryset)
        return queryset

    def custom_queryset_order_by(self, queryset, sort_field_names, order):
        """ Explicitly set NULLS LAST in the ordering to encourage the usage of the indexes."""
        if order == "desc":
            order_by_list = [F(field).desc(nulls_last=True) for field in sort_field_names]
        else:
            order_by_list = [F(field).asc(nulls_last=True) for field in sort_field_names]
        return queryset.order_by(*order_by_list)

    def populate_response(self, results: list, has_next: bool) -> dict:
        """Assemble the standard response envelope around the result rows."""
        return {
            "limit": self.pagination["limit"],
            "results": results,
            "page_metadata": {"page": self.pagination["page"], "hasNext": has_next},
            "messages": [get_time_period_message()],
        }

    def query_elasticsearch(self) -> list:
        """Execute the award search against Elasticsearch, via search_after when a cursor is present."""
        filter_query = QueryWithFilters.generate_awards_elasticsearch_query(self.filters)
        sort_field = self.get_elastic_sort_by_fields()
        sorts = [{field: self.pagination["sort_order"]} for field in sort_field]
        search = (
            (
                AwardSearch().filter(filter_query).sort(*sorts).extra(
                    search_after=[self.last_value, self.last_id]
                )[0:self.pagination["limit"]]
            )
            if self.last_value and self.last_id
            else (
                AwardSearch().filter(filter_query).sort(*sorts)[
                    ((self.pagination["page"] - 1) * self.pagination["limit"]):(
                        ((self.pagination["page"] - 1) * self.pagination["limit"]) + self.pagination["limit"]
                    )
                ]
            )
        )
        response = search.handle_execute()
        return response

    def construct_es_reponse(self, response) -> dict:
        """Shape Elasticsearch hits into the API response payload.

        (Method name keeps its historical spelling since callers may reference it.)
        """
        results = []
        for res in response:
            hit = res.to_dict()
            row = {k: hit[v] for k, v in self.constants["internal_id_fields"].items()}
            for field in self.fields:
                # The award's type code ("award_semaphore") selects the proper ES field mapping.
                row[field] = hit.get(
                    self.constants["elasticsearch_type_code_to_field_map"][
                        hit[self.constants["award_semaphore"]]
                    ].get(field)
                )
            row["internal_id"] = int(row["internal_id"])
            # Monetary values arrive as strings from ES; coerce the populated ones to float.
            if row.get("Loan Value"):
                row["Loan Value"] = float(row["Loan Value"])
            if row.get("Subsidy Cost"):
                row["Subsidy Cost"] = float(row["Subsidy Cost"])
            if row.get("Award Amount"):
                row["Award Amount"] = float(row["Award Amount"])
            row["generated_internal_id"] = hit["generated_unique_award_id"]
            row["recipient_id"] = hit.get("recipient_unique_id")
            row["parent_recipient_unique_id"] = hit.get("parent_recipient_unique_id")
            if "Award ID" in self.fields:
                row["Award ID"] = hit["display_award_id"]
            row = self.append_recipient_hash_level(row)
            row.pop("parent_recipient_unique_id")
            results.append(row)
        last_id = None
        last_value = None
        if len(response) > 0:
            # Cursor values for the next search_after request, taken from the final hit.
            last_id = response[len(response) - 1].to_dict().get("generated_unique_award_id")
            last_value = (
                response[len(response) - 1].to_dict().get("total_loan_value")
                if set(self.filters["award_type_codes"]) <= set(loan_type_mapping)
                else response[len(response) - 1].to_dict().get("total_obligation")
            )
        return {
            "limit": self.pagination["limit"],
            "results": results,
            "page_metadata": {
                "page": self.pagination["page"],
                "hasNext": response.hits.total - (self.pagination["page"] - 1) * self.pagination["limit"]
                > self.pagination["limit"],
                "last_id": last_id,
                "last_value": last_value,
            },
            "messages": [get_time_period_message()],
        }

    def append_recipient_hash_level(self, result) -> dict:
        """Replace the DUNS-based recipient_id with the hash-level id used by recipient pages.

        BUG FIX: the original version never returned `result`, yet construct_es_reponse
        assigns this method's return value to `row` and then calls row.pop(...) on it;
        `return result` is now supplied for all paths. Locals shadowing the builtin `id`
        and the imported `SQL` name have also been renamed.
        """
        duns = result.get("recipient_id")
        parent_id = result.get("parent_recipient_unique_id")
        if duns:
            # NOTE(review): values are interpolated directly into the SQL string; the
            # inputs come from ES documents, but parameterization would still be safer.
            sql = """(
                select rp.recipient_hash || '-' || rp.recipient_level as hash
                from recipient_profile rp
                inner join recipient_lookup rl on rl.recipient_hash = rp.recipient_hash
                where rl.duns = {recipient_id}
                  and rp.recipient_level = case when {parent_recipient_unique_id} is null then 'R' else 'C' end
                  and rp.recipient_name not in {special_cases}
            )"""
            special_cases = ["'" + case + "'" for case in SPECIAL_CASES]
            formatted_sql = sql.format(
                recipient_id="'" + duns + "'",
                parent_recipient_unique_id=parent_id if parent_id else "null",
                special_cases="(" + ", ".join(special_cases) + ")",
            )
            row = execute_sql_to_ordered_dictionary(formatted_sql)
            if len(row) > 0:
                result["recipient_id"] = row[0].get("hash")
            else:
                result["recipient_id"] = None
        return result
def mock_execute_sql(sql, results, verbosity=None):
    # Test stand-in matching a (sql, results, verbosity) signature; `results` and
    # `verbosity` are accepted but ignored — the SQL is executed for real and its
    # rows are returned.
    return execute_sql_to_ordered_dictionary(sql)
class SpendingByAwardVisualizationViewSet(APIView):
    """
    This route takes award filters and fields, and returns the fields of the filtered awards.
    """

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_award.md"

    @cache_response()
    def post(self, request):
        """Return all awards matching the provided filters and limits."""
        self.original_filters = request.data.get("filters")
        json_request = self.validate_request_data(request.data)
        self.is_subaward = json_request["subawards"]
        # All award/subaward-specific lookups (field maps, queryset builders, ...) live in GLOBAL_MAP.
        self.constants = GLOBAL_MAP["subaward"] if self.is_subaward else GLOBAL_MAP["award"]
        self.filters = add_date_range_comparison_types(
            json_request.get("filters"),
            self.is_subaward,
            gte_date_type="action_date",
            lte_date_type="date_signed",
        )
        self.fields = json_request["fields"]
        self.pagination = {
            "limit": json_request["limit"],
            "lower_bound": (json_request["page"] - 1) * json_request["limit"],
            "page": json_request["page"],
            "sort_key": json_request.get("sort") or self.fields[0],
            "sort_order": json_request["order"],
            # upper bound fetches one extra record so has_next can be detected
            "upper_bound": json_request["page"] * json_request["limit"] + 1,
        }
        self.elasticsearch = is_experimental_elasticsearch_api(request)
        if not self.elasticsearch:
            mirror_request_to_elasticsearch(request)
        if self.if_no_intersection():  # Like an exception, but API response is a HTTP 200 with a JSON payload
            return Response(self.populate_response(results=[], has_next=False))
        raise_if_award_types_not_valid_subset(self.filters["award_type_codes"], self.is_subaward)
        raise_if_sort_key_not_valid(self.pagination["sort_key"], self.fields, self.is_subaward)
        if self.elasticsearch and not self.is_subaward:
            self.last_record_unique_id = json_request.get("last_record_unique_id")
            self.last_record_sort_value = json_request.get("last_record_sort_value")
            logger.info("Using experimental Elasticsearch functionality for 'spending_by_award'")
            return Response(self.construct_es_response(self.query_elasticsearch()))
        return Response(self.create_response(self.construct_queryset()))

    @staticmethod
    def validate_request_data(request_data):
        """Validate and coerce the raw request payload via TinyShield; raises on bad input."""
        models = [
            {"name": "fields", "key": "fields", "type": "array", "array_type": "text", "text_type": "search", "min": 1},
            {"name": "subawards", "key": "subawards", "type": "boolean", "default": False},
            {
                "name": "object_class",
                "key": "filter|object_class",
                "type": "array",
                "array_type": "text",
                "text_type": "search",
            },
            {
                "name": "program_activity",
                "key": "filter|program_activity",
                "type": "array",
                "array_type": "integer",
                "array_max": maxsize,
            },
            {
                "name": "last_record_unique_id",
                "key": "last_record_unique_id",
                "type": "integer",
                "required": False,
                "allow_nulls": True,
            },
            {
                "name": "last_record_sort_value",
                "key": "last_record_sort_value",
                "type": "text",
                "text_type": "search",
                "required": False,
                "allow_nulls": True,
            },
        ]
        models.extend(copy.deepcopy(AWARD_FILTER_NO_RECIPIENT_ID))
        models.extend(copy.deepcopy(PAGINATION))
        # award_type_codes and fields are mandatory for this endpoint.
        for m in models:
            if m["name"] in ("award_type_codes", "fields"):
                m["optional"] = False
        return TinyShield(models).block(request_data)

    def if_no_intersection(self):
        # "Special case" behavior: there will never be results when the website provides this value
        return "no intersection" in self.filters["award_type_codes"]

    def construct_queryset(self):
        """Build the filtered, annotated, ordered, paginated database queryset."""
        sort_by_fields = self.get_sort_by_fields()
        database_fields = self.get_database_fields()
        base_queryset = self.constants["filter_queryset_func"](self.filters)
        queryset = self.annotate_queryset(base_queryset)
        queryset = self.custom_queryset_order_by(queryset, sort_by_fields, self.pagination["sort_order"])
        return queryset.values(*list(database_fields))[self.pagination["lower_bound"] : self.pagination["upper_bound"]]

    def create_response(self, queryset):
        """Map database rows onto the API response shape (one dict per requested field)."""
        results = []
        rows = list(queryset)
        for record in rows[: self.pagination["limit"]]:
            row = {k: record[v] for k, v in self.constants["internal_id_fields"].items()}
            # The award's type code ("award_semaphore" column) selects which field-name map applies.
            for field in self.fields:
                row[field] = record.get(
                    self.constants["type_code_to_field_map"][record[self.constants["award_semaphore"]]].get(field)
                )
            if "Award ID" in self.fields:
                # Use the first non-empty candidate ID column.
                for id_type in self.constants["award_id_fields"]:
                    if record[id_type]:
                        row["Award ID"] = record[id_type]
                        break
            results.append(row)
        results = self.add_award_generated_id_field(results)
        # rows holds up to one record beyond the page limit, which signals another page exists.
        return self.populate_response(results=results, has_next=len(rows) > self.pagination["limit"])

    def add_award_generated_id_field(self, records):
        """Obtain the generated_unique_award_id and add to response"""
        dest, source = self.constants["generated_award_field"]
        internal_ids = [record[source] for record in records]
        award_ids = Award.objects.filter(id__in=internal_ids).values_list("id", "generated_unique_award_id")
        award_ids = {internal_id: guai for internal_id, guai in award_ids}
        for record in records:
            record[dest] = award_ids.get(record[source])  # defensive, in case there is a discrepancy
        return records

    def get_sort_by_fields(self):
        """Translate the API sort key into database column name(s) for the award-type group.

        NOTE(review): if award_type_codes spans multiple groups, no branch assigns
        sort_by_fields and an UnboundLocalError would follow — presumably prevented by
        raise_if_award_types_not_valid_subset upstream; confirm.
        """
        if self.pagination["sort_key"] == "Award ID":
            sort_by_fields = self.constants["award_id_fields"]
        elif self.is_subaward:
            if set(self.filters["award_type_codes"]) <= set(procurement_type_mapping):
                sort_by_fields = [contract_subaward_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(assistance_type_mapping):
                sort_by_fields = [grant_subaward_mapping[self.pagination["sort_key"]]]
        else:
            if set(self.filters["award_type_codes"]) <= set(contract_type_mapping):
                sort_by_fields = [award_contracts_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(loan_type_mapping):
                sort_by_fields = [loan_award_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(idv_type_mapping):
                sort_by_fields = [award_idv_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(non_loan_assistance_type_mapping):
                sort_by_fields = [non_loan_assistance_award_mapping[self.pagination["sort_key"]]]
        return sort_by_fields

    def get_elastic_sort_by_fields(self):
        """Translate the API sort key into Elasticsearch field name(s); award_id is the tiebreaker."""
        if self.pagination["sort_key"] == "Award ID":
            sort_by_fields = ["display_award_id"]
        else:
            if set(self.filters["award_type_codes"]) <= set(contract_type_mapping):
                sort_by_fields = [contracts_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(loan_type_mapping):
                sort_by_fields = [loan_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(idv_type_mapping):
                sort_by_fields = [idv_mapping[self.pagination["sort_key"]]]
            elif set(self.filters["award_type_codes"]) <= set(non_loan_assistance_type_mapping):
                sort_by_fields = [non_loan_assist_mapping[self.pagination["sort_key"]]]
        # award_id provides a stable secondary sort, required for search_after pagination.
        sort_by_fields.append("award_id")
        return sort_by_fields

    def get_database_fields(self):
        """Return the set of DB columns needed to populate the requested API fields."""
        values = copy.copy(self.constants["minimum_db_fields"])
        for field in self.fields:
            for mapping in self.constants["api_to_db_mapping_list"]:
                if mapping.get(field):
                    values.add(mapping.get(field))
        return values

    def annotate_queryset(self, queryset):
        """Apply each configured annotation function to the queryset."""
        for field, function in self.constants["annotations"].items():
            queryset = function(field, queryset)
        return queryset

    def custom_queryset_order_by(self, queryset, sort_field_names, order):
        """Explicitly set NULLS LAST in the ordering to encourage the usage of the indexes."""
        if order == "desc":
            order_by_list = [F(field).desc(nulls_last=True) for field in sort_field_names]
        else:
            order_by_list = [F(field).asc(nulls_last=True) for field in sort_field_names]
        return queryset.order_by(*order_by_list)

    def populate_response(self, results: list, has_next: bool) -> dict:
        """Assemble the standard (non-Elasticsearch) response envelope."""
        return {
            "limit": self.pagination["limit"],
            "results": results,
            "page_metadata": {"page": self.pagination["page"], "hasNext": has_next},
            "messages": get_generic_filters_message(
                self.original_filters.keys(), [elem["name"] for elem in AWARD_FILTER_NO_RECIPIENT_ID]
            ),
        }

    def date_to_epoch_millis(self, date):
        """Convert a date (date/datetime object or 'YYYY-MM-DD' string) to epoch milliseconds
        when the active sort key is a date field; otherwise return the value unchanged."""
        if "DATE" in self.pagination["sort_key"].upper():
            # BUG FIX: was `type(date) != "str"` — comparing a type object to the string
            # "str" is always True, so string inputs hit .strftime() and raised
            # AttributeError. isinstance is the correct check.
            if date is not None and not isinstance(date, str):
                date = date.strftime("%Y-%m-%d")
            d = datetime.strptime(date, "%Y-%m-%d")
            date = int(d.timestamp() * 1000)
        return date

    def query_elasticsearch(self) -> list:
        """Run the award search against Elasticsearch, using search_after pagination when
        last_record_sort_value/last_record_unique_id were supplied."""
        filter_query = QueryWithFilters.generate_awards_elasticsearch_query(self.filters)
        sort_field = self.get_elastic_sort_by_fields()
        sorts = [{field: self.pagination["sort_order"]} for field in sort_field]
        record_num = (self.pagination["page"] - 1) * self.pagination["limit"]
        # random page jumping was removed due to performance concerns
        if (self.last_record_sort_value is None and self.last_record_unique_id is not None) or (
            self.last_record_sort_value is not None and self.last_record_unique_id is None
        ):
            # malformed request
            raise Exception(
                "Using search_after functionality in Elasticsearch requires both last_record_sort_value and last_record_unique_id."
            )
        if record_num >= settings.ES_AWARDS_MAX_RESULT_WINDOW and (
            self.last_record_unique_id is None and self.last_record_sort_value is None
        ):
            raise UnprocessableEntityException(
                "Page #{page} with limit {limit} is over the maximum result limit {es_limit}. Please provide the 'last_record_sort_value' and 'last_record_unique_id' to paginate sequentially.".format(
                    page=self.pagination["page"],
                    limit=self.pagination["limit"],
                    es_limit=settings.ES_AWARDS_MAX_RESULT_WINDOW,
                )
            )
        # Search_after values are provided in the API request - use search after
        if self.last_record_sort_value is not None and self.last_record_unique_id is not None:
            search = (
                AwardSearch()
                .filter(filter_query)
                .sort(*sorts)
                .extra(search_after=[self.last_record_sort_value, self.last_record_unique_id])[
                    : self.pagination["limit"] + 1
                ]  # add extra result to check for next page
            )
        # no values, within result window, use regular elasticsearch
        else:
            search = AwardSearch().filter(filter_query).sort(*sorts)[record_num : record_num + self.pagination["limit"]]
        response = search.handle_execute()
        return response

    def construct_es_response(self, response) -> dict:
        """Transform the Elasticsearch response into the API payload, including
        search_after cursors (last_record_unique_id / last_record_sort_value)."""
        results = []
        for res in response:
            hit = res.to_dict()
            row = {k: hit[v] for k, v in self.constants["internal_id_fields"].items()}

            # Parsing API response values from ES query result JSON
            # We parse the `hit` (result from elasticsearch) to get the award type, use the type to determine
            # which lookup dict to use, and then use that lookup to retrieve the correct value requested from `fields`
            for field in self.fields:
                row[field] = hit.get(
                    self.constants["elasticsearch_type_code_to_field_map"][hit[self.constants["award_semaphore"]]].get(
                        field
                    )
                )
            row["internal_id"] = int(row["internal_id"])
            if row.get("Loan Value"):
                row["Loan Value"] = float(row["Loan Value"])
            if row.get("Subsidy Cost"):
                row["Subsidy Cost"] = float(row["Subsidy Cost"])
            if row.get("Award Amount"):
                row["Award Amount"] = float(row["Award Amount"])
            row["generated_internal_id"] = hit["generated_unique_award_id"]
            row["recipient_id"] = hit.get("recipient_unique_id")
            row["parent_recipient_unique_id"] = hit.get("parent_recipient_unique_id")
            if "Award ID" in self.fields:
                row["Award ID"] = hit["display_award_id"]
            row = self.append_recipient_hash_level(row)
            row.pop("parent_recipient_unique_id")
            results.append(row)

        last_record_unique_id = None
        last_record_sort_value = None
        # search_after queries fetch limit+1 records, so the cursor is the second-to-last hit.
        offset = 1
        if self.last_record_unique_id is not None:
            has_next = len(results) > self.pagination["limit"]
            offset = 2
        else:
            has_next = (
                response.hits.total.value - (self.pagination["page"] - 1) * self.pagination["limit"]
                > self.pagination["limit"]
            )
        if len(response) > 0 and has_next:
            last_record_unique_id = response[len(response) - offset].meta.sort[1]
            last_record_sort_value = response[len(response) - offset].meta.sort[0]
        return {
            "limit": self.pagination["limit"],
            "results": results[: self.pagination["limit"]],
            "page_metadata": {
                "page": self.pagination["page"],
                "hasNext": has_next,
                "last_record_unique_id": last_record_unique_id,
                "last_record_sort_value": str(last_record_sort_value),
            },
            "messages": [
                get_generic_filters_message(
                    self.original_filters.keys(), [elem["name"] for elem in AWARD_FILTER_NO_RECIPIENT_ID]
                )
            ],
        }

    def append_recipient_hash_level(self, result) -> dict:
        """Replace the DUNS-based recipient_id with the hash-level recipient id
        ('<hash>-<level>') when the caller requested recipient_id; otherwise drop it."""
        if "recipient_id" not in self.fields:
            result.pop("recipient_id")
            return result
        recipient_duns = result.get("recipient_id")
        parent_id = result.get("parent_recipient_unique_id")
        if recipient_duns:
            # NOTE(review): SQL is assembled via str.format; the DUNS originates from our
            # own Elasticsearch index, not raw user input, but parameterized SQL would
            # still be safer here.
            sql = """(
                select
                    rp.recipient_hash || '-' || rp.recipient_level as hash
                from recipient_profile rp
                inner join recipient_lookup rl on rl.recipient_hash = rp.recipient_hash
                where
                    rl.duns = {recipient_id}
                    and rp.recipient_level = case when {parent_recipient_unique_id} is null then 'R' else 'C' end
                    and rp.recipient_name not in {special_cases}
            )"""
            special_cases = ["'" + case + "'" for case in SPECIAL_CASES]
            SQL = sql.format(
                recipient_id="'" + recipient_duns + "'",
                parent_recipient_unique_id=parent_id if parent_id else "null",
                special_cases="(" + ", ".join(special_cases) + ")",
            )
            row = execute_sql_to_ordered_dictionary(SQL)
            if len(row) > 0:
                result["recipient_id"] = row[0].get("hash")
            else:
                result["recipient_id"] = None
        # BUG FIX: the original fell off the end of the function here, implicitly
        # returning None whenever a DUNS was present, which crashed the caller's
        # subsequent row.pop(); always return the (possibly updated) result dict.
        return result
def test_build_composable_order_by():
    """Exercise build_composable_order_by: happy-path rendering, empty inputs,
    input validation errors, and end-to-end ordering against the awards table."""
    connection = get_connection()
    with connection.cursor() as cursor:

        def render(*args, **kwargs):
            # Render the returned Composable to a plain SQL string for comparison.
            return build_composable_order_by(*args, **kwargs).as_string(cursor.connection)

        # (positional args, keyword args, expected SQL)
        rendering_cases = [
            (('column',), {}, 'order by "column"'),
            (('this.column',), {}, 'order by "this"."column"'),
            (('column', 'asc'), {}, 'order by "column" asc'),
            (('column',), {'nulls': 'first'}, 'order by "column" nulls first'),
            (('column', 'asc', 'first'), {}, 'order by "column" asc nulls first'),
            ((['column1', 'column2'],), {}, 'order by "column1", "column2"'),
            ((['column1', 'column2'], 'desc'), {}, 'order by "column1" desc, "column2" desc'),
            ((['column1', 'column2'],), {'nulls': 'last'}, 'order by "column1" nulls last, "column2" nulls last'),
            (
                (['column1', 'column2'], 'desc', 'last'),
                {},
                'order by "column1" desc nulls last, "column2" desc nulls last',
            ),
            ((['column1', 'column2'], ['asc', 'desc']), {}, 'order by "column1" asc, "column2" desc'),
            (
                (['column1', 'column2'],),
                {'nulls': ['first', 'last']},
                'order by "column1" nulls first, "column2" nulls last',
            ),
            (
                (['column1', 'column2'], ['asc', 'desc'], ['first', 'last']),
                {},
                'order by "column1" asc nulls first, "column2" desc nulls last',
            ),
            ((None,), {}, ''),
            (('',), {}, ''),
            (([],), {}, ''),
        ]
        for args, kwargs, expected in rendering_cases:
            assert render(*args, **kwargs) == expected

        # Invalid column types, directions, null placements, and mismatched lengths all raise.
        invalid_cases = [
            (([1, 2, 3],), {}),
            ((['column1', 'column2'], 'NOPE'), {}),
            ((['column1', 'column2'],), {'nulls': 'NOPE'}),
            ((['column1', 'column2'], ['asc', 'NOPE']), {}),
            ((['column1', 'column2'],), {'nulls': ['first', 'NOPE']}),
            ((['column1', 'column2'], ['asc', 'asc', 'asc']), {}),
            ((['column1', 'column2'],), {'nulls': ['first', 'first', 'first']}),
        ]
        for args, kwargs in invalid_cases:
            with pytest.raises(ValueError):
                render(*args, **kwargs)

        # End to end: the rendered clause actually orders rows when executed.
        order_clause = render(['a.id', 'a.latest_transaction_id'], ['desc', 'asc'], ['first', 'last'])
        query = SQL('select id, latest_transaction_id from awards a ') + SQL(order_clause)
        expected_rows = [OrderedDict((('id', n), ('latest_transaction_id', n))) for n in range(5, 0, -1)]
        assert execute_sql_to_ordered_dictionary(query) == expected_rows
def test_execute_sql_to_ordered_dictionary(): assert execute_sql_to_ordered_dictionary(RAW_SQL) == EXPECTED_RESPONSE_ORDERED_DICTIONARY assert execute_sql_to_ordered_dictionary(SQL(RAW_SQL)) == EXPECTED_RESPONSE_ORDERED_DICTIONARY assert execute_sql_to_ordered_dictionary(RAW_SQL, User) == EXPECTED_RESPONSE_ORDERED_DICTIONARY
def test_build_composable_order_by():
    """Exercise build_composable_order_by: happy-path rendering, empty inputs,
    input validation errors, and end-to-end ordering against the awards table."""
    connection = get_connection()
    with connection.cursor() as cursor:

        def render(*args, **kwargs):
            # Render the returned Composable to a plain SQL string for comparison.
            return build_composable_order_by(*args, **kwargs).as_string(cursor.connection)

        # (positional args, keyword args, expected SQL)
        rendering_cases = [
            (("column",), {}, 'order by "column"'),
            (("this.column",), {}, 'order by "this"."column"'),
            (("column", "asc"), {}, 'order by "column" asc'),
            (("column",), {"sort_nulls": "first"}, 'order by "column" nulls first'),
            (("column", "asc", "first"), {}, 'order by "column" asc nulls first'),
            ((["column1", "column2"],), {}, 'order by "column1", "column2"'),
            ((["column1", "column2"], "desc"), {}, 'order by "column1" desc, "column2" desc'),
            ((["column1", "column2"],), {"sort_nulls": "last"}, 'order by "column1" nulls last, "column2" nulls last'),
            (
                (["column1", "column2"], "desc", "last"),
                {},
                'order by "column1" desc nulls last, "column2" desc nulls last',
            ),
            ((["column1", "column2"], ["asc", "desc"]), {}, 'order by "column1" asc, "column2" desc'),
            (
                (["column1", "column2"],),
                {"sort_nulls": ["first", "last"]},
                'order by "column1" nulls first, "column2" nulls last',
            ),
            (
                (["column1", "column2"], ["asc", "desc"], ["first", "last"]),
                {},
                'order by "column1" asc nulls first, "column2" desc nulls last',
            ),
            ((None,), {}, ""),
            (("",), {}, ""),
            (([],), {}, ""),
        ]
        for args, kwargs, expected in rendering_cases:
            assert render(*args, **kwargs) == expected

        # Invalid column types, directions, null placements, and mismatched lengths all raise.
        invalid_cases = [
            (([1, 2, 3],), {}),
            ((["column1", "column2"], "NOPE"), {}),
            ((["column1", "column2"],), {"sort_nulls": "NOPE"}),
            ((["column1", "column2"], ["asc", "NOPE"]), {}),
            ((["column1", "column2"],), {"sort_nulls": ["first", "NOPE"]}),
            ((["column1", "column2"], ["asc", "asc", "asc"]), {}),
            ((["column1", "column2"],), {"sort_nulls": ["first", "first", "first"]}),
        ]
        for args, kwargs in invalid_cases:
            with pytest.raises(ValueError):
                render(*args, **kwargs)

        # End to end: the rendered clause actually orders rows when executed.
        order_clause = render(["a.id", "a.latest_transaction_id"], ["desc", "asc"], ["first", "last"])
        query = SQL("select id, latest_transaction_id from awards a ") + SQL(order_clause)
        expected_rows = [OrderedDict((("id", n), ("latest_transaction_id", n))) for n in range(5, 0, -1)]
        assert execute_sql_to_ordered_dictionary(query) == expected_rows
def mock_execute_sql(sql, results, verbosity=None): """SQL method is being mocked here since the `execute_sql_statement` used doesn't use the same DB connection to avoid multiprocessing errors """ return execute_sql_to_ordered_dictionary(sql)
def test_build_composable_order_by():
    """Exercise build_composable_order_by: happy-path rendering, empty inputs,
    input validation errors, and end-to-end ordering against the awards table."""
    connection = get_connection()
    with connection.cursor() as cursor:

        def render(*args, **kwargs):
            # Render the returned Composable to a plain SQL string for comparison.
            return build_composable_order_by(*args, **kwargs).as_string(cursor.connection)

        # (positional args, keyword args, expected SQL)
        rendering_cases = [
            (('column',), {}, 'order by "column"'),
            (('this.column',), {}, 'order by "this"."column"'),
            (('column', 'asc'), {}, 'order by "column" asc'),
            (('column',), {'sort_nulls': 'first'}, 'order by "column" nulls first'),
            (('column', 'asc', 'first'), {}, 'order by "column" asc nulls first'),
            ((['column1', 'column2'],), {}, 'order by "column1", "column2"'),
            ((['column1', 'column2'], 'desc'), {}, 'order by "column1" desc, "column2" desc'),
            ((['column1', 'column2'],), {'sort_nulls': 'last'}, 'order by "column1" nulls last, "column2" nulls last'),
            (
                (['column1', 'column2'], 'desc', 'last'),
                {},
                'order by "column1" desc nulls last, "column2" desc nulls last',
            ),
            ((['column1', 'column2'], ['asc', 'desc']), {}, 'order by "column1" asc, "column2" desc'),
            (
                (['column1', 'column2'],),
                {'sort_nulls': ['first', 'last']},
                'order by "column1" nulls first, "column2" nulls last',
            ),
            (
                (['column1', 'column2'], ['asc', 'desc'], ['first', 'last']),
                {},
                'order by "column1" asc nulls first, "column2" desc nulls last',
            ),
            ((None,), {}, ''),
            (('',), {}, ''),
            (([],), {}, ''),
        ]
        for args, kwargs, expected in rendering_cases:
            assert render(*args, **kwargs) == expected

        # Invalid column types, directions, null placements, and mismatched lengths all raise.
        invalid_cases = [
            (([1, 2, 3],), {}),
            ((['column1', 'column2'], 'NOPE'), {}),
            ((['column1', 'column2'],), {'sort_nulls': 'NOPE'}),
            ((['column1', 'column2'], ['asc', 'NOPE']), {}),
            ((['column1', 'column2'],), {'sort_nulls': ['first', 'NOPE']}),
            ((['column1', 'column2'], ['asc', 'asc', 'asc']), {}),
            ((['column1', 'column2'],), {'sort_nulls': ['first', 'first', 'first']}),
        ]
        for args, kwargs in invalid_cases:
            with pytest.raises(ValueError):
                render(*args, **kwargs)

        # End to end: the rendered clause actually orders rows when executed.
        order_clause = render(['a.id', 'a.latest_transaction_id'], ['desc', 'asc'], ['first', 'last'])
        query = SQL('select id, latest_transaction_id from awards a ') + SQL(order_clause)
        expected_rows = [OrderedDict((('id', n), ('latest_transaction_id', n))) for n in range(5, 0, -1)]
        assert execute_sql_to_ordered_dictionary(query) == expected_rows
def mock_execute_sql(sql, results): return execute_sql_to_ordered_dictionary(sql)