def post(self, request):
    """Return transaction amounts aggregated by the requested category.

    NOTE(review): the previous docstring ("Return all budget
    function/subfunction titles matching the provided search text") described
    a different endpoint; this view aggregates filtered transaction amounts
    grouped by category/scope.

    Expects a JSON body with:
        category -- one of awarding_agency, funding_agency, recipient,
                    cfda_programs, industry_codes (required)
        scope    -- category-specific grouping; required for every category
                    except cfda_programs
        filters  -- transaction search filters passed to
                    matview_search_filter (required)
        limit    -- page size (default 10)
        page     -- 1-based page number (default 1)

    Returns a DRF Response containing the echoed category/scope/limit, the
    paged ``results`` and ``page_metadata``.

    Raises InvalidParameterException for missing/invalid parameters and
    NotImplementedError for scopes not implemented in the UI.
    """
    # TODO: check logic in name_dict[x]["aggregated_amount"] statements
    json_request = request.data
    category = json_request.get("category", None)
    scope = json_request.get("scope", None)
    filters = json_request.get("filters", None)
    limit = json_request.get("limit", 10)
    page = json_request.get("page", 1)
    # Slice bounds fetch one extra row (upper_limit + 1 below) so the
    # pagination helper can detect whether a next page exists.
    lower_limit = (page - 1) * limit
    upper_limit = page * limit
    if category is None:
        raise InvalidParameterException("Missing one or more required request parameters: category")
    potential_categories = ["awarding_agency", "funding_agency", "recipient", "cfda_programs", "industry_codes"]
    if category not in potential_categories:
        raise InvalidParameterException("Category does not have a valid value")
    if (scope is None) and (category != "cfda_programs"):
        raise InvalidParameterException("Missing one or more required request parameters: scope")
    if filters is None:
        raise InvalidParameterException("Missing one or more required request parameters: filters")

    # filter queryset
    queryset = matview_search_filter(filters, UniversalTransactionView)

    # When no award_type_codes filter is supplied, aggregate across every
    # known award type.
    filter_types = filters['award_type_codes'] if 'award_type_codes' in filters else award_type_mapping

    # filter the transactions by category
    if category == "awarding_agency":
        potential_scopes = ["agency", "subagency"]
        if scope not in potential_scopes:
            raise InvalidParameterException("scope does not have a valid value")

        if scope == "agency":
            queryset = queryset \
                .filter(awarding_toptier_agency_name__isnull=False) \
                .values(
                    agency_name=F('awarding_toptier_agency_name'),
                    agency_abbreviation=F('awarding_toptier_agency_abbreviation'))
        elif scope == "subagency":
            queryset = queryset \
                .filter(
                    awarding_subtier_agency_name__isnull=False) \
                .values(
                    agency_name=F('awarding_subtier_agency_name'),
                    agency_abbreviation=F('awarding_subtier_agency_abbreviation'))
        elif scope == "office":
            # NOT IMPLEMENTED IN UI
            raise NotImplementedError

        queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types)\
            .order_by('-aggregated_amount')
        results = list(queryset[lower_limit:upper_limit + 1])
        page_metadata = get_simple_pagination_metadata(len(results), limit, page)
        results = results[:limit]

        response = {"category": category, "scope": scope, "limit": limit, "results": results,
                    "page_metadata": page_metadata}
        return Response(response)

    elif category == "funding_agency":
        potential_scopes = ["agency", "subagency"]
        if scope not in potential_scopes:
            raise InvalidParameterException("scope does not have a valid value")

        if scope == "agency":
            queryset = queryset \
                .filter(funding_toptier_agency_name__isnull=False) \
                .values(
                    agency_name=F('funding_toptier_agency_name'),
                    agency_abbreviation=F('funding_toptier_agency_abbreviation'))
        elif scope == "subagency":
            queryset = queryset \
                .filter(
                    funding_subtier_agency_name__isnull=False) \
                .values(
                    agency_name=F('funding_subtier_agency_name'),
                    agency_abbreviation=F('funding_subtier_agency_abbreviation'))
        elif scope == "office":
            # NOT IMPLEMENTED IN UI
            raise NotImplementedError

        queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
            .order_by('-aggregated_amount')
        results = list(queryset[lower_limit:upper_limit + 1])
        page_metadata = get_simple_pagination_metadata(len(results), limit, page)
        results = results[:limit]

        response = {"category": category, "scope": scope, "limit": limit, "results": results,
                    "page_metadata": page_metadata}
        return Response(response)

    elif category == "recipient":
        if scope == "duns":
            queryset = queryset \
                .values(legal_entity_id=F("recipient_id"))
            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                .order_by('-aggregated_amount') \
                .values("aggregated_amount", "legal_entity_id", "recipient_name") \
                .order_by("-aggregated_amount")

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

        elif scope == "parent_duns":
            queryset = queryset \
                .filter(parent_recipient_unique_id__isnull=False)
            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types,
                                              calculate_totals=False) \
                .values(
                    'aggregated_amount',
                    'recipient_name',
                    'parent_recipient_unique_id') \
                .order_by('-aggregated_amount')

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

        else:  # recipient_type
            raise InvalidParameterException("recipient type is not yet implemented")

        response = {"category": category, "scope": scope, "limit": limit, "results": results,
                    "page_metadata": page_metadata}
        return Response(response)

    elif category == "cfda_programs":
        # Use the summary materialized view when the filters allow it; it is
        # much cheaper than the universal transaction view.
        if can_use_view(filters, 'SummaryCfdaNumbersView'):
            queryset = get_view_queryset(filters, 'SummaryCfdaNumbersView')
            queryset = queryset \
                .filter(
                    federal_action_obligation__isnull=False,
                    cfda_number__isnull=False) \
                .values(cfda_program_number=F("cfda_number"))
            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                .values(
                    "aggregated_amount",
                    "cfda_program_number",
                    program_title=F("cfda_title")) \
                .order_by('-aggregated_amount')

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]
            # The summary view lacks the CFDA popular name, so look it up
            # per result row.
            for trans in results:
                trans['popular_name'] = None
                # small DB hit every loop here
                cfda = Cfda.objects \
                    .filter(
                        program_title=trans['program_title'],
                        program_number=trans['cfda_program_number']) \
                    .values('popular_name').first()
                if cfda:
                    trans['popular_name'] = cfda['popular_name']
        else:
            queryset = queryset \
                .filter(
                    cfda_number__isnull=False) \
                .values(cfda_program_number=F("cfda_number"))
            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                .values(
                    "aggregated_amount",
                    "cfda_program_number",
                    popular_name=F("cfda_popular_name"),
                    program_title=F("cfda_title")) \
                .order_by('-aggregated_amount')

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

        # NOTE: unlike the other categories, this response omits "scope".
        response = {"category": category, "limit": limit, "results": results,
                    "page_metadata": page_metadata}
        return Response(response)

    elif category == "industry_codes":  # industry_codes
        if scope == "psc":
            if can_use_view(filters, 'SummaryPscCodesView'):
                queryset = get_view_queryset(filters, 'SummaryPscCodesView')
                queryset = queryset \
                    .filter(product_or_service_code__isnull=False) \
                    .values(psc_code=F("product_or_service_code"))
            else:
                queryset = queryset \
                    .filter(psc_code__isnull=False) \
                    .values("psc_code")
            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                .order_by('-aggregated_amount')

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

            response = {"category": category, "scope": scope, "limit": limit, "results": results,
                        "page_metadata": page_metadata}
            return Response(response)

        elif scope == "naics":
            if can_use_view(filters, 'SummaryNaicsCodesView'):
                queryset = get_view_queryset(filters, 'SummaryNaicsCodesView')
                queryset = queryset \
                    .filter(naics_code__isnull=False) \
                    .values('naics_code')
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .order_by('-aggregated_amount') \
                    .values(
                        'naics_code',
                        'aggregated_amount',
                        'naics_description')
            else:
                queryset = queryset \
                    .filter(naics_code__isnull=False) \
                    .values("naics_code")
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .order_by('-aggregated_amount') \
                    .values(
                        'naics_code',
                        'aggregated_amount',
                        'naics_description')

            # Begin DB hits here
            results = list(queryset[lower_limit:upper_limit + 1])
            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

            response = {"category": category, "scope": scope, "limit": limit, "results": results,
                        "page_metadata": page_metadata}
            return Response(response)

        else:  # recipient_type
            raise InvalidParameterException("recipient type is not yet implemented")
def get_queryset(self):
    """Build a queryset of obligation totals per recipient for one agency.

    Reads ``fiscal_year`` and ``awarding_agency_id`` (required) and
    ``award_category`` (optional) from the request's query parameters,
    filters TransactionNormalized accordingly, and returns obligation sums
    grouped by (award_category, recipient_id, recipient_name), largest
    first.

    Raises InvalidParameterException when required parameters are missing
    or the agency id is not a toptier agency.
    """
    # retrieve post request payload
    json_request = self.request.query_params

    # Retrieve fiscal_year & awarding_agency_id from Request
    fiscal_year = json_request.get('fiscal_year')
    awarding_agency_id = json_request.get('awarding_agency_id')

    # Optional Award Category
    award_category = json_request.get('award_category')

    # Required Query Parameters were not Provided
    if not (fiscal_year and awarding_agency_id):
        raise InvalidParameterException(
            'Missing one or more required query parameters: fiscal_year, awarding_agency_id'
        )
    if not check_valid_toptier_agency(awarding_agency_id):
        raise InvalidParameterException(
            'Awarding Agency ID provided must correspond to a toptier agency'
        )

    toptier_agency = Agency.objects.filter(
        id=awarding_agency_id).first().toptier_agency
    queryset = TransactionNormalized.objects.filter(
        federal_action_obligation__isnull=False)

    # DS-1655: if the AID is "097" (DOD), Include the branches of the military in the queryset
    if toptier_agency.cgac_code == DOD_CGAC:
        tta_list = DOD_ARMED_FORCES_CGAC
        queryset = queryset.filter(
            # Filter based on fiscal_year and awarding_category_id
            fiscal_year=fiscal_year,
            awarding_agency__toptier_agency__cgac_code__in=tta_list)
    else:
        queryset = queryset.filter(
            # Filter based on fiscal_year and awarding_category_id
            fiscal_year=fiscal_year,
            awarding_agency__toptier_agency__cgac_code=toptier_agency.cgac_code)

    queryset = queryset.annotate(
        award_category=F('award__category'),
        recipient_id=F('recipient__legal_entity_id'),
        recipient_name=F('recipient__recipient_name'))

    if award_category is not None:
        # Filter based on award_category
        if award_category != "other":
            queryset = queryset.filter(award_category=award_category)
        else:
            # "other" also covers "insurance": relabel insurance rows as
            # "other" so they aggregate together.
            queryset = queryset.filter(Q(award_category='insurance') | Q(award_category='other')).\
                annotate(award_category=Case(When(award_category='insurance', then=Value('other')),
                                             output_field=CharField())
                         )

    # Sum Obligations for each Recipient
    queryset = queryset.values(
        'award_category', 'recipient_id',
        'recipient_name').annotate(obligated_amount=Sum(
            'federal_action_obligation')).order_by('-obligated_amount')

    return queryset
def type_filter(_type, filters, limit=None):
    """Return spending-explorer results for the requested drill-down type.

    Args:
        _type: one of the explorer types listed in ``_types`` below.
        filters: dict of explorer filters; must include "fy" (4-digit
            fiscal year) and may include "quarter" (1-4).
        limit: optional cap on the number of rows returned for the
            "award" type.

    Returns:
        dict with keys "total", "end_date" and "results".

    Raises:
        InvalidParameterException: on a missing/invalid type, filters,
            fiscal year or quarter.
    """
    fiscal_year = None
    fiscal_quarter = None
    fiscal_date = None

    _types = [
        "budget_function",
        "budget_subfunction",
        "federal_account",
        "program_activity",
        "object_class",
        "recipient",
        "award",
        "award_category",
        "agency",
        "agency_type",
        "agency_sub",
    ]

    # Validate explorer _type
    if _type is None:
        raise InvalidParameterException(
            'Missing Required Request Parameter, "type": "type"')
    elif _type not in _types:
        # FIX: added the missing comma between "award_category" and "agency"
        # in the list of valid types.
        raise InvalidParameterException(
            "Type does not have a valid value. "
            "Valid Types: budget_function, budget_subfunction, federal_account, program_activity,"
            "object_class, recipient, award, award_category, agency, agency_type, agency_sub"
        )

    if filters is None:
        raise InvalidParameterException(
            'Missing Required Request Parameter, "filters": { "filter_options" }'
        )

    # Get fiscal_date and fiscal_quarter
    for key, value in filters.items():
        if key == "fy":
            # Keep only the int() conversion inside the try block so the
            # range-check's InvalidParameterException can never be masked by
            # the ValueError handler.
            try:
                fiscal_year = int(value)
            except ValueError:
                raise InvalidParameterException(
                    'Incorrect or Missing Fiscal Year Parameter, "fy": "YYYY"')
            if fiscal_year < 1000 or fiscal_year > 9999:
                raise InvalidParameterException(
                    'Incorrect Fiscal Year Parameter, "fy": "YYYY"')
        elif key == "quarter":
            # Accept either a string or an integer quarter (str() keeps the
            # original string inputs working unchanged).
            if str(value) in ("1", "2", "3", "4"):
                fiscal_quarter = int(value)
            else:
                raise InvalidParameterException(
                    "Incorrect value provided for quarter parameter. Must be a string between 1 and 4"
                )

    if fiscal_year:
        fiscal_date, fiscal_quarter = generate_last_completed_fiscal_quarter(
            fiscal_year=fiscal_year, fiscal_quarter=fiscal_quarter)

    # Recipient, Award Queryset
    alt_set = (FinancialAccountsByAwards.objects.all().exclude(
        transaction_obligated_amount__isnull=True).exclude(
            transaction_obligated_amount="NaN").filter(
                submission__reporting_fiscal_quarter=fiscal_quarter).filter(
                    submission__reporting_fiscal_year=fiscal_year).annotate(
                        amount=Sum("transaction_obligated_amount")))

    # Base Queryset
    queryset = (FinancialAccountsByProgramActivityObjectClass.objects.all(
    ).exclude(
        obligations_incurred_by_program_object_class_cpe__isnull=True
    ).filter(submission__reporting_fiscal_quarter=fiscal_quarter).filter(
        submission__reporting_fiscal_year=fiscal_year).annotate(
            amount=Sum("obligations_incurred_by_program_object_class_cpe")))

    # Apply filters to queryset results
    alt_set, queryset = spending_filter(alt_set, queryset, filters, _type)

    if _type == "recipient" or _type == "award" or _type == "award_category" or _type == "agency_type":
        # Annotate and get explorer _type filtered results
        exp = Explorer(alt_set, queryset)

        if _type == "recipient":
            alt_set = exp.recipient()
        if _type == "award":
            alt_set = exp.award()
        if _type == "award_category":
            alt_set = exp.award_category()

        # Total value of filtered results
        actual_total = 0

        for award in alt_set:
            award["id"] = str(award["id"])
            if _type in ["award", "award_category"]:
                # Collapse piid/fain/uri into a single "code" field, keeping
                # the first one present.
                code = None
                for code_type in ("piid", "fain", "uri"):
                    if award[code_type]:
                        code = award[code_type]
                        break
                for code_type in ("piid", "fain", "uri"):
                    del award[code_type]
                award["code"] = code
                if _type == "award":
                    award["name"] = code
            actual_total += award["total"]

        alt_set = alt_set[:limit] if _type == "award" else alt_set

        results = {
            "total": actual_total,
            "end_date": fiscal_date,
            "results": alt_set
        }
    else:
        # Annotate and get explorer _type filtered results
        exp = Explorer(alt_set, queryset)

        if _type == "budget_function":
            queryset = exp.budget_function()
        if _type == "budget_subfunction":
            queryset = exp.budget_subfunction()
        if _type == "federal_account":
            queryset = exp.federal_account()
        if _type == "program_activity":
            queryset = exp.program_activity()
        if _type == "object_class":
            queryset = exp.object_class()
        if _type == "agency":
            queryset = exp.agency()

        # Actual total value of filtered results
        actual_total = queryset.aggregate(total=Sum(
            "obligations_incurred_by_program_object_class_cpe"))["total"]
        result_set, expected_total = get_unreported_data_obj(
            queryset=queryset,
            filters=filters,
            limit=limit,
            spending_type=_type,
            actual_total=actual_total,
            fiscal_year=fiscal_year,
            fiscal_quarter=fiscal_quarter,
        )

        results = {
            "total": expected_total,
            "end_date": fiscal_date,
            "results": result_set
        }

    return results
def validate_year(year=None):
    """Validate a year request parameter.

    Accepts a falsy value (passed through unchanged), a string of digits,
    or the literals 'all' / 'latest'. Anything else raises
    InvalidParameterException.
    """
    if not year:
        return year
    if year.isdigit():
        return year
    if year in ('all', 'latest'):
        return year
    raise InvalidParameterException('Invalid year: {}.'.format(year))
def get_queryset(self):
    """Build a queryset of obligation totals per recipient for one agency.

    Reads ``fiscal_year`` and ``awarding_agency_id`` (required) and
    ``award_category`` (optional) from the request's query parameters,
    filters TransactionNormalized accordingly, and returns obligation sums
    grouped by (award_category, recipient_name), largest first. Recipient
    names come from the award's latest transaction, preferring assistance
    data over contract data.

    Raises InvalidParameterException when required parameters are missing
    or the agency id is not a toptier agency.
    """
    # retrieve post request payload
    json_request = self.request.query_params

    # Retrieve fiscal_year & awarding_agency_id from Request
    fiscal_year = json_request.get("fiscal_year")
    awarding_agency_id = json_request.get("awarding_agency_id")

    # Optional Award Category
    award_category = json_request.get("award_category")

    # Required Query Parameters were not Provided
    if not (fiscal_year and awarding_agency_id):
        raise InvalidParameterException(
            "Missing one or more required query parameters: fiscal_year, awarding_agency_id"
        )
    if not check_valid_toptier_agency(awarding_agency_id):
        raise InvalidParameterException(
            "Awarding Agency ID provided must correspond to a toptier agency"
        )

    toptier_agency = Agency.objects.filter(
        id=awarding_agency_id).first().toptier_agency
    queryset = TransactionNormalized.objects.filter(
        federal_action_obligation__isnull=False)

    # DS-1655: if the AID is "097" (DOD), Include the branches of the military in the queryset
    if toptier_agency.toptier_code == DOD_CGAC:
        tta_list = DOD_ARMED_FORCES_CGAC
        queryset = queryset.filter(
            # Filter based on fiscal_year and awarding_category_id
            fiscal_year=fiscal_year,
            awarding_agency__toptier_agency__toptier_code__in=tta_list,
        )
    else:
        queryset = queryset.filter(
            # Filter based on fiscal_year and awarding_category_id
            fiscal_year=fiscal_year,
            awarding_agency__toptier_agency__toptier_code=toptier_agency.toptier_code,
        )

    queryset = queryset.annotate(
        award_category=F("award__category"),
        # Prefer the assistance recipient name; fall back to the contract one.
        recipient_name=Coalesce(
            F("award__latest_transaction__assistance_data__awardee_or_recipient_legal"
              ),
            F("award__latest_transaction__contract_data__awardee_or_recipient_legal"
              ),
        ),
    )

    if award_category is not None:
        # Filter based on award_category
        if award_category != "other":
            queryset = queryset.filter(award_category=award_category)
        else:
            # "other" also covers "insurance": relabel insurance rows as
            # "other" so they aggregate together.
            queryset = queryset.filter(
                Q(award_category="insurance")
                | Q(award_category="other")).annotate(award_category=Case(
                    When(award_category="insurance", then=Value("other")),
                    output_field=CharField()))

    # Sum Obligations for each Recipient
    queryset = (queryset.values(
        "award_category", "recipient_name").annotate(obligated_amount=Sum(
            "federal_action_obligation")).order_by("-obligated_amount"))

    return queryset
def subaward_filter(filters, for_downloads=False):
    """Translate an API ``filters`` dict into a SubawardView queryset.

    Args:
        filters: mapping of filter-key -> filter-value; every key must be
            one of the entries in ``key_list`` below and no value may be
            None.
        for_downloads: when True, time_period filtering uses the wider
            API_MIN_DATE floor instead of API_SEARCH_MIN_DATE.

    Returns:
        A SubawardView queryset with all filters applied.

    Raises:
        InvalidParameterException: for unknown keys, null values, or
            invalid scope values.
    """
    queryset = SubawardView.objects.all()

    # Reusable "is domestic" predicates for recipient location and place of
    # performance.
    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(
        recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(
        pop_country_name="UNITED STATES")

    for key, value in filters.items():
        if value is None:
            raise InvalidParameterException("Invalid filter: " + key + " has null as its value.")

        key_list = [
            "keywords",
            "elasticsearch_keyword",
            "time_period",
            "award_type_codes",
            "prime_and_sub_award_types",
            "agencies",
            "legal_entities",
            "recipient_search_text",
            "recipient_scope",
            "recipient_locations",
            "recipient_type_names",
            "place_of_performance_scope",
            "place_of_performance_locations",
            "award_amounts",
            "award_ids",
            "program_numbers",
            "naics_codes",
            "psc_codes",
            "contract_pricing_type_codes",
            "set_aside_type_codes",
            "extent_competed_type_codes",
            "tas_codes",
        ]

        if key not in key_list:
            raise InvalidParameterException("Invalid filter: " + key + " does not exist.")

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + psc_description + subaward_description
                # award_ts_vector = piid + fain + uri + subaward_number
                filter_obj = Q(keyword_ts_vector=keyword) | Q(
                    award_ts_vector=keyword)
                # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1
                # if keyword.isnumeric():
                #     filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.filter(
                        code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)
                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            # Terms of 8-9 characters may be DUNS numbers; also match the
            # DUNS columns for those.
            potential_duns = list(
                filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q(
                    parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field="transaction_id")
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info("Found {} transactions based on keyword: {}".format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.filter(latest_transaction_id__isnull=False)
            # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs
            sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])'  # int[] -> int array type
            queryset = queryset.extra(
                where=[sql_fragment.format(",".join(transaction_ids))])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, SubawardView,
                                                    min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset = queryset.filter(prime_award_type__in=value)

        elif key == "prime_and_sub_award_types":
            award_types = value.get("sub_awards")
            if award_types:
                queryset = queryset.filter(award_type__in=award_types)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                # Renamed local from `type` to avoid shadowing the builtin.
                agency_type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if agency_type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name
                            ) & Q(
                                funding_toptier_agency_name=v["toptier_name"])
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)
                elif agency_type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v["toptier_name"])
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))

        elif key == "recipient_search_text":

            def recip_string_parse(recipient_string):
                upper_recipient_string = recipient_string.upper()

                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string
                       ) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                return filter_obj

            filter_obj = Q()
            for recipient in value:
                filter_obj |= recip_string_parse(recipient)
            queryset = queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: recipient_scope type is invalid.")

        elif key == "recipient_locations":
            queryset = queryset.filter(
                geocode_filter_locations("recipient_location", value))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset = queryset.filter(business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: place_of_performance_scope is invalid.")

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations("pop", value))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, SubawardView, filters)

        elif key == "award_ids":
            queryset = build_award_ids_filter(queryset, value, ("piid", "fain"))

        # add "naics_codes" (column naics) after NAICS are mapped to subawards
        elif key in ("program_numbers", "psc_codes",
                     "contract_pricing_type_codes"):
            filter_to_col = {
                "program_numbers": "cfda_number",
                "psc_codes": "product_or_service_code",
                "contract_pricing_type_codes": "type_of_contract_pricing",
            }
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= SubawardView.objects.filter(
                    **{"{}__in".format(filter_to_col[key]): in_query})

        elif key in ("set_aside_type_codes", "extent_competed_type_codes"):
            or_queryset = Q()
            filter_to_col = {
                "set_aside_type_codes": "type_set_aside",
                "extent_competed_type_codes": "extent_competed"
            }
            for v in value:
                # BUG FIX: previously each iteration built
                # Q(col__exact=in_query) against the *whole list* of codes,
                # so the exact-match lookup could never match a single code.
                # Compare against the individual code instead.
                or_queryset |= Q(
                    **{"{}__exact".format(filter_to_col[key]): v})
            queryset = queryset.filter(or_queryset)

        elif key == "tas_codes":
            queryset = build_tas_codes_filter(queryset, value)

    return queryset
def validate_year(year=None):
    """Validate a year request parameter.

    A falsy value passes through unchanged; otherwise the year must be a
    digit string or one of the literals "all" / "latest".
    """
    acceptable = not year or year.isdigit() or year in ("all", "latest")
    if not acceptable:
        raise InvalidParameterException("Invalid year: {}.".format(year))
    return year
def validate_award_request(request_data):
    """Analyze request and raise any formatting errors as Exceptions.

    Validates an award-download request body, fills in defaults for
    optional parameters, and returns the normalized ``json_request`` dict
    used to build the download. Raises InvalidParameterException on any
    validation failure.
    """
    _validate_required_parameters(request_data, ["award_levels", "filters"])
    filters = _validate_filters(request_data)
    award_levels = _validate_award_levels(request_data)

    json_request = {"download_types": award_levels, "filters": {}}

    # Set defaults of non-required parameters
    json_request["agency"] = request_data["filters"]["agency"] if request_data[
        "filters"].get("agency") else "all"
    json_request["columns"] = request_data.get("columns", [])
    json_request["file_format"] = str(request_data.get("file_format", "csv")).lower()

    check_types_and_assign_defaults(filters, json_request["filters"], SHARED_AWARD_FILTER_DEFAULTS)

    json_request["filters"]["award_type_codes"] = _validate_award_type_codes(filters)

    _validate_and_update_locations(filters, json_request)
    _validate_tas_codes(filters, json_request)
    _validate_file_format(json_request)

    # Overriding all other filters if the keyword filter is provided in year-constraint download
    # Make sure this is after checking the award_levels
    constraint_type = request_data.get("constraint_type")
    if constraint_type == "year" and "elasticsearch_keyword" in filters:
        json_request["filters"] = {
            "elasticsearch_keyword": filters["elasticsearch_keyword"],
            "award_type_codes": list(award_type_mapping.keys()),
        }
        json_request["limit"] = settings.MAX_DOWNLOAD_LIMIT
        # Early return: no further constraint-type validation applies.
        return json_request

    # Validate time periods
    total_range_count = validate_time_periods(filters, json_request)

    if constraint_type == "row_count":
        # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
        json_request["limit"] = parse_limit(request_data)

        # Validate row_count-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters, json_request["filters"], ROW_CONSTRAINT_FILTER_DEFAULTS)
    elif constraint_type == "year":
        # Validate combined total dates within one year (allow for leap years)
        if total_range_count > 366:
            raise InvalidParameterException(
                "Invalid Parameter: time_period total days must be within a year"
            )

        # Validate year-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters, json_request["filters"], YEAR_CONSTRAINT_FILTER_DEFAULTS)
    else:
        raise InvalidParameterException(
            'Invalid parameter: constraint_type must be "row_count" or "year"')

    return json_request
def _validate_required_parameters(request_data, required_parameters): for required_param in required_parameters: if required_param not in request_data: raise InvalidParameterException( "Missing one or more required body parameters: {}".format( required_param))
def post(self, request):
    """Return list of downloads that match the requested params.

    Expects 'agency', 'fiscal_year' and 'type' in the request body, builds
    a filename regex from them, lists matching monthly download archives in
    the S3 bucket, and returns their metadata and URLs.

    FIX: regex literals are now raw strings (r'...'). The previous
    non-raw '\\d{4}' / '\\.zip' literals contain invalid escape sequences
    that newer Python versions warn about; the string values are identical,
    so behavior is unchanged.
    """
    response_data = {}

    post_data = request.data
    agency_id = post_data.get('agency', None)
    fiscal_year = post_data.get('fiscal_year', None)
    download_type = post_data.get('type', None)

    required_params = {
        'agency': agency_id,
        'fiscal_year': fiscal_year,
        'type': download_type
    }
    for required_param, param_value in required_params.items():
        if param_value is None:
            raise InvalidParameterException(
                'Missing one or more required query parameters: {}'.format(
                    required_param))

    # Populate regex — each missing parameter falls back to a wildcard
    # pattern so the filename match stays permissive.
    fiscal_year_regex = str(fiscal_year) if fiscal_year else r'\d{4}'
    download_type_regex = download_type.capitalize(
    ) if download_type else '(Contracts|Assistance)'

    cgac_regex = '.*'
    if agency_id and agency_id == 'all':
        cgac_regex = 'all'
    elif agency_id:
        cgac_codes = ToptierAgency.objects.filter(
            toptier_agency_id=agency_id).values('cgac_code')
        if cgac_codes:
            cgac_regex = cgac_codes[0]['cgac_code']
        else:
            raise InvalidParameterException(
                '{} agency not found'.format(agency_id))
    monthly_dl_regex = r'{}_{}_{}_Full_.*\.zip'.format(
        fiscal_year_regex, cgac_regex, download_type_regex)

    # Generate regex possible prefix — stop at the first missing component
    # since the filename fields are positional.
    prefixes = []
    for regex, add_regex in [(fiscal_year_regex, fiscal_year),
                             (cgac_regex, agency_id),
                             (download_type_regex, download_type)]:
        if not add_regex:
            break
        prefixes.append(regex)
    prefix = '_'.join(prefixes)

    # Get and filter the files we need
    bucket_name = self.s3_handler.bucketRoute
    region_name = S3Handler.REGION
    bucket = boto.s3.connect_to_region(region_name).get_bucket(bucket_name)
    monthly_dls_names = list(
        filter(
            re.compile(monthly_dl_regex).search,
            [key.name for key in bucket.list(prefix=prefix)]))

    # Generate response
    downloads = []
    for name in monthly_dls_names:
        name_data = re.findall(r'(.*)_(.*)_(.*)_Full_(.*)\.zip', name)[0]

        agency_name = None
        agency_abbr = None
        agency_cgac = name_data[1]
        if agency_cgac != 'all':
            agency = ToptierAgency.objects.filter(
                cgac_code=agency_cgac).values('name', 'abbreviation')
            if agency:
                agency_name = agency[0]['name']
                agency_abbr = agency[0]['abbreviation']
        else:
            agency_name = 'All'

        # Simply adds dashes for the date, 20180101 -> 2018-01-01, could also use strftime
        updated_date = '-'.join(
            [name_data[3][:4], name_data[3][4:6], name_data[3][6:]])

        downloads.append({
            'fiscal_year': name_data[0],
            'agency_name': agency_name,
            'agency_acronym': agency_abbr,
            'type': name_data[2].lower(),
            'updated_date': updated_date,
            'file_name': name,
            'url': self.s3_handler.get_simple_url(file_name=name)
        })

    response_data['monthly_files'] = downloads
    return Response(response_data)
def validate_request(self, json_request):
    """Analyze request and raise any formatting errors as Exceptions.

    Validates a download request, fills in defaults for optional
    parameters, and returns the normalized ``json_request``. Raises
    InvalidParameterException on any validation failure.
    """
    constraint_type = json_request.get('constraint_type', None)

    # Overriding all other filters if the keyword filter is provided in year-constraint download
    if constraint_type == 'year' and 'elasticsearch_keyword' in json_request[
            'filters']:
        json_request['filters'] = {
            'elasticsearch_keyword':
            json_request['filters']['elasticsearch_keyword'],
            'award_type_codes': list(award_type_mapping.keys())
        }
        json_request['limit'] = settings.MAX_DOWNLOAD_LIMIT
        # Early return: no further constraint-type validation applies.
        return json_request

    # Validate required parameters
    for required_param in ['award_levels', 'filters']:
        if required_param not in json_request:
            raise InvalidParameterException(
                'Missing one or more required query parameters: {}'.format(
                    required_param))

    if not isinstance(json_request['award_levels'], list):
        raise InvalidParameterException(
            'Award levels parameter not provided as a list')
    elif len(json_request['award_levels']) == 0:
        raise InvalidParameterException(
            'At least one award level is required.')
    for award_level in json_request['award_levels']:
        if award_level not in VALUE_MAPPINGS:
            raise InvalidParameterException(
                'Invalid award_level: {}'.format(award_level))

    if not isinstance(json_request['filters'], dict):
        raise InvalidParameterException(
            'Filters parameter not provided as a dict')
    elif len(json_request['filters']) == 0:
        raise InvalidParameterException('At least one filter is required.')

    # Set defaults of non-required parameters
    json_request['columns'] = json_request.get('columns', [])
    json_request['file_format'] = json_request.get('file_format', 'csv')

    # Validate shared filter types and assign defaults
    filters = json_request['filters']
    check_types_and_assign_defaults(filters, SHARED_FILTER_DEFAULTS)

    # Validate award type types
    if not filters.get('award_type_codes',
                       None) or len(filters['award_type_codes']) < 1:
        # No explicit codes means "all award types".
        filters['award_type_codes'] = list(award_type_mapping.keys())
    for award_type_code in filters['award_type_codes']:
        if award_type_code not in award_type_mapping:
            raise InvalidParameterException(
                'Invalid award_type: {}'.format(award_type_code))

    # Validate time periods
    total_range_count = validate_time_periods(filters)

    if constraint_type == 'row_count':
        # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
        json_request['limit'] = parse_limit(json_request)

        # Validate row_count-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters,
                                        ROW_CONSTRAINT_FILTER_DEFAULTS)
    elif constraint_type == 'year':
        # Validate combined total dates within one year (allow for leap years)
        if total_range_count > 366:
            raise InvalidParameterException(
                'Invalid Parameter: time_period total days must be within a year'
            )

        # Validate year-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters,
                                        YEAR_CONSTRAINT_FILTER_DEFAULTS)
    else:
        raise InvalidParameterException(
            'Invalid parameter: constraint_type must be "row_count" or "year"'
        )

    return json_request
def post(self, request):
    """Return list of agencies if no POST data is provided.
    Otherwise, returns sub_agencies/federal_accounts associated with the agency provided"""
    response_data = {
        'agencies': [],
        'sub_agencies': [],
        'federal_accounts': []
    }
    if not self.sub_agencies_map:
        # populate the sub_agencies dictionary (lazily, first request only)
        self.pull_modified_agencies_cgacs_subtiers()
    used_cgacs = set(self.sub_agencies_map.values())

    agency_id = None
    post_data = request.data
    if post_data:
        if 'agency' not in post_data:
            raise InvalidParameterException(
                'Missing one or more required query parameters: agency')
        agency_id = post_data['agency']

    # Get all the top tier agencies
    toptier_agencies = list(
        ToptierAgency.objects.filter(cgac_code__in=used_cgacs).values(
            'name', 'toptier_agency_id', 'cgac_code'))

    if not agency_id:
        # Return all the agencies if no agency id provided,
        # split into CFO agencies (ordered by the CFO_CGACS list) and the rest (by name)
        cfo_agencies = sorted(
            list(
                filter(lambda agency: agency['cgac_code'] in CFO_CGACS,
                       toptier_agencies)),
            key=lambda agency: CFO_CGACS.index(agency['cgac_code']))
        other_agencies = sorted([
            agency for agency in toptier_agencies
            if agency not in cfo_agencies
        ],
                                key=lambda agency: agency['name'])
        response_data['agencies'] = {
            'cfo_agencies': cfo_agencies,
            'other_agencies': other_agencies
        }
    else:
        # Get the top tier agency object based on the agency id provided
        # NOTE(review): equality between agency_id (from JSON) and toptier_agency_id
        # assumes matching types (both int) — confirm against callers
        top_tier_agency = list(
            filter(
                lambda toptier: toptier['toptier_agency_id'] == agency_id,
                toptier_agencies))
        if not top_tier_agency:
            raise InvalidParameterException('Agency ID not found')
        top_tier_agency = top_tier_agency[0]
        # Get the sub agencies and federal accounts associated with that top tier agency
        # Removed distinct subtier_agency_name since removing subtiers with multiple codes that aren't in the
        # modified list
        response_data['sub_agencies'] = Agency.objects.filter(toptier_agency_id=agency_id)\
            .values(subtier_agency_name=F('subtier_agency__name'),
                    subtier_agency_code=F('subtier_agency__subtier_code'))\
            .order_by('subtier_agency_name')
        # Tried converting this to queryset filtering but ran into issues trying to
        # double check the right used subtier_agency by cross checking the cgac_code
        # see the last 2 lines of the list comprehension below
        response_data['sub_agencies'] = [
            subagency for subagency in response_data['sub_agencies']
            if subagency['subtier_agency_code'] in self.sub_agencies_map
            and self.sub_agencies_map[subagency['subtier_agency_code']] ==
            top_tier_agency['cgac_code']
        ]
        # subtier_agency_code was only needed for the cross-check above; drop it
        for subagency in response_data['sub_agencies']:
            del subagency['subtier_agency_code']

        response_data['federal_accounts'] = FederalAccount.objects\
            .filter(agency_identifier=top_tier_agency['cgac_code'])\
            .values(federal_account_name=F('account_title'), federal_account_id=F('id'))\
            .order_by('federal_account_name')
    return Response(response_data)
def process_filters(self, request_data):
    """Filter function to update Bulk Download parameters to shared parameters

    Mutates request_data['filters'] in place, translating the legacy Bulk
    Download parameter names (award_types / agency / date_type / date_range)
    into the shared download filter shape (award_type_codes / agencies /
    time_period). Returns None.
    """
    # Validate filter parameter
    filters = request_data.get('filters', None)
    if not filters:
        raise InvalidParameterException(
            'Missing one or more required query parameters: filters')

    # Validate keyword search first, remove all other filters
    # (early return: a keyword search replaces every other filter)
    if 'keyword' in filters:
        request_data['filters'] = {
            'elasticsearch_keyword': filters['keyword']
        }
        return

    # Validate other parameters previously required by the Bulk Download endpoint
    for required_param in [
            'award_types', 'agency', 'date_type', 'date_range'
    ]:
        if required_param not in filters:
            raise InvalidParameterException(
                'Missing one or more required query parameters: {}'.format(
                    required_param))

    # Replacing award_types with award_type_codes
    filters['award_type_codes'] = []
    try:
        # TypeError here means award_types was not iterable (e.g. not a list)
        for award_type_code in filters['award_types']:
            if award_type_code in all_award_types_mappings:
                filters['award_type_codes'].extend(
                    all_award_types_mappings[award_type_code])
            else:
                raise InvalidParameterException(
                    'Invalid award_type: {}'.format(award_type_code))
        del filters['award_types']
    except TypeError:
        raise InvalidParameterException(
            'award_types parameter not provided as a list')

    # Replacing date_range with time_period (shared filters expect a list of ranges)
    date_range_copied = filters['date_range'].copy()
    date_range_copied['date_type'] = filters['date_type']
    filters['time_period'] = [date_range_copied]
    del filters['date_range']
    del filters['date_type']

    # Replacing agency with agencies
    if filters['agency'] != 'all':
        toptier_name = (ToptierAgency.objects.filter(
            toptier_agency_id=filters['agency']).values('name'))
        if not toptier_name:
            raise InvalidParameterException(
                'Toptier ID not found: {}'.format(filters['agency']))
        toptier_name = toptier_name[0]['name']
        filters['agencies'] = [{
            'type': 'awarding',
            'tier': 'toptier',
            'name': toptier_name
        }]
        # NOTE(review): when agency == 'all', a provided 'sub_agency' key is left
        # untouched in filters — confirm downstream filtering ignores it
        if 'sub_agency' in filters:
            if filters['sub_agency']:
                filters['agencies'].append({
                    'type': 'awarding',
                    'tier': 'subtier',
                    'name': filters['sub_agency']
                })
            del filters['sub_agency']
    del filters['agency']
    request_data['filters'] = filters
def account_download_filter(account_type, download_table, filters, account_level="treasury_account"):
    """Build the filtered queryset for an account download.

    Translates the request ``filters`` dict into ORM lookups rooted at the
    table's treasury-account relation, restricts balance/OCPA downloads to the
    latest row per unique TAS combination, and applies the account_level
    derivation before returning the final filtered queryset.

    Raises InvalidParameterException for unknown agency / federal account IDs
    or an invalid account_level.
    """
    query_filters = {}
    # The FK to the treasury account is named differently on the balances table
    tas_id = "treasury_account_identifier" if account_type == "account_balances" else "treasury_account"

    # Filter by Agency, if provided ("all" means no agency restriction)
    if filters.get("agency", False) and filters["agency"] != "all":
        agency = ToptierAgency.objects.filter(
            toptier_agency_id=filters["agency"]).first()
        if not agency:
            raise InvalidParameterException(
                "Agency with that ID does not exist")
        query_filters[
            f"{tas_id}__funding_toptier_agency_id"] = agency.toptier_agency_id

    # Filter by Federal Account, if provided
    if filters.get("federal_account", False) and filters["federal_account"] != "all":
        federal_account_obj = FederalAccount.objects.filter(
            id=filters["federal_account"]).first()
        if federal_account_obj:
            query_filters[f"{tas_id}__federal_account__id"] = filters[
                "federal_account"]
        else:
            raise InvalidParameterException(
                "Federal Account with that ID does not exist")

    # Filter by Budget Function, if provided
    if filters.get("budget_function", False) and filters["budget_function"] != "all":
        query_filters[f"{tas_id}__budget_function_code"] = filters[
            "budget_function"]

    # Filter by Budget SubFunction, if provided
    if filters.get("budget_subfunction", False) and filters["budget_subfunction"] != "all":
        query_filters[f"{tas_id}__budget_subfunction_code"] = filters[
            "budget_subfunction"]

    # Filter by Fiscal Year and Quarter (helper returns both the lookup names
    # and the boundary dates for the requested period)
    reporting_period_start, reporting_period_end, start_date, end_date = retrieve_fyq_filters(
        account_type, account_level, filters)
    query_filters[reporting_period_start] = start_date
    query_filters[reporting_period_end] = end_date

    # Create the base queryset
    queryset = download_table.objects

    if account_type in ["account_balances", "object_class_program_activity"]:
        # only include the latest TASs, not all of them
        unique_id_mapping = {
            "account_balances": "appropriation_account_balances_id",
            "object_class_program_activity":
            "financial_accounts_by_program_activity_object_class_id",
        }
        unique_columns_mapping = {
            "account_balances":
            ["treasury_account_identifier__tas_rendering_label"],
            "object_class_program_activity": [
                "treasury_account__tas_rendering_label",
                "program_activity__program_activity_code",
                "object_class__object_class",
                "object_class__direct_reimbursable",
            ],
        }

        # DEV-5180 check if cares act features are enabled in production yet
        if settings.ENABLE_CARES_ACT_FEATURES is True:
            unique_columns_mapping["object_class_program_activity"].append(
                "disaster_emergency_fund__code")
            unique_columns_mapping["object_class_program_activity"].append(
                "disaster_emergency_fund__title")

        distinct_cols = unique_columns_mapping[account_type]
        # order_by must begin with the distinct columns (DISTINCT ON semantics);
        # the trailing -reporting_period_start/-pk pick the most recent row per group
        order_by_cols = distinct_cols + ["-reporting_period_start", "-pk"]
        latest_ids_q = (download_table.objects.filter(
            **query_filters).distinct(*distinct_cols).order_by(
                *order_by_cols).values(unique_id_mapping[account_type]))
        if latest_ids_q.exists():
            query_filters[
                f"{unique_id_mapping[account_type]}__in"] = latest_ids_q

    # Make derivations based on the account level
    if account_level == "treasury_account":
        queryset = generate_treasury_account_query(queryset, account_type,
                                                   tas_id)
    elif account_level == "federal_account":
        queryset = generate_federal_account_query(queryset, account_type,
                                                  tas_id)
    else:
        raise InvalidParameterException(
            'Invalid Parameter: account_level must be either "federal_account" or '
            '"treasury_account"')

    # Apply filter and return
    return queryset.filter(**query_filters)
def post(self, request):
    """Return spending aggregated by geography (state, county, or district)
    for either place of performance or recipient location scope."""
    json_request = request.data
    self.scope = json_request.get("scope")
    self.filters = json_request.get("filters", {})
    self.geo_layer = json_request.get("geo_layer")
    self.geo_layer_filters = json_request.get("geo_layer_filters")
    fields_list = []  # fields to include in the aggregate query

    # geo_layer -> location code column suffix
    loc_dict = {
        'state': 'state_code',
        'county': 'county_code',
        'district': 'congressional_code'
    }

    # scope -> column prefix used by the matview
    model_dict = {
        'place_of_performance': 'pop',
        'recipient_location': 'recipient_location'
    }

    # Build the query based on the scope fields and geo_layers
    # Fields not in the reference objects above then request is invalid
    scope_field_name = model_dict.get(self.scope)
    loc_field_name = loc_dict.get(self.geo_layer)
    loc_lookup = '{}_{}'.format(scope_field_name, loc_field_name)

    if scope_field_name is None:
        raise InvalidParameterException("Invalid request parameters: scope")
    if loc_field_name is None:
        raise InvalidParameterException("Invalid request parameters: geo_layer")

    self.queryset, self.matview_model = spending_by_geography(self.filters)

    if self.geo_layer == 'state':
        # State will have one field (state_code) containing letter A-Z
        kwargs = {
            '{}_country_code'.format(scope_field_name): 'USA',
            'federal_action_obligation__isnull': False
        }

        # Only state scope will add its own state code
        # State codes are consistent in db ie AL, AK
        fields_list.append(loc_lookup)

        state_response = {
            'scope': self.scope,
            'geo_layer': self.geo_layer,
            'results': self.state_results(kwargs, fields_list, loc_lookup)
        }

        return Response(state_response)
    else:
        # County and district scope will need to select multiple fields
        # State code is needed for county/district aggregation
        state_lookup = '{}_{}'.format(scope_field_name, loc_dict['state'])
        fields_list.append(state_lookup)

        # Adding regex to county/district codes to remove entries with letters since
        # can't be surfaced by map
        kwargs = {'federal_action_obligation__isnull': False}

        if self.geo_layer == 'county':
            # County name added to aggregation since consistent in db
            county_name = '{}_{}'.format(scope_field_name, 'county_name')
            fields_list.append(county_name)

            self.county_district_queryset(
                kwargs,
                fields_list,
                loc_lookup,
                state_lookup,
                scope_field_name
            )

            county_response = {
                'scope': self.scope,
                'geo_layer': self.geo_layer,
                'results': self.county_results(state_lookup, county_name)
            }

            return Response(county_response)
        else:
            # district layer: same queryset preparation, district-specific results
            self.county_district_queryset(
                kwargs,
                fields_list,
                loc_lookup,
                state_lookup,
                scope_field_name
            )

            district_response = {
                'scope': self.scope,
                'geo_layer': self.geo_layer,
                'results': self.district_results(state_lookup)
            }

            return Response(district_response)
def _validate_file_format(json_request: dict) -> None:
    """Raise InvalidParameterException when json_request['file_format'] is not a known format."""
    requested_format = json_request["file_format"]
    if requested_format in FILE_FORMATS:
        return
    valid_options = tuple(FILE_FORMATS.keys())
    raise InvalidParameterException(
        f"'{requested_format}' is not an acceptable value for 'file_format'. Valid options: {valid_options}"
    )
def post(self, request):
    """Return all budget function/subfunction titles matching the provided search text"""
    json_request = request.data
    group = json_request.get('group', None)
    filters = json_request.get('filters', None)

    if group is None:
        raise InvalidParameterException('Missing one or more required request parameters: group')
    if filters is None:
        raise InvalidParameterException('Missing one or more required request parameters: filters')
    # long and short forms are both accepted
    potential_groups = ['quarter', 'fiscal_year', 'month', 'fy', 'q', 'm']
    if group not in potential_groups:
        raise InvalidParameterException('group does not have a valid value')

    queryset = spending_over_time(filters)
    # Fall back to every award type when the filter does not narrow them
    filter_types = filters['award_type_codes'] if 'award_type_codes' in filters else award_type_mapping

    # define what values are needed in the sql query
    queryset = queryset.values('action_date', 'federal_action_obligation', 'original_loan_subsidy_cost')

    # build response
    response = {'group': group, 'results': []}
    nested_order = ''

    group_results = OrderedDict()  # list of time_period objects ie {"fy": "2017", "quarter": "3"} : 1000

    if group == 'fy' or group == 'fiscal_year':
        fy_set = sum_transaction_amount(queryset.values('fiscal_year'), filter_types=filter_types)
        for trans in fy_set:
            # Keys are stringified dicts so they can be literal_eval'd back below
            key = {'fiscal_year': str(trans['fiscal_year'])}
            key = str(key)
            group_results[key] = trans['transaction_amount']

    elif group == 'm' or group == 'month':
        month_set = queryset.annotate(month=ExtractMonth('action_date')) \
            .values('fiscal_year', 'month')
        month_set = sum_transaction_amount(month_set, filter_types=filter_types)
        for trans in month_set:
            # Convert month to fiscal month
            # (2017 is an arbitrary year — presumably only the month matters; verify)
            fiscal_month = generate_fiscal_month(date(year=2017, day=1, month=trans['month']))
            key = {'fiscal_year': str(trans['fiscal_year']), 'month': str(fiscal_month)}
            key = str(key)
            group_results[key] = trans['transaction_amount']
        nested_order = 'month'
    else:
        # quarterly, take months and add them up
        month_set = queryset.annotate(month=ExtractMonth('action_date')) \
            .values('fiscal_year', 'month')
        month_set = sum_transaction_amount(month_set, filter_types=filter_types)
        for trans in month_set:
            # Convert month to quarter (arbitrary 2017 again; only the month is used)
            quarter = FiscalDate(2017, trans['month'], 1).quarter
            key = {'fiscal_year': str(trans['fiscal_year']), 'quarter': str(quarter)}
            key = str(key)

            # If key exists {fy : quarter}, aggregate
            if group_results.get(key) is None:
                group_results[key] = trans['transaction_amount']
            else:
                if trans['transaction_amount']:
                    group_results[key] = group_results.get(key) + trans['transaction_amount']
                else:
                    group_results[key] = group_results.get(key)
        nested_order = 'quarter'

    # convert result into expected format, sort by key to meet front-end specs
    results = []
    # Expected results structure
    # [{
    # 'time_period': {'fy': '2017', 'quarter': '3'},
    # 'aggregated_amount': '200000000'
    # }]

    # Sort by (fiscal_year, month-or-quarter); fiscal_year alone for fy grouping
    sorted_group_results = sorted(
        group_results.items(),
        key=lambda k: (
            ast.literal_eval(k[0])['fiscal_year'],
            int(ast.literal_eval(k[0])[nested_order])) if nested_order else (ast.literal_eval(k[0])['fiscal_year']))

    for key, value in sorted_group_results:
        key_dict = ast.literal_eval(key)
        result = {'time_period': key_dict, 'aggregated_amount': float(value) if value else float(0)}
        results.append(result)
    response['results'] = results

    return Response(response)
def generate_csvs(download_job, sqs_message=None):
    """Derive the relevant file location and write CSVs to it

    Writes each CSV source into a working directory alongside the final file,
    records file size / column counts on ``download_job``, uploads the result
    to S3 when not running locally, and returns ``finish_download(download_job)``.

    Raises:
        InvalidParameterException: when the underlying failure was one.
        Exception: any other failure, with the message also stored on
            ``download_job.error_message`` (job_status_id is set by
            generate_zip.handle()).
    """
    start_time = time.time()

    # Parse data from download_job
    json_request = json.loads(download_job.json_request)
    columns = json_request.get('columns', None)
    limit = json_request.get('limit', None)

    file_name = start_download(download_job)
    # Pre-bind so the cleanup in `finally` cannot NameError (which would mask
    # the real exception) if we fail before the working directory is derived.
    working_dir = None
    try:
        # Create temporary files and working directory
        file_path = settings.CSV_LOCAL_PATH + file_name
        working_dir = os.path.splitext(file_path)[0]
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)

        write_to_log(message='Generating {}'.format(file_name), download_job=download_job)

        # Generate sources from the JSON request object
        sources = get_csv_sources(json_request)
        for source in sources:
            # Parse and write data to the file
            download_job.number_of_columns = max(download_job.number_of_columns, len(source.columns(columns)))
            parse_source(source, columns, download_job, working_dir, start_time, sqs_message, file_path, limit)
        download_job.file_size = os.stat(file_path).st_size
    except Exception as e:
        # Set error message; job_status_id will be set in generate_zip.handle()
        download_job.error_message = 'An exception was raised while attempting to write the file:\n{}'.format(str(e))
        download_job.save()
        if isinstance(e, InvalidParameterException):
            # chain the cause so the original traceback is preserved
            raise InvalidParameterException(e) from e
        else:
            raise Exception(download_job.error_message) from e
    finally:
        # Remove working directory (guarded: it may never have been created)
        if working_dir is not None and os.path.exists(working_dir):
            shutil.rmtree(working_dir)

    try:
        # push file to S3 bucket, if not local
        if not settings.IS_LOCAL:
            bucket = settings.BULK_DOWNLOAD_S3_BUCKET_NAME
            region = settings.USASPENDING_AWS_REGION
            start_uploading = time.time()
            multipart_upload(bucket, region, file_path, os.path.basename(file_path))
            write_to_log(message='Uploading took {} seconds'.format(time.time() - start_uploading),
                         download_job=download_job)
    except Exception as e:
        # Set error message; job_status_id will be set in generate_zip.handle()
        download_job.error_message = 'An exception was raised while attempting to upload the file:\n{}'.format(str(e))
        download_job.save()
        if isinstance(e, InvalidParameterException):
            raise InvalidParameterException(e) from e
        else:
            raise Exception(download_job.error_message) from e
    finally:
        # Remove generated file (only uploaded copies matter off-local)
        if not settings.IS_LOCAL and os.path.exists(file_path):
            os.remove(file_path)

    return finish_download(download_job)