def get_file_path(file_name: str) -> str:
    if settings.IS_LOCAL:
        file_path = settings.CSV_LOCAL_PATH + file_name
    else:
        s3_handler = S3Handler(
            bucket_name=settings.BULK_DOWNLOAD_S3_BUCKET_NAME,
            redirect_dir=settings.BULK_DOWNLOAD_S3_REDIRECT_DIR)
        file_path = s3_handler.get_simple_url(file_name=file_name)

    return file_path
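# Minimal usage sketch for the helper above, assuming Django settings provide IS_LOCAL,
# CSV_LOCAL_PATH, BULK_DOWNLOAD_S3_BUCKET_NAME and BULK_DOWNLOAD_S3_REDIRECT_DIR (the file
# name below is made up for illustration):
#
#     file_url = get_file_path('all_prime_awards_20180101.zip')
#     # IS_LOCAL=True  -> settings.CSV_LOCAL_PATH + file name (a local filesystem path)
#     # IS_LOCAL=False -> a public S3 URL built by S3Handler.get_simple_url()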
class ListMonthlyDownloadsViewset(APIDocumentationView):
    """
    This route lists the monthly download files available for a given fiscal year, agency, and award type.

    endpoint_doc: /download/list_downloads.md
    """
    s3_handler = S3Handler(
        bucket_name=settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME,
        redirect_dir=settings.MONTHLY_DOWNLOAD_S3_REDIRECT_DIR)

    # This is intentionally not cached so that the latest updates to these monthly generated files are always returned
    def post(self, request):
        """Return list of downloads that match the requested params"""
        agency_id = request.data.get('agency', None)
        fiscal_year = request.data.get('fiscal_year', None)
        type_param = request.data.get('type', None)

        # Check required params
        required_params = {'agency': agency_id, 'fiscal_year': fiscal_year, 'type': type_param}
        for required, param_value in required_params.items():
            if param_value is None:
                raise InvalidParameterException('Missing one or more required query parameters: {}'.format(required))

        # Capitalize type_param and retrieve agency information from agency ID
        download_type = type_param.capitalize()
        if agency_id == 'all':
            agency = {'cgac_code': 'all', 'name': 'All', 'abbreviation': None}
        else:
            agency_check = ToptierAgency.objects.filter(toptier_agency_id=agency_id).values(
                'cgac_code', 'name', 'abbreviation')
            if agency_check:
                agency = agency_check[0]
            else:
                raise InvalidParameterException('{} agency not found'.format(agency_id))

        # Populate regex
        monthly_download_prefixes = '{}_{}_{}'.format(fiscal_year, agency['cgac_code'], download_type)
        monthly_download_regex = r'{}_Full_.*\.zip'.format(monthly_download_prefixes)
        delta_download_prefixes = '{}_{}'.format(agency['cgac_code'], download_type)
        delta_download_regex = r'{}_Delta_.*\.zip'.format(delta_download_prefixes)

        # Retrieve and filter the files we need
        bucket = boto3.resource('s3', region_name=self.s3_handler.region).Bucket(self.s3_handler.bucketRoute)
        monthly_download_names = list(
            filter(
                re.compile(monthly_download_regex).search,
                [key.key for key in bucket.objects.filter(Prefix=monthly_download_prefixes)]))
        delta_download_names = list(
            filter(
                re.compile(delta_download_regex).search,
                [key.key for key in bucket.objects.filter(Prefix=delta_download_prefixes)]))

        # Generate response
        downloads = []
        for filename in monthly_download_names:
            downloads.append(self.create_download_response_obj(filename, fiscal_year, type_param, agency))
        for filename in delta_download_names:
            downloads.append(self.create_download_response_obj(filename, None, type_param, agency, is_delta=True))

        return Response({'monthly_files': downloads})

    def create_download_response_obj(self, filename, fiscal_year, type_param, agency, is_delta=False):
        """Return a dictionary of metadata for a single monthly or delta download file"""
        regex = r'(.*)_(.*)_Delta_(.*)\.zip' if is_delta else r'(.*)_(.*)_(.*)_Full_(.*)\.zip'
        filename_data = re.findall(regex, filename)[0]

        # Simply adds dashes for the date, 20180101 -> 2018-01-01, could also use strftime
        unformatted_date = filename_data[2 if is_delta else 3]
        updated_date = '-'.join([unformatted_date[:4], unformatted_date[4:6], unformatted_date[6:]])

        return {
            'fiscal_year': fiscal_year,
            'agency_name': agency['name'],
            'agency_acronym': agency['abbreviation'],
            'type': type_param,
            'updated_date': updated_date,
            'file_name': filename,
            'url': self.s3_handler.get_simple_url(file_name=filename)
        }
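# Illustrative request/response sketch for the view above. The field names come from the
# code; the agency ID, agency details, file name, and URL are made-up examples:
#
#     POST body:  {"agency": 50, "fiscal_year": 2018, "type": "contracts"}
#
#     Response:
#     {
#         "monthly_files": [
#             {
#                 "fiscal_year": 2018,
#                 "agency_name": "Example Agency",                       # assumed
#                 "agency_acronym": "EA",                                # assumed
#                 "type": "contracts",
#                 "updated_date": "2018-01-01",
#                 "file_name": "2018_123_Contracts_Full_20180101.zip",   # assumed
#                 "url": "https://.../2018_123_Contracts_Full_20180101.zip"
#             }
#         ]
#     }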
class BaseDownloadViewSet(APIDocumentationView):
    s3_handler = S3Handler(bucket_name=settings.BULK_DOWNLOAD_S3_BUCKET_NAME,
                           redirect_dir=settings.BULK_DOWNLOAD_S3_REDIRECT_DIR)

    def post(self, request, request_type='award'):
        """Push a message to SQS with the validated request JSON"""
        json_request = (self.validate_award_request(request.data)
                        if request_type == 'award' else self.validate_account_request(request.data))
        json_request['request_type'] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        updated_date_timestamp = datetime.datetime.strftime(datetime.datetime.utcnow(), '%Y-%m-%d')
        cached_download = DownloadJob.objects. \
            filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp). \
            exclude(job_status_id=4).values('download_job_id', 'file_name')
        if cached_download and not settings.IS_LOCAL:
            # By returning the cached files, there should be no duplicates on a daily basis
            write_to_log(message='Generating file from cached download job ID: {}'.format(
                cached_download[0]['download_job_id']))
            cached_filename = cached_download[0]['file_name']
            return self.get_download_response(file_name=cached_filename)

        # Create download name and timestamped name for uniqueness
        toptier_agency_filter = ToptierAgency.objects.filter(
            toptier_agency_id=json_request.get('filters', {}).get('agency', None)).first()
        download_name = '{}_{}'.format(
            toptier_agency_filter.cgac_code if toptier_agency_filter else 'all',
            '_'.join(VALUE_MAPPINGS[award_level]['download_name']
                     for award_level in json_request['download_types']))
        timestamped_file_name = self.s3_handler.get_timestamped_filename(download_name + '.zip')
        download_job = DownloadJob.objects.create(job_status_id=JOB_STATUS_DICT['ready'],
                                                  file_name=timestamped_file_name,
                                                  json_request=ordered_json_request)
        write_to_log(message='Starting new download job {}'.format(download_job.download_job_id),
                     download_job=download_job,
                     other_params={'request_addr': get_remote_addr(request)})
        self.process_request(download_job)

        return self.get_download_response(file_name=timestamped_file_name)

    def validate_award_request(self, request_data):
        """Analyze request and raise any formatting errors as Exceptions"""
        json_request = {}
        constraint_type = request_data.get('constraint_type', None)

        # Validate required parameters
        for required_param in ['award_levels', 'filters']:
            if required_param not in request_data:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(required_param))

        if not isinstance(request_data['award_levels'], list):
            raise InvalidParameterException('Award levels parameter not provided as a list')
        elif len(request_data['award_levels']) == 0:
            raise InvalidParameterException('At least one award level is required.')
        for award_level in request_data['award_levels']:
            if award_level not in VALUE_MAPPINGS:
                raise InvalidParameterException('Invalid award_level: {}'.format(award_level))
        json_request['download_types'] = request_data['award_levels']

        # Overriding all other filters if the keyword filter is provided in year-constraint download
        # Make sure this is after checking the award_levels
        if constraint_type == 'year' and 'elasticsearch_keyword' in request_data['filters']:
            json_request['filters'] = {
                'elasticsearch_keyword': request_data['filters']['elasticsearch_keyword'],
                'award_type_codes': list(award_type_mapping.keys())
            }
            json_request['limit'] = settings.MAX_DOWNLOAD_LIMIT
            return json_request

        if not isinstance(request_data['filters'], dict):
            raise InvalidParameterException('Filters parameter not provided as a dict')
        elif len(request_data['filters']) == 0:
            raise InvalidParameterException('At least one filter is required.')
        json_request['filters'] = {}

        # Set defaults of non-required parameters
        json_request['columns'] = request_data.get('columns', [])
        json_request['file_format'] = request_data.get('file_format', 'csv')

        # Validate shared filter types and assign defaults
        filters = request_data['filters']
        check_types_and_assign_defaults(filters, json_request['filters'], SHARED_AWARD_FILTER_DEFAULTS)

        # Validate award type codes
        if not filters.get('award_type_codes', None) or len(filters['award_type_codes']) < 1:
            filters['award_type_codes'] = list(award_type_mapping.keys())
        for award_type_code in filters['award_type_codes']:
            if award_type_code not in award_type_mapping:
                raise InvalidParameterException('Invalid award_type: {}'.format(award_type_code))
        json_request['filters']['award_type_codes'] = filters['award_type_codes']

        # Validate locations
        for location_filter in ['place_of_performance_locations', 'recipient_locations']:
            if filters.get(location_filter):
                for location_dict in filters[location_filter]:
                    if not isinstance(location_dict, dict):
                        raise InvalidParameterException('Location is not a dictionary: {}'.format(location_dict))
                    location_error_handling(location_dict.keys())
                json_request['filters'][location_filter] = filters[location_filter]

        # Validate time periods
        total_range_count = validate_time_periods(filters, json_request)

        if constraint_type == 'row_count':
            # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
            json_request['limit'] = parse_limit(request_data)

            # Validate row_count-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters, json_request['filters'], ROW_CONSTRAINT_FILTER_DEFAULTS)
        elif constraint_type == 'year':
            # Validate combined total dates within one year (allow for leap years)
            if total_range_count > 366:
                raise InvalidParameterException('Invalid Parameter: time_period total days must be within a year')

            # Validate year-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters, json_request['filters'], YEAR_CONSTRAINT_FILTER_DEFAULTS)
        else:
            raise InvalidParameterException('Invalid parameter: constraint_type must be "row_count" or "year"')

        return json_request

    def validate_account_request(self, request_data):
        json_request = {}

        json_request['columns'] = request_data.get('columns', [])

        # Validate required parameters
        for required_param in ["account_level", "filters"]:
            if required_param not in request_data:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(required_param))

        # Validate account_level parameters
        if request_data.get('account_level', None) not in ["federal_account", "treasury_account"]:
            raise InvalidParameterException('Invalid Parameter: account_level must be either "federal_account" or '
                                            '"treasury_account"')
        json_request['account_level'] = request_data['account_level']

        # Validate the filters parameter and its contents
        json_request['filters'] = {}
        filters = request_data['filters']
        if not isinstance(filters, dict):
            raise InvalidParameterException('Filters parameter not provided as a dict')
        elif len(filters) == 0:
            raise InvalidParameterException('At least one filter is required.')

        # Validate required filters
        for required_filter in ["fy", "quarter"]:
            if required_filter not in filters:
                raise InvalidParameterException('Missing one or more required filters: {}'.format(required_filter))
            else:
                try:
                    filters[required_filter] = int(filters[required_filter])
                except (TypeError, ValueError):
                    raise InvalidParameterException('{} filter not provided as an integer'.format(required_filter))
            json_request['filters'][required_filter] = filters[required_filter]

        # Validate fiscal_quarter
        if json_request['filters']['quarter'] not in [1, 2, 3, 4]:
            raise InvalidParameterException('quarter filter must be a valid fiscal quarter (1, 2, 3, or 4)')

        # Validate submission_type parameters
        if filters.get('submission_type', None) not in ["account_balances", "object_class_program_activity",
                                                        "award_financial"]:
            raise InvalidParameterException('Invalid Parameter: submission_type must be "account_balances", '
                                            '"object_class_program_activity", or "award_financial"')
        json_request['download_types'] = [filters['submission_type']]

        # Validate the rest of the filters
        check_types_and_assign_defaults(filters, json_request['filters'], ACCOUNT_FILTER_DEFAULTS)

        return json_request

    def process_request(self, download_job):
        if settings.IS_LOCAL:
            # Locally, we do not use SQS
            csv_generation.generate_csvs(download_job=download_job)
        else:
            # Send a SQS message that will be processed by another server which will eventually run
            # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
            write_to_log(message='Passing download_job {} to SQS'.format(download_job.download_job_id),
                         download_job=download_job)
            queue = sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))

    def get_download_response(self, file_name):
        """Generate download response which encompasses various elements to provide accurate status for state
        of a download job"""
        download_job = DownloadJob.objects.filter(file_name=file_name).first()
        if not download_job:
            raise NotFound('Download job with filename {} does not exist.'.format(file_name))

        # Compile url to file
        file_path = settings.CSV_LOCAL_PATH + file_name if settings.IS_LOCAL else \
            self.s3_handler.get_simple_url(file_name=file_name)

        # Add additional response elements that should be part of anything calling this function
        response = {
            'status': download_job.job_status.name,
            'url': file_path,
            'message': download_job.error_message,
            'file_name': file_name,
            # converting size from bytes to kilobytes if file_size isn't None
            'total_size': download_job.file_size / 1000 if download_job.file_size else None,
            'total_columns': download_job.number_of_columns,
            'total_rows': download_job.number_of_rows,
            'seconds_elapsed': download_job.seconds_elapsed()
        }

        return Response(response)
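# Illustrative request bodies accepted by the two validators above. The key names are taken
# from the validation code; the concrete filter values and award levels are examples only,
# not an exhaustive or verified list:
#
#     # validate_award_request -- a row_count-constrained award download
#     {
#         "award_levels": ["prime_awards"],            # must be keys of VALUE_MAPPINGS (example value)
#         "constraint_type": "row_count",
#         "filters": {
#             "award_type_codes": ["A", "B"],          # must be keys of award_type_mapping
#             "time_period": [{"start_date": "2017-10-01", "end_date": "2018-09-30"}]
#         },
#         "columns": [],
#         "file_format": "csv"
#     }
#
#     # validate_account_request -- an account download
#     {
#         "account_level": "treasury_account",
#         "filters": {
#             "fy": 2018,
#             "quarter": 2,
#             "submission_type": "award_financial"
#         }
#     }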
class ListMonthlyDownloadsViewset(APIDocumentationView):
    """
    This route lists the monthly download files available for a given fiscal year, agency, and award type.

    endpoint_doc: /download/list_downloads.md
    """
    s3_handler = S3Handler(name=settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME,
                           region=settings.BULK_DOWNLOAD_AWS_REGION)

    # This is intentionally not cached so that the latest updates to these monthly generated files are always returned
    def post(self, request):
        """Return list of downloads that match the requested params"""
        response_data = {}

        post_data = request.data
        agency_id = post_data.get('agency', None)
        fiscal_year = post_data.get('fiscal_year', None)
        download_type = post_data.get('type', None)

        required_params = {'agency': agency_id, 'fiscal_year': fiscal_year, 'type': download_type}
        for required_param, param_value in required_params.items():
            if param_value is None:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(required_param))

        # Populate regex
        fiscal_year_regex = str(fiscal_year) if fiscal_year else r'\d{4}'
        download_type_regex = download_type.capitalize() if download_type else '(Contracts|Assistance)'

        cgac_regex = '.*'
        if agency_id and agency_id == 'all':
            cgac_regex = 'all'
        elif agency_id:
            cgac_codes = ToptierAgency.objects.filter(toptier_agency_id=agency_id).values('cgac_code')
            if cgac_codes:
                cgac_regex = cgac_codes[0]['cgac_code']
            else:
                raise InvalidParameterException('{} agency not found'.format(agency_id))
        monthly_dl_regex = r'{}_{}_{}_Full_.*\.zip'.format(fiscal_year_regex, cgac_regex, download_type_regex)

        # Generate regex possible prefix
        prefixes = []
        for regex, add_regex in [(fiscal_year_regex, fiscal_year), (cgac_regex, agency_id),
                                 (download_type_regex, download_type)]:
            if not add_regex:
                break
            prefixes.append(regex)
        prefix = '_'.join(prefixes)

        # Get and filter the files we need
        bucket_name = self.s3_handler.bucketRoute
        region_name = S3Handler.REGION
        bucket = boto.s3.connect_to_region(region_name).get_bucket(bucket_name)
        monthly_dls_names = list(
            filter(re.compile(monthly_dl_regex).search, [key.name for key in bucket.list(prefix=prefix)]))

        # Generate response
        downloads = []
        for name in monthly_dls_names:
            name_data = re.findall(r'(.*)_(.*)_(.*)_Full_(.*)\.zip', name)[0]

            agency_name = None
            agency_abbr = None
            agency_cgac = name_data[1]
            if agency_cgac != 'all':
                agency = ToptierAgency.objects.filter(cgac_code=agency_cgac).values('name', 'abbreviation')
                if agency:
                    agency_name = agency[0]['name']
                    agency_abbr = agency[0]['abbreviation']
            else:
                agency_name = 'All'

            # Simply adds dashes for the date, 20180101 -> 2018-01-01, could also use strftime
            updated_date = '-'.join([name_data[3][:4], name_data[3][4:6], name_data[3][6:]])

            downloads.append({
                'fiscal_year': name_data[0],
                'agency_name': agency_name,
                'agency_acronym': agency_abbr,
                'type': name_data[2].lower(),
                'updated_date': updated_date,
                'file_name': name,
                'url': self.s3_handler.get_simple_url(file_name=name)
            })
        response_data['monthly_files'] = downloads

        return Response(response_data)
class BaseDownloadViewSet(APIDocumentationView):
    s3_handler = S3Handler(bucket_name=settings.BULK_DOWNLOAD_S3_BUCKET_NAME,
                           redirect_dir=settings.BULK_DOWNLOAD_S3_REDIRECT_DIR)

    def post(self, request, request_type='award'):
        if request_type == 'award':
            json_request = validate_award_request(request.data)
        elif request_type == 'idv':
            json_request = validate_idv_request(request.data)
        else:
            json_request = validate_account_request(request.data)

        json_request['request_type'] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        # TODO!!! Use external_data_load_date to determine data freshness
        updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
        cached_download = (DownloadJob.objects
                           .filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
                           .exclude(job_status_id=JOB_STATUS_DICT["failed"])
                           .values("download_job_id", "file_name")
                           .first())

        if cached_download and not settings.IS_LOCAL:
            # By returning the cached files, there should be no duplicates on a daily basis
            write_to_log(message='Generating file from cached download job ID: {}'.format(
                cached_download['download_job_id']))
            cached_filename = cached_download['file_name']
            return self.get_download_response(file_name=cached_filename)

        request_agency = json_request.get('filters', {}).get('agency', None)
        final_output_zip_name = create_unique_filename(json_request, request_agency)
        download_job = DownloadJob.objects.create(job_status_id=JOB_STATUS_DICT['ready'],
                                                  file_name=final_output_zip_name,
                                                  json_request=ordered_json_request)

        log_new_download_job(request, download_job)
        self.process_request(download_job)

        return self.get_download_response(file_name=final_output_zip_name)

    def process_request(self, download_job):
        if settings.IS_LOCAL:
            # Locally, we do not use SQS
            csv_generation.generate_csvs(download_job=download_job)
        else:
            # Send a SQS message that will be processed by another server which will eventually run
            # csv_generation.write_csvs(**kwargs) (see download_sqs_worker.py)
            write_to_log(message='Passing download_job {} to SQS'.format(download_job.download_job_id),
                         download_job=download_job)
            queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))

    def get_download_response(self, file_name):
        """Generate download response which encompasses various elements to provide accurate status for state
        of a download job"""
        download_job = DownloadJob.objects.filter(file_name=file_name).first()
        if not download_job:
            raise NotFound('Download job with filename {} does not exist.'.format(file_name))

        # Compile url to file
        if settings.IS_LOCAL:
            file_path = settings.CSV_LOCAL_PATH + file_name
        else:
            file_path = self.s3_handler.get_simple_url(file_name=file_name)

        # Add additional response elements that should be part of anything calling this function
        response = {
            'status': download_job.job_status.name,
            'url': file_path,
            'message': download_job.error_message,
            'file_name': file_name,
            # converting size from bytes to kilobytes if file_size isn't None
            'total_size': download_job.file_size / 1000 if download_job.file_size else None,
            'total_columns': download_job.number_of_columns,
            'total_rows': download_job.number_of_rows,
            'seconds_elapsed': download_job.seconds_elapsed(),
        }

        return Response(response)
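# Why the request is serialized through order_nested_object() before the cache lookup above:
# two logically identical requests can arrive with dict keys in a different order, and a plain
# json.dumps() of each would produce different strings, so the json_request equality filter
# would miss the cached job. A minimal sketch of the idea, using sort_keys as a stand-in
# (order_nested_object is the project's own canonicalizer; its exact behavior is not
# reproduced here):
#
#     import json
#     a = {"filters": {"agency": 1, "award_type_codes": ["A", "B"]}, "columns": []}
#     b = {"columns": [], "filters": {"award_type_codes": ["A", "B"], "agency": 1}}
#     json.dumps(a) == json.dumps(b)                                    # False
#     json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True)    # True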
class ListMonthlyDownloadsViewSet(APIView):
    """
    Returns a list of the current versions of generated archive files for a given fiscal year and agency.
    """
    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/bulk_download/list_monthly_files.md"

    s3_handler = S3Handler(
        bucket_name=settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME,
        redirect_dir=settings.MONTHLY_DOWNLOAD_S3_REDIRECT_DIR)

    # This is intentionally not cached so that the latest updates to these monthly generated files are always returned
    def post(self, request):
        """Return list of downloads that match the requested params"""
        agency_id = request.data.get("agency", None)
        fiscal_year = request.data.get("fiscal_year", None)
        type_param = request.data.get("type", None)

        # Check required params
        required_params = {"agency": agency_id, "fiscal_year": fiscal_year, "type": type_param}
        for required, param_value in required_params.items():
            if param_value is None:
                raise InvalidParameterException("Missing one or more required body parameters: {}".format(required))

        # Capitalize type_param and retrieve agency information from agency ID
        download_type = type_param.capitalize()
        if agency_id == "all":
            agency = {"toptier_code": "All", "name": "All", "abbreviation": None}
        else:
            agency_check = ToptierAgency.objects.filter(toptier_agency_id=agency_id).values(
                "toptier_code", "name", "abbreviation")
            if agency_check:
                agency = agency_check[0]
            else:
                raise InvalidParameterException("{} agency not found".format(agency_id))

        # Populate regex
        monthly_download_prefixes = f"FY{fiscal_year}_{agency['toptier_code']}_{download_type}"
        monthly_download_regex = r"{}_Full_.*\.zip".format(monthly_download_prefixes)
        delta_download_prefixes = f"FY(All)_{agency['toptier_code']}_{download_type}"
        delta_download_regex = r"FY\(All\)_{}_{}_Delta_.*\.zip".format(agency["toptier_code"], download_type)

        # Retrieve and filter the files we need
        bucket = boto3.resource("s3", region_name=self.s3_handler.region).Bucket(self.s3_handler.bucketRoute)
        monthly_download_names = list(
            filter(
                re.compile(monthly_download_regex).search,
                [key.key for key in bucket.objects.filter(Prefix=monthly_download_prefixes)],
            ))
        delta_download_names = list(
            filter(
                re.compile(delta_download_regex).search,
                [key.key for key in bucket.objects.filter(Prefix=delta_download_prefixes)],
            ))

        ##########################################
        # TEMPORARY 2019/12/12. REMOVE after 2020/01/15
        # KEEP old_* prefix and regex around until monthly files using the new format are
        # generated and accessible in S3
        if agency["toptier_code"] == "All":
            agency["toptier_code"] = "all"
        old_monthly_download_prefixes = "{}_{}_{}".format(fiscal_year, agency["toptier_code"], download_type)
        old_monthly_download_regex = r"{}_Full_.*\.zip".format(old_monthly_download_prefixes)
        old_delta_download_prefixes = "{}_{}".format(agency["toptier_code"], download_type)
        old_delta_download_regex = r"{}_Delta_.*\.zip".format(old_delta_download_prefixes)

        monthly_download_names.extend(
            list(
                filter(
                    re.compile(old_monthly_download_regex).search,
                    [key.key for key in bucket.objects.filter(Prefix=old_monthly_download_prefixes)],
                )))
        delta_download_names.extend(
            list(
                filter(
                    re.compile(old_delta_download_regex).search,
                    [key.key for key in bucket.objects.filter(Prefix=old_delta_download_prefixes)],
                )))
        ##########################################

        # Generate response
        downloads = []
        for filename in monthly_download_names:
            downloads.append(self.create_download_response_obj(filename, fiscal_year, type_param, agency))
        for filename in delta_download_names:
            downloads.append(self.create_download_response_obj(filename, None, type_param, agency, is_delta=True))

        return Response({"monthly_files": downloads})

    def create_download_response_obj(self, filename, fiscal_year, type_param, agency, is_delta=False):
        """Return a dictionary of metadata for a single monthly or delta download file"""
        regex = r"(.*)_(.*)_Delta_(.*)\.zip" if is_delta else r"(.*)_(.*)_(.*)_Full_(.*)\.zip"
        filename_data = re.findall(regex, filename)[0]

        # Simply adds dashes for the date, 20180101 -> 2018-01-01, could also use strftime
        unformatted_date = filename_data[2 if is_delta else 3]
        updated_date = "-".join([unformatted_date[:4], unformatted_date[4:6], unformatted_date[6:]])

        return {
            "fiscal_year": fiscal_year,
            "agency_name": agency["name"],
            "agency_acronym": agency["abbreviation"],
            "type": type_param,
            "updated_date": updated_date,
            "file_name": filename,
            "url": self.s3_handler.get_simple_url(file_name=filename),
        }
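# The date handling above inserts dashes by slicing; the strftime alternative that the
# comment mentions would look roughly like this (equivalent for well-formed YYYYMMDD values):
#
#     from datetime import datetime
#     datetime.strptime("20180101", "%Y%m%d").strftime("%Y-%m-%d")   # '2018-01-01'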
class BaseDownloadViewSet(APIView):
    s3_handler = S3Handler(name=settings.BULK_DOWNLOAD_S3_BUCKET_NAME,
                           region=settings.BULK_DOWNLOAD_AWS_REGION)

    def post(self, request):
        """Push a message to SQS with the validated request JSON"""
        json_request = self.validate_request(request.data)
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        updated_date_timestamp = datetime.datetime.strftime(datetime.datetime.utcnow(), '%Y-%m-%d')
        cached_download = DownloadJob.objects.filter(
            json_request=ordered_json_request,
            update_date__gte=updated_date_timestamp).exclude(job_status_id=4).values('file_name')
        if cached_download:
            # By returning the cached files, there should be no duplicates on a daily basis
            cached_filename = cached_download[0]['file_name']
            return self.get_download_response(file_name=cached_filename)

        # Create download name and timestamped name for uniqueness
        download_name = '_'.join(VALUE_MAPPINGS[award_level]['download_name']
                                 for award_level in json_request['award_levels'])
        timestamped_file_name = self.s3_handler.get_timestamped_filename(download_name + '.zip')
        download_job = DownloadJob.objects.create(job_status_id=JOB_STATUS_DICT['ready'],
                                                  file_name=timestamped_file_name,
                                                  json_request=ordered_json_request)
        write_to_log(message='Starting new download job {}'.format(download_job.download_job_id),
                     download_job=download_job,
                     other_params={'request_addr': get_remote_addr(request)})
        self.process_request(download_job)

        return self.get_download_response(file_name=timestamped_file_name)

    def validate_request(self, json_request):
        """Analyze request and raise any formatting errors as Exceptions"""
        constraint_type = json_request.get('constraint_type', None)

        # Overriding all other filters if the keyword filter is provided in year-constraint download
        if constraint_type == 'year' and 'elasticsearch_keyword' in json_request['filters']:
            json_request['filters'] = {
                'elasticsearch_keyword': json_request['filters']['elasticsearch_keyword'],
                'award_type_codes': list(award_type_mapping.keys())
            }
            json_request['limit'] = settings.MAX_DOWNLOAD_LIMIT
            return json_request

        # Validate required parameters
        for required_param in ['award_levels', 'filters']:
            if required_param not in json_request:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(required_param))

        if not isinstance(json_request['award_levels'], list):
            raise InvalidParameterException('Award levels parameter not provided as a list')
        elif len(json_request['award_levels']) == 0:
            raise InvalidParameterException('At least one award level is required.')
        for award_level in json_request['award_levels']:
            if award_level not in VALUE_MAPPINGS:
                raise InvalidParameterException('Invalid award_level: {}'.format(award_level))

        if not isinstance(json_request['filters'], dict):
            raise InvalidParameterException('Filters parameter not provided as a dict')
        elif len(json_request['filters']) == 0:
            raise InvalidParameterException('At least one filter is required.')

        # Set defaults of non-required parameters
        json_request['columns'] = json_request.get('columns', [])
        json_request['file_format'] = json_request.get('file_format', 'csv')

        # Validate shared filter types and assign defaults
        filters = json_request['filters']
        check_types_and_assign_defaults(filters, SHARED_FILTER_DEFAULTS)

        # Validate award type codes
        if not filters.get('award_type_codes', None) or len(filters['award_type_codes']) < 1:
            filters['award_type_codes'] = list(award_type_mapping.keys())
        for award_type_code in filters['award_type_codes']:
            if award_type_code not in award_type_mapping:
                raise InvalidParameterException('Invalid award_type: {}'.format(award_type_code))

        # Validate time periods
        total_range_count = validate_time_periods(filters)

        if constraint_type == 'row_count':
            # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
            json_request['limit'] = parse_limit(json_request)

            # Validate row_count-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters, ROW_CONSTRAINT_FILTER_DEFAULTS)
        elif constraint_type == 'year':
            # Validate combined total dates within one year (allow for leap years)
            if total_range_count > 366:
                raise InvalidParameterException('Invalid Parameter: time_period total days must be within a year')

            # Validate year-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters, YEAR_CONSTRAINT_FILTER_DEFAULTS)
        else:
            raise InvalidParameterException('Invalid parameter: constraint_type must be "row_count" or "year"')

        return json_request

    def process_request(self, download_job):
        if settings.IS_LOCAL:
            # Locally, we do not use SQS
            csv_generation.generate_csvs(download_job=download_job)
        else:
            # Send a SQS message that will be processed by another server which will eventually run
            # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
            write_to_log(message='Passing download_job {} to SQS'.format(download_job.download_job_id),
                         download_job=download_job)
            queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                              QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))

    def get_download_response(self, file_name):
        """Generate download response which encompasses various elements to provide accurate status for state
        of a download job"""
        download_job = DownloadJob.objects.filter(file_name=file_name).first()
        if not download_job:
            raise NotFound('Download job with filename {} does not exist.'.format(file_name))

        # Compile url to file
        file_path = settings.CSV_LOCAL_PATH + file_name if settings.IS_LOCAL else \
            self.s3_handler.get_simple_url(file_name=file_name)

        # Add additional response elements that should be part of anything calling this function
        response = {
            'status': download_job.job_status.name,
            'url': file_path,
            'message': download_job.error_message,
            'file_name': file_name,
            # converting size from bytes to kilobytes if file_size isn't None
            'total_size': download_job.file_size / 1000 if download_job.file_size else None,
            'total_columns': download_job.number_of_columns,
            'total_rows': download_job.number_of_rows,
            'seconds_elapsed': download_job.seconds_elapsed()
        }

        return Response(response)
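# Rough sketch of the consumer side implied by process_request(): a worker polls the same SQS
# queue, reads the download_job_id from the message body, and runs the CSV generation that the
# IS_LOCAL branch calls directly. This is an assumption about the worker's behavior (the code
# above only points to generate_zip.py), not a copy of it; the boto3 calls shown are standard,
# and poll_download_queue is a hypothetical name.
#
#     import boto3
#
#     def poll_download_queue(queue_name):
#         queue = boto3.resource('sqs').get_queue_by_name(QueueName=queue_name)
#         for message in queue.receive_messages(MaxNumberOfMessages=1, WaitTimeSeconds=10):
#             download_job = DownloadJob.objects.get(download_job_id=int(message.body))
#             csv_generation.generate_csvs(download_job=download_job)
#             message.delete()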