def download(
    self,
    file_name,
    prime_award_types=None,
    agency=None,
    sub_agency=None,
    date_type=None,
    start_date=None,
    end_date=None,
    columns=None,
    file_format="csv",
    monthly_download=False,
    cleanup=False,
    use_sqs=False,
):
    date_range = {}
    if start_date:
        date_range["start_date"] = start_date
    if end_date:
        date_range["end_date"] = end_date

    # Build the download request payload and run it through the same filter
    # processing and validation used by the download API
    json_request = {
        "constraint_type": "year",
        "filters": {
            "prime_award_types": prime_award_types,
            "agency": str(agency),
            "date_type": date_type,
            "date_range": date_range,
        },
        "columns": columns or [],
        "file_format": file_format,
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)

    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT["ready"],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )

    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a separate
        # instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        download_generation.generate_download(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info("Deleting {} from bucket".format(key.key))
    else:
        # Hand the job off to the bulk download queue instead of generating it in-process
        queue = get_sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
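
# A minimal, self-contained sketch (not part of the original module) of why the cleanup branch
# above strips the last 12 characters of the file name: monthly files are assumed to end in
# 'YYYYMMDD.zip', so everything before that suffix is the S3 prefix shared by older copies of
# the same monthly download. The example file name below is hypothetical.
def monthly_file_prefix(file_name: str) -> str:
    suffix_len = len("YYYYMMDD.zip")  # 12 characters
    return file_name[:-suffix_len]

# monthly_file_prefix("2020_075_Contracts_Full_20200601.zip") -> "2020_075_Contracts_Full_"
# Any bucket key starting with that prefix, other than the newly uploaded file, is deleted by
# the cleanup loop above.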
def post(self, request: Request, request_type: str = "award", origination: Optional[str] = None):
    if request_type == "award":
        json_request = validate_award_request(request.data)
    elif request_type == "idv":
        json_request = validate_idv_request(request.data)
    elif request_type == "contract":
        json_request = validate_contract_request(request.data)
    elif request_type == "assistance":
        json_request = validate_assistance_request(request.data)
    else:
        json_request = validate_account_request(request.data)

    json_request["request_type"] = request_type
    ordered_json_request = json.dumps(order_nested_object(json_request))

    # Check if the same request has been called today
    # TODO!!! Use external_data_load_date to determine data freshness
    updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
    cached_download = (
        DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
        .exclude(job_status_id=JOB_STATUS_DICT["failed"])
        .values("download_job_id", "file_name")
        .first()
    )

    if cached_download and not settings.IS_LOCAL:
        # By returning the cached files, there should be no duplicates on a daily basis
        write_to_log(message=f"Generating file from cached download job ID: {cached_download['download_job_id']}")
        cached_filename = cached_download["file_name"]
        return self.get_download_response(file_name=cached_filename)

    final_output_zip_name = create_unique_filename(json_request, origination=origination)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT["ready"],
        file_name=final_output_zip_name,
        json_request=ordered_json_request,
    )

    log_new_download_job(request, download_job)
    self.process_request(download_job)

    return self.get_download_response(file_name=final_output_zip_name)
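
# A small sketch (an assumption, not the project's code) of the same validator dispatch
# expressed as a lookup table; unknown request types fall back to the account validator,
# matching the if/elif chain above. Assumes the validate_*_request functions imported by the
# module above are in scope.
VALIDATORS = {
    "award": validate_award_request,
    "idv": validate_idv_request,
    "contract": validate_contract_request,
    "assistance": validate_assistance_request,
}

def validate_download_request(payload, request_type="award"):
    validator = VALIDATORS.get(request_type, validate_account_request)
    return validator(payload)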
def post(self, request, request_type='award'):
    if request_type == 'award':
        json_request = validate_award_request(request.data)
    elif request_type == 'idv':
        json_request = validate_idv_request(request.data)
    else:
        json_request = validate_account_request(request.data)

    json_request['request_type'] = request_type
    ordered_json_request = json.dumps(order_nested_object(json_request))

    # Check if the same request has been called today
    # TODO!!! Use external_data_load_date to determine data freshness
    updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
    cached_download = (
        DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
        .exclude(job_status_id=JOB_STATUS_DICT["failed"])
        .values("download_job_id", "file_name")
        .first()
    )

    if cached_download and not settings.IS_LOCAL:
        # By returning the cached files, there should be no duplicates on a daily basis
        write_to_log(
            message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id'])
        )
        cached_filename = cached_download['file_name']
        return self.get_download_response(file_name=cached_filename)

    request_agency = json_request.get('filters', {}).get('agency', None)
    final_output_zip_name = create_unique_filename(json_request, request_agency)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=final_output_zip_name,
        json_request=ordered_json_request,
    )

    log_new_download_job(request, download_job)
    self.process_request(download_job)

    return self.get_download_response(file_name=final_output_zip_name)
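
# Illustrative reimplementation (not the project's order_nested_object helper) of the idea
# behind json.dumps(order_nested_object(...)) used as the cache key in both post() methods
# above: recursively sorting dict keys makes two semantically identical requests serialize to
# the same string, so the "same request today" lookup on DownloadJob can match with an exact
# string comparison. This sketch only orders dict keys; the example payloads are hypothetical.
import json

def _order_nested(obj):
    if isinstance(obj, dict):
        return {key: _order_nested(obj[key]) for key in sorted(obj)}
    if isinstance(obj, list):
        return [_order_nested(item) for item in obj]
    return obj

a = {"filters": {"agency": "75", "award_levels": ["prime_awards"]}, "request_type": "award"}
b = {"request_type": "award", "filters": {"award_levels": ["prime_awards"], "agency": "75"}}
assert json.dumps(_order_nested(a)) == json.dumps(_order_nested(b))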