def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
             start_date=None, end_date=None, columns=[], file_format="csv", monthly_download=False,
             cleanup=False, use_sqs=False):
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'constraint_type': 'year',
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )

    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a separate instance as this
        # will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:  # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info('Deleting {} from bucket'.format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))

def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
             start_date=None, end_date=None, columns=[], file_format="csv", monthly_download=False,
             cleanup=False, use_sqs=False):
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'constraint_type': 'year',
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )

    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a separate instance as this
        # will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:  # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info('Deleting {} from bucket'.format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))

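# Hypothetical usage sketch (not part of the source): the class name, agency ID, and
# file-name value below are assumptions made purely for illustration.  The file name is
# expected to end in 'YYYYMMDD.zip' so the cleanup branch above can strip the date suffix.
downloader = MonthlyDownloadHelper()  # assumption: stand-in for whatever class owns download()
downloader.download(
    file_name="012_2020_All_Contracts_Full_20200101.zip",
    award_levels=["prime_awards"],
    award_types=["contracts"],
    agency=12,
    date_type="action_date",
    start_date="2020-01-01",
    end_date="2020-12-31",
    cleanup=True,
    use_sqs=False,
)
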
def post(self, request, request_type='award'):
    if request_type == 'award':
        json_request = validate_award_request(request.data)
    elif request_type == 'idv':
        json_request = validate_idv_request(request.data)
    else:
        json_request = validate_account_request(request.data)

    json_request['request_type'] = request_type
    ordered_json_request = json.dumps(order_nested_object(json_request))

    # Check if the same request has been called today
    # TODO!!! Use external_data_load_date to determine data freshness
    updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
    cached_download = (
        DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
        .exclude(job_status_id=JOB_STATUS_DICT["failed"])
        .values("download_job_id", "file_name")
        .first()
    )

    if cached_download and not settings.IS_LOCAL:
        # By returning the cached files, there should be no duplicates on a daily basis
        write_to_log(
            message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id'])
        )
        cached_filename = cached_download['file_name']
        return self.get_download_response(file_name=cached_filename)

    request_agency = json_request.get('filters', {}).get('agency', None)
    final_output_zip_name = create_unique_filename(json_request, request_agency)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=final_output_zip_name,
        json_request=ordered_json_request,
    )

    log_new_download_job(request, download_job)
    self.process_request(download_job)

    return self.get_download_response(file_name=final_output_zip_name)

def post(self, request, request_type='award'):
    if request_type == 'award':
        json_request = self.validate_award_request(request.data)
    else:
        json_request = self.validate_account_request(request.data)

    json_request['request_type'] = request_type
    ordered_json_request = json.dumps(order_nested_object(json_request))

    # Check if the same request has been called today
    # TODO!!! Use external_data_load_date to determine data freshness
    updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
    cached_download = (
        DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
        .exclude(job_status_id=JOB_STATUS_DICT["failed"])
        .values("download_job_id", "file_name")
        .first()
    )

    if cached_download and not settings.IS_LOCAL:
        # By returning the cached files, there should be no duplicates on a daily basis
        write_to_log(
            message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id'])
        )
        cached_filename = cached_download['file_name']
        return self.get_download_response(file_name=cached_filename)

    request_agency = json_request.get('filters', {}).get('agency', None)
    final_output_zip_name = create_unique_filename(json_request["download_types"], request_agency)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=final_output_zip_name,
        json_request=ordered_json_request,
    )

    log_new_download_job(request, download_job)
    self.process_request(download_job)

    return self.get_download_response(file_name=final_output_zip_name)

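# Illustration (assumption, not from the source): the daily cache lookup above matches on the
# *ordered* JSON string, so two request bodies that differ only in key or list order resolve
# to the same ordered_json_request and therefore reuse the same cached DownloadJob.  The
# field values below are illustrative only.
request_a = {"filters": {"agency": "all", "award_levels": ["prime_awards"]}, "columns": []}
request_b = {"columns": [], "filters": {"award_levels": ["prime_awards"], "agency": "all"}}
assert json.dumps(order_nested_object(request_a)) == json.dumps(order_nested_object(request_b))
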
def prepare_key(self, key_dict):
    # Order the key_dict using the order_nested_object function to make sure cache keys are always exactly the same
    ordered_key_dict = json.dumps(order_nested_object(key_dict))
    key_hex = hashlib.md5(ordered_key_dict.encode("utf-8")).hexdigest()
    return key_hex

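# Hypothetical usage sketch: 'RequestCache' is a stand-in name for whatever class defines
# prepare_key().  Because order_nested_object() canonicalises the dict before it is dumped
# and hashed, logically identical key_dicts produce the same MD5 hex digest regardless of
# key or list order.
cache = RequestCache()  # assumption: illustrative class name only
key_a = cache.prepare_key({"filters": {"award_type_codes": ["A", "B"]}, "limit": 10})
key_b = cache.prepare_key({"limit": 10, "filters": {"award_type_codes": ["B", "A"]}})
assert key_a == key_b
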
def test_order_nested_object():
    assert order_nested_object("A") == "A"
    assert order_nested_object([2, 1]) == [1, 2]
    assert order_nested_object({"B": 1, "A": 2}) == OrderedDict((("A", 2), ("B", 1)))
    assert order_nested_object({"B": [2, 1], "A": [4, 3]}) == OrderedDict((("A", [3, 4]), ("B", [1, 2])))
    assert order_nested_object({"tas_codes": [2, 1]}) == {"tas_codes": [1, 2]}

    # Not "require": [[]] so should sort.
    assert order_nested_object({"tas_codes": {"require": {"A": [2, 1]}}}) == {"tas_codes": {"require": {"A": [1, 2]}}}

    # Inner lists for require and exclude for naics_codes, psc_codes, and tas_codes should not
    # be sorted.  Everything else should be.
    assert order_nested_object(
        {
            "tas_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "some_other_codes_we_dont_care_about": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "psc_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "naics_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
        }
    ) == OrderedDict(
        [
            (
                "naics_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "psc_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "some_other_codes_we_dont_care_about",
                OrderedDict(
                    [
                        ("exclude", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                        ("require", [["A", "C", "D"], ["B", "B5", "Service"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "tas_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
        ]
    )

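# Minimal sketch of an order_nested_object() that satisfies the test above.  This is an
# assumption reconstructed from the expected values, not the project's actual implementation:
# dict keys and list elements are sorted recursively, except that the inner lists under
# 'require' and 'exclude' for the naics_codes/psc_codes/tas_codes filters keep their original
# order (only the outer list of code paths is sorted).
from collections import OrderedDict

_PATH_FILTERS = {"naics_codes", "psc_codes", "tas_codes"}


def _order_nested_object_sketch(obj, parent_key=None, grandparent_key=None):
    if isinstance(obj, dict):
        # Sort keys and recurse into values, remembering where we are in the tree.
        return OrderedDict(
            (key, _order_nested_object_sketch(obj[key], parent_key=key, grandparent_key=parent_key))
            for key in sorted(obj)
        )
    if isinstance(obj, list):
        is_code_path_list = (
            grandparent_key in _PATH_FILTERS
            and parent_key in ("require", "exclude")
            and obj
            and all(isinstance(item, list) for item in obj)
        )
        if is_code_path_list:
            # Code paths are hierarchical, so keep each inner list intact and only sort the outer list.
            return sorted(obj)
        return sorted(
            _order_nested_object_sketch(item, parent_key=parent_key, grandparent_key=grandparent_key)
            for item in obj
        )
    # Scalars are returned unchanged.
    return obj
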