def post(self, request):
    """Push a message to SQS with the validated request JSON"""
    json_request = self.validate_request(request.data)
    ordered_json_request = json.dumps(order_nested_object(json_request))

    # Check whether the same request has already been run today, skipping
    # job_status_id=4 (presumably 'failed', so failed jobs can be retried)
    updated_date_timestamp = datetime.datetime.strftime(datetime.datetime.utcnow(), '%Y-%m-%d')
    cached_download = DownloadJob.objects.filter(
        json_request=ordered_json_request,
        update_date__gte=updated_date_timestamp).exclude(job_status_id=4).values('file_name')
    if cached_download and not settings.IS_LOCAL:
        # Returning the cached file ensures there are no duplicate downloads within a day
        cached_filename = cached_download[0]['file_name']
        return self.get_download_response(file_name=cached_filename)

    # Build the download name, then timestamp it for uniqueness
    download_name = '_'.join(VALUE_MAPPINGS[award_level]['download_name']
                             for award_level in json_request['award_levels'])
    timestamped_file_name = self.s3_handler.get_timestamped_filename(download_name + '.zip')
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=timestamped_file_name,
        json_request=ordered_json_request)

    write_to_log(
        message='Starting new download job [{}]'.format(download_job.download_job_id),
        download_job=download_job,
        other_params={'request_addr': get_remote_addr(request)})
    self.process_request(download_job)

    return self.get_download_response(file_name=timestamped_file_name)
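# A minimal sketch of what the daily de-duplication above relies on: two payloads that differ
# only in key/list ordering must serialize to the same json_request string. `_order` is a
# hypothetical stand-in for order_nested_object, assuming it recursively sorts dict keys and
# list contents before serialization.
import json

def _order(obj):
    if isinstance(obj, dict):
        return {key: _order(obj[key]) for key in sorted(obj)}  # py3.7+ dicts keep insertion order
    if isinstance(obj, list):
        return sorted((_order(item) for item in obj), key=json.dumps)
    return obj

a = json.dumps(_order({'filters': {'agency': '123', 'award_types': ['grants', 'loans']}}))
b = json.dumps(_order({'filters': {'award_types': ['loans', 'grants'], 'agency': '123'}}))
assert a == b  # equivalent requests map to the same DownloadJob.json_request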
def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
             start_date=None, end_date=None, columns=None, file_format="csv", monthly_download=False,
             cleanup=False, use_sqs=False):
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns or [],
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True)

    if not use_sqs:
        # Note: because of the line below, it's advised to run this script only on a separate
        # instance, as it modifies your bulk download settings
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Delete all files that share the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # drop the 'YYYYMMDD.zip' suffix
            for key in self.bucket.list(prefix=file_name_prefix):
                if key.name == file_name:
                    # Ignore the file we just uploaded
                    continue
                self.bucket.delete_key(key.name)
                logger.info('Deleting {} from bucket'.format(key.name))
    else:
        # Send an SQS message to be processed by another server, which will eventually run
        # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
        queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                          QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
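# Hypothetical invocation sketch (the MonthlyDownload wrapper class and all argument values
# are assumptions, not taken from the source): queue one agency's monthly file on SQS so a
# worker instance performs the CSV generation instead of this process.
downloader = MonthlyDownload()  # assumed owner of download(), self.bucket, etc.
downloader.download(
    file_name='123_awards_20180901.zip',  # must end in 'YYYYMMDD.zip' for the cleanup slice
    award_levels=['prime_awards'],
    award_types=['contracts'],
    agency=123,
    start_date='2017-10-01',
    end_date='2018-09-30',
    use_sqs=True)  # enqueue rather than generating CSVs locally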
def get_source_dict(self, params, view_instance, view_method, request, args, kwargs):
    # Merge the query parameters and request body, dropping 'auditTrail' so that it
    # never influences the cache key
    params = dict(request.query_params)
    params.update(dict(request.data))
    if 'auditTrail' in params:
        del params['auditTrail']

    return {'request': json.dumps(order_nested_object(params))}
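# Hedged illustration with made-up parameter values: 'auditTrail' appears to be client-side
# bookkeeping, so two requests that differ only in it should share a single cache entry.
params_a = {'page': ['1'], 'auditTrail': ['search-button']}
params_b = {'page': ['1'], 'auditTrail': ['url-share']}
for params in (params_a, params_b):
    params.pop('auditTrail', None)
assert params_a == params_b  # both produce the same source dict, hence the same cache key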
def prepare_key(self, key_dict):
    # Order the key_dict with order_nested_object so equivalent requests always
    # produce exactly the same cache key
    ordered_key_dict = json.dumps(order_nested_object(key_dict))
    key_hex = hashlib.md5(ordered_key_dict.encode('utf-8')).hexdigest()
    return key_hex
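# Usage sketch: equivalent key dicts hash to the same 32-character hex digest regardless of
# their original key order, so the digest is safe to use as a cache key. `_order` is the
# hypothetical order_nested_object stand-in from the sketch after post() above.
import hashlib
import json

key_a = json.dumps(_order({'view': 'awards', 'request': '{"page": 1}'}))
key_b = json.dumps(_order({'request': '{"page": 1}', 'view': 'awards'}))
assert (hashlib.md5(key_a.encode('utf-8')).hexdigest()
        == hashlib.md5(key_b.encode('utf-8')).hexdigest())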