def process_request(self, download_job):
    if settings.IS_LOCAL:
        # Locally, we do not use SQS
        csv_generation.generate_csvs(download_job=download_job)
    else:
        # Send an SQS message that will be processed by another server, which will eventually run
        # csv_generation.generate_csvs(download_job=download_job, sqs_message=message) (see download_sqs_worker.py)
        write_to_log(
            message='Passing download_job {} to SQS'.format(download_job.download_job_id),
            download_job=download_job
        )
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
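
# For reference, a minimal sketch of what a queue-resource helper like
# get_sqs_queue_resource might look like on top of boto3. This is an
# illustration under assumed configuration (the default region here is made up),
# not the project's actual implementation.
import boto3


def get_sqs_queue_resource_sketch(queue_name, region_name="us-east-1"):
    # get_queue_by_name is a standard boto3 SQS resource call; it raises
    # botocore.exceptions.ClientError if the queue does not exist
    sqs = boto3.resource("sqs", region_name=region_name)
    return sqs.get_queue_by_name(QueueName=queue_name)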
def handle(self, *args, **options):
    """Run the application."""
    queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    write_to_log(message="Starting SQS polling")
    while True:
        second_attempt = True
        # Define these before the try block so the except/finally clauses can
        # reference them safely even if receive_messages itself raises
        download_job = None
        messages = []
        try:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(
                WaitTimeSeconds=10,
                MessageAttributeNames=["All"],
                VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT
            )
            for message in messages:
                write_to_log(message="Message Received: {}".format(message))
                if message.body is not None:
                    # Retrieve and update the job
                    download_job = DownloadJob.objects.filter(download_job_id=int(message.body)).first()
                    second_attempt = download_job.error_message is not None

                    # Retrieve the data and write to the CSV(s)
                    csv_generation.generate_csvs(download_job=download_job, sqs_message=message)

                    # If successful, we do not want to run again; delete
                    message.delete()
        except Exception as e:
            logger.error(e)
            write_to_log(message=str(e), download_job=download_job, is_error=True)
            if download_job:
                download_job.error_message = str(e)
                download_job.job_status_id = JOB_STATUS_DICT["failed" if second_attempt else "ready"]
                download_job.save()
        finally:
            # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
            # instead of waiting for the timeout window to expire
            for message in messages:
                try:
                    message.change_visibility(VisibilityTimeout=0)
                except botocore.exceptions.ClientError:
                    # The message may already have been deleted above
                    # TODO: check existence instead of catching error
                    continue
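
# A small, self-contained sketch of the retry semantics the finally block above
# relies on, assuming only boto3 and an existing queue (the queue name is the
# caller's): setting VisibilityTimeout=0 hands an unprocessed message back to
# other consumers immediately, rather than waiting out DEFAULT_VISIBILITY_TIMEOUT.
def requeue_immediately_sketch(queue_name):
    queue = boto3.resource("sqs", region_name="us-east-1").get_queue_by_name(QueueName=queue_name)
    messages = queue.receive_messages(MaxNumberOfMessages=1, VisibilityTimeout=60)
    for message in messages:
        # Instead of processing, return the message to the queue right away
        message.change_visibility(VisibilityTimeout=0)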
def poll_sqs_for_message(queue_name, wait_time):
    """Returns 0 or 1 message from the queue"""
    try:
        queue = get_sqs_queue_resource(queue_name=queue_name)
        sqs_message = queue.receive_messages(
            WaitTimeSeconds=wait_time,
            MessageAttributeNames=["All"],
            VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT,
            MaxNumberOfMessages=1,
        )
    except botocore.exceptions.ClientError:
        write_to_log(message="SQS connection issue. Investigate settings", is_error=True)
        raise SystemExit(1)

    return sqs_message[0] if sqs_message else None
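
# An example caller for poll_sqs_for_message -- a hedged sketch rather than code
# from the project. It long-polls until a message arrives, then hands the job id
# to a hypothetical process_job() helper; only a successful run deletes the message.
def example_worker_loop(queue_name):
    while True:
        message = poll_sqs_for_message(queue_name, wait_time=10)
        if message is None:
            continue  # long poll timed out on an empty queue; poll again
        download_job_id = int(message.body)
        process_job(download_job_id)  # hypothetical processing step
        message.delete()  # remove from the queue only after successful processing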
def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
             start_date=None, end_date=None, columns=None, file_format="csv", monthly_download=False,
             cleanup=False, use_sqs=False):
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'constraint_type': 'year',
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns if columns is not None else [],  # avoid a mutable default argument
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True
    )

    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a separate
        # instance, as it will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info('Deleting {} from bucket'.format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
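
# A quick illustration of the cleanup prefix logic above (the file name is made
# up for the example): stripping the trailing 'YYYYMMDD.zip' (12 characters)
# leaves a prefix shared by every earlier monthly upload of the same file, so
# the bucket filter matches all stale copies while the equality check spares
# the one just uploaded.
example_file_name = "2020_all_Contracts_Full_20200101.zip"  # hypothetical
assert example_file_name[:-12] == "2020_all_Contracts_Full_"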
def push_job_to_queue(self):
    # Candidate for separate object or file
    queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    queue.send_message(MessageBody=str(self.download_job.download_job_id))
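
# A hedged test sketch for push_job_to_queue-style code, assuming the moto
# library (version 5+, which exposes mock_aws) is installed; the queue name,
# region, and job id here are illustrative, not taken from project settings.
from moto import mock_aws


@mock_aws
def test_send_and_receive_job_id():
    sqs = boto3.resource("sqs", region_name="us-east-1")
    queue = sqs.create_queue(QueueName="test-bulk-download-queue")
    queue.send_message(MessageBody="42")  # a stand-in download_job_id
    messages = queue.receive_messages(MaxNumberOfMessages=1)
    assert messages and messages[0].body == "42"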