def download(self,
                 file_name,
                 award_levels,
                 award_types=None,
                 agency=None,
                 sub_agency=None,
                 date_type=None,
                 start_date=None,
                 end_date=None,
                 columns=None,
                 file_format="csv",
                 monthly_download=False,
                 cleanup=False,
                 use_sqs=False):
        date_range = {}
        if start_date:
            date_range['start_date'] = start_date
        if end_date:
            date_range['end_date'] = end_date
        # Assemble the request payload expected by the download viewset
        json_request = {
            'award_levels': award_levels,
            'filters': {
                'award_types': award_types,
                'agency': str(agency) if agency is not None else None,
                'date_type': date_type,
                'date_range': date_range,
            },
            'columns': columns or [],
            'file_format': file_format
        }
        # Run the request through the viewset's filter processing and validation
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = download_viewset.validate_request(json_request)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'],
            file_name=file_name,
            json_request=json.dumps(order_nested_object(validated_request)),
            monthly_download=monthly_download)

        if not use_sqs:
            # Note: the line below overrides your bulk download bucket setting, so it's advised to
            #       run this script only on a separate instance.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            csv_generation.generate_csvs(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # strip the trailing 'YYYYMMDD.zip'
                for key in self.bucket.list(prefix=file_name_prefix):
                    if key.name == file_name:
                        # ignore the one we just uploaded
                        continue
                    logger.info('Deleting {} from bucket'.format(key.name))
                    self.bucket.delete_key(key.name)
        else:
            # Send an SQS message that will be processed by another server, which will eventually run
            # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
            queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                              QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
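
Both branches above go through an sqs_queue helper. A minimal sketch of what such a helper could look like, assuming it wraps boto3's SQS resource (illustrative only; the project's actual helper may differ):

import boto3


def sqs_queue(region_name=None, QueueName=None):
    # Look up a named queue via boto3's SQS resource. The returned Queue
    # object exposes send_message(), receive_messages(), and per-message
    # delete()/change_visibility(), which is exactly how these examples use it.
    sqs = boto3.resource('sqs', region_name=region_name)
    return sqs.get_queue_by_name(QueueName=QueueName)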
Example #2
def process_request(self, download_job):
    if settings.IS_LOCAL:
        # Locally, we do not use SQS
        csv_generation.generate_csvs(download_job=download_job)
    else:
        # Send an SQS message that will be processed by another server, which will eventually run
        # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
        write_to_log(message='Passing download_job {} to SQS'.format(download_job.download_job_id),
                     download_job=download_job)
        queue = sqs_queue(QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
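
Both this snippet and the worker below log through write_to_log. A plausible minimal sketch, assuming it only tags messages with the job id and routes them to the standard logger (the signature matches the calls above; the behavior is an assumption, not the project's actual implementation):

import logging

logger = logging.getLogger(__name__)


def write_to_log(message, download_job=None, is_error=False):
    # Tag the line with the job id (assumed convention) so output from
    # concurrent download jobs can be told apart.
    if download_job is not None:
        message = '[DownloadJob {}] {}'.format(download_job.download_job_id, message)
    if is_error:
        logger.error(message)
    else:
        logger.info(message)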
Example #3
    def handle(self, *args, **options):
        """Run the application."""
        queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                          QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)

        write_to_log(message='Starting SQS polling')
        while True:
            # Make sure these names exist for the except/finally paths below
            download_job = None
            messages = []
            second_attempt = True
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(
                    WaitTimeSeconds=10,
                    MessageAttributeNames=['All'],
                    VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT)
                for message in messages:
                    write_to_log(
                        message='Message Received: {}'.format(message))
                    if message.body is not None:
                        # Retrieve and update the job
                        download_job = DownloadJob.objects.filter(
                            download_job_id=int(message.body)).first()
                        second_attempt = download_job.error_message is not None

                        # Retrieve the data and write to the CSV(s)
                        write_to_log(
                            message='Starting to work on DownloadJob {}'.format(
                                download_job.download_job_id),
                            download_job=download_job)
                        csv_generation.generate_csvs(download_job=download_job,
                                                     sqs_message=message)

                        # Successful processing; delete the message so it is not picked up again
                        message.delete()
            except Exception as e:
                # Handle uncaught exceptions from the message-processing loop
                logger.error(e)
                write_to_log(message=str(e),
                             download_job=download_job,
                             is_error=True)

                if download_job:
                    download_job.error_message = str(e)
                    download_job.job_status_id = JOB_STATUS_DICT['failed' if second_attempt else 'ready']
                    download_job.save()
            finally:
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately, instead of
                # waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except botocore.exceptions.ClientError:
                        # TODO: check existence instead of catching error
                        continue
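
The poller assumes some module-level setup that the snippet does not show. A hedged sketch of what it might look like; the names match the code above, but the timeout value is a placeholder, not the project's real constant:

import logging

import botocore.exceptions  # needed for the ClientError handled in the finally block

logger = logging.getLogger(__name__)

# Seconds a received message stays invisible to other pollers while this
# worker processes it (placeholder value; the real constant lives elsewhere).
DEFAULT_VISIBILITY_TIMEOUT = 1800

Resetting VisibilityTimeout to 0 in the finally block is the complementary design choice: any message that was not deleted becomes visible again immediately, so a retry does not have to wait out the remaining timeout window.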