def download(
        self,
        file_name,
        prime_award_types=None,
        agency=None,
        sub_agency=None,
        date_type=None,
        start_date=None,
        end_date=None,
        columns=None,
        file_format="csv",
        monthly_download=False,
        cleanup=False,
        use_sqs=False,
    ):
        """Create a year-constrained award download job and run it.

        Builds the download request payload, validates it, persists a
        DownloadJob row, then either generates the file in-process (optionally
        cleaning up older files with the same prefix from the S3 bucket) or
        enqueues the job id on SQS for a worker to process.

        Args:
            file_name: target file name; expected to end in 'YYYYMMDD.zip'
                (the cleanup logic strips the last 12 characters as a prefix).
            prime_award_types / agency / sub_agency / date_type: request
                filters. NOTE(review): sub_agency is accepted but never used
                in the payload — confirm whether it should be a filter.
            start_date / end_date: optional bounds for the date_range filter.
            columns: optional list of columns; defaults to all (empty list).
            file_format: output format, defaults to "csv".
            monthly_download: flag stored on the DownloadJob row.
            cleanup: when running in-process, delete older same-prefix files.
            use_sqs: push the job to SQS instead of generating in-process.
        """
        date_range = {}
        if start_date:
            date_range["start_date"] = start_date
        if end_date:
            date_range["end_date"] = end_date
        json_request = {
            "constraint_type": "year",
            "filters": {
                "prime_award_types": prime_award_types,
                "agency": str(agency),
                "date_type": date_type,
                "date_range": date_range,
            },
            # Avoid the shared-mutable-default pitfall: the signature defaults
            # columns to None and a fresh list is substituted here.
            "columns": columns if columns is not None else [],
            "file_format": file_format,
        }
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = validate_award_request(json_request)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT["ready"],
            file_name=file_name,
            json_request=json.dumps(order_nested_object(validated_request)),
            # Honor the caller-supplied flag; previously this was hard-coded
            # to True, silently ignoring the monthly_download parameter.
            monthly_download=monthly_download,
        )

        if not use_sqs:
            # Note: Because of the line below, it's advised to only run this script on a separate instance as this will
            #       modify your bulk download settings.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            download_generation.generate_download(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
                for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                    if key.key == file_name:
                        # ignore the one we just uploaded
                        continue
                    key.delete()
                    logger.info("Deleting {} from bucket".format(key.key))
        else:
            queue = get_sqs_queue(
                queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
# Example #2 (scraped-snippet separator; original marker "예제 #2" / vote count "0")
 def restart_download_operation(self):
     """Restart a download job: clear any prior error and either regenerate
     it synchronously in this process (local) or re-queue it (remote)."""
     if process_is_local():
         # Local path: mark ready, then regenerate in-process.
         self.update_download_job(
             job_status_id=JOB_STATUS_DICT["ready"], error_message=None)
         download_generation.generate_download(download_job=self.download_job)
         return
     # Remote path: re-enqueue first, then record the job as queued.
     self.push_job_to_queue()
     self.update_download_job(
         job_status_id=JOB_STATUS_DICT["queued"], error_message=None)
# Example #3 (scraped-snippet separator; original marker "예제 #3" / vote count "0")
 def process_request(self, download_job: DownloadJob):
     """Execute the download in-process when configured for local runs,
     otherwise hand the job id off to SQS for a remote worker."""
     run_in_process = settings.IS_LOCAL and settings.RUN_LOCAL_DOWNLOAD_IN_PROCESS
     if run_in_process:
         # Eagerly execute the download in this running process.
         download_generation.generate_download(download_job)
         return
     # A worker on another server will eventually run
     # download_generation.generate_download(download_source) (see download_sqs_worker.py)
     write_to_log(
         message=f"Passing download_job {download_job.download_job_id} to SQS",
         download_job=download_job,
     )
     sqs_queue = get_sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
     sqs_queue.send_message(MessageBody=str(download_job.download_job_id))
 def process_request(self, download_job):
     """Generate the download directly when running locally; otherwise
     enqueue the job id on SQS for asynchronous processing."""
     if not settings.IS_LOCAL:
         # Send a SQS message that will be processed by another server which will eventually run
         # download_generation.write_csvs(**kwargs) (see download_sqs_worker.py)
         write_to_log(
             message=f"Passing download_job {download_job.download_job_id} to SQS",
             download_job=download_job,
         )
         sqs_queue = get_sqs_queue_resource(
             queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
         sqs_queue.send_message(MessageBody=str(download_job.download_job_id))
         return
     # Locally, we do not use SQS
     download_generation.generate_download(download_job=download_job)
# Example #5 (scraped-snippet separator; original marker "예제 #5" / vote count "0")
    def restart_download_operation(self):
        """Zero out the job's progress fields, stamp a fresh status, and
        restart it — in this process when local, otherwise via the queue."""
        restarted_status = (
            JOB_STATUS_DICT["queued"]
            if process_is_local()
            else JOB_STATUS_DICT["ready"]
        )
        self.update_download_job(
            error_message=None,
            file_size=0,
            job_status_id=restarted_status,
            number_of_columns=0,
            number_of_rows=0,
            update_date=datetime.now(timezone.utc),
        )

        if not process_is_local():
            self.push_job_to_queue()
            return
        download_generation.generate_download(download_job=self.download_job)
def download_service_app(download_job_id):
    """Process a single download job end to end.

    Returns 0 on success, or 11 (an arbitrary positive integer) when
    generation raises; the failure is logged before returning.
    """
    job = retrieve_download_job_from_db(download_job_id)
    write_to_log(
        message="Starting new Download Service App with pid {}".format(os.getpid()),
        download_job=job,
    )

    # Retrieve the data and write to the data files; convert any failure
    # into a non-zero exit status for the caller.
    try:
        generate_download(download_job=job)
    except Exception:
        write_to_log(message="Caught exception", download_job=job, is_error=True)
        return 11  # arbitrary positive integer

    return 0
def download_service_app(download_job_id):
    """Run one download job inside a worker tracing span, tagging the span
    with the job's details before generation starts."""
    trace = SubprocessTrace(
        name=f"job.{JOB_TYPE}.download",
        service="bulk-download",
        span_type=SpanTypes.WORKER,
    )
    with trace as span:
        job = _retrieve_download_job_from_db(download_job_id)
        job_details = download_job_to_log_dict(job)
        log_job_message(
            logger=logger,
            message="Starting processing of download request",
            job_type=JOB_TYPE,
            job_id=download_job_id,
            other_params=job_details,
        )
        span.set_tags(job_details)
        generate_download(download_job=job)
# Example #8 (scraped-snippet separator; original marker "예제 #8" / vote count "0")
def download_service_app(download_job_id):
    """Run one download job inside a Datadog trace span, with the span
    sampled into App Analytics and tagged with the job's details."""
    span_ctx = tracer.trace(
        name=f"job.{JOB_TYPE}.download",
        service="bulk-download",
        span_type=SpanTypes.WORKER,
    )
    with span_ctx as span:
        # Set True to add trace to App Analytics:
        # - https://docs.datadoghq.com/tracing/app_analytics/?tab=python#custom-instrumentation
        span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, 1.0)

        job = _retrieve_download_job_from_db(download_job_id)
        job_details = download_job_to_log_dict(job)
        log_job_message(
            logger=logger,
            message="Starting processing of download request",
            job_type=JOB_TYPE,
            job_id=download_job_id,
            other_params=job_details,
        )
        span.set_tags(job_details)
        generate_download(download_job=job)