def download(
    self,
    file_name,
    prime_award_types=None,
    agency=None,
    sub_agency=None,
    date_type=None,
    start_date=None,
    end_date=None,
    columns=None,
    file_format="csv",
    monthly_download=False,
    cleanup=False,
    use_sqs=False,
):
    """Create a monthly download job for the given filters and either generate
    it in-process or hand it off to the SQS bulk-download queue.

    Args:
        file_name: target zip name; expected to end with 'YYYYMMDD.zip' when
            `cleanup` is used (the prefix locates stale copies in the bucket).
        prime_award_types, agency, date_type, start_date, end_date: request
            filters placed into the validated download request.
        sub_agency: accepted but not used in the request body as written.
        columns: subset of columns to include; defaults to all (empty list).
            Fixed: was a mutable default argument (`columns=[]`), which is
            shared across calls; now defaults to None and is normalized here.
        file_format: output format, defaults to "csv".
        monthly_download: NOTE(review) this flag is ignored — the DownloadJob
            row is always created with monthly_download=True; confirm intent.
        cleanup: when generating in-process, delete older files sharing the
            same name prefix from the bucket.
        use_sqs: push the job id onto SQS instead of generating it here.
    """
    # Normalize the default here instead of using a mutable default argument.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range["start_date"] = start_date
    if end_date:
        date_range["end_date"] = end_date
    json_request = {
        "constraint_type": "year",
        "filters": {
            "prime_award_types": prime_award_types,
            # NOTE(review): str(None) yields the string "None" when agency is
            # omitted — confirm downstream validation expects that.
            "agency": str(agency),
            "date_type": date_type,
            "date_range": date_range,
        },
        "columns": columns,
        "file_format": file_format,
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT["ready"],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script
        # on a separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        download_generation.generate_download(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info("Deleting {} from bucket".format(key.key))
    else:
        queue = get_sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def restart_download_operation(self):
    """Re-run this download job.

    Locally the job is marked "ready" and generated in this process; otherwise
    it is marked "queued" and pushed back onto the SQS queue.

    Fix: in the queued branch the status row is now updated BEFORE the message
    is pushed, so a worker cannot dequeue the job while its row still carries
    the stale status (previously the push happened first).
    """
    if process_is_local():
        self.update_download_job(job_status_id=JOB_STATUS_DICT["ready"], error_message=None)
        download_generation.generate_download(download_job=self.download_job)
    else:
        # Persist the "queued" status first, then enqueue the job id.
        self.update_download_job(job_status_id=JOB_STATUS_DICT["queued"], error_message=None)
        self.push_job_to_queue()
def process_request(self, download_job: DownloadJob):
    """Run the download for `download_job`, either eagerly in this process
    (local in-process mode) or by enqueueing its id on the SQS download queue
    for another server to pick up (see download_sqs_worker.py)."""
    run_in_process = settings.IS_LOCAL and settings.RUN_LOCAL_DOWNLOAD_IN_PROCESS
    if run_in_process:
        # Eagerly execute the download in this running process.
        download_generation.generate_download(download_job)
        return
    # Hand the job off via SQS; a worker will eventually call
    # download_generation.generate_download for it.
    write_to_log(
        message=f"Passing download_job {download_job.download_job_id} to SQS",
        download_job=download_job,
    )
    sqs_queue = get_sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    sqs_queue.send_message(MessageBody=str(download_job.download_job_id))
def process_request(self, download_job):
    """Dispatch `download_job`: generate it directly when running locally
    (no SQS), otherwise enqueue its id for a remote worker
    (see download_sqs_worker.py)."""
    if settings.IS_LOCAL:
        # Locally, we do not use SQS.
        download_generation.generate_download(download_job=download_job)
        return
    # Another server consumes this message and eventually runs
    # download_generation.write_csvs(**kwargs).
    write_to_log(
        message=f"Passing download_job {download_job.download_job_id} to SQS",
        download_job=download_job,
    )
    sqs_queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    sqs_queue.send_message(MessageBody=str(download_job.download_job_id))
def restart_download_operation(self):
    """Reset this download job's bookkeeping fields and re-run it, either
    in-process (local) or by pushing it back onto the queue."""
    local = process_is_local()
    self.update_download_job(
        error_message=None,
        file_size=0,
        # Bug fix: the two statuses were inverted — a job about to be generated
        # in-process is "ready", and a job pushed onto the queue is "queued"
        # (consistent with the other restart implementation in this codebase).
        job_status_id=JOB_STATUS_DICT["ready"] if local else JOB_STATUS_DICT["queued"],
        number_of_columns=0,
        number_of_rows=0,
        update_date=datetime.now(timezone.utc),
    )
    if local:
        download_generation.generate_download(download_job=self.download_job)
    else:
        self.push_job_to_queue()
def download_service_app(download_job_id):
    """Entry point for the download service: fetch the job from the database,
    run the download, and return a process exit code — 0 on success, 11 on
    any exception (arbitrary positive integer)."""
    download_job = retrieve_download_job_from_db(download_job_id)
    write_to_log(
        message="Starting new Download Service App with pid {}".format(os.getpid()),
        download_job=download_job,
    )
    # Retrieve the data and write to the data files.
    try:
        generate_download(download_job=download_job)
    except Exception:
        write_to_log(message="Caught exception", download_job=download_job, is_error=True)
        return 11  # arbitrary positive integer signalling failure
    else:
        return 0
def download_service_app(download_job_id):
    """Process one download request inside a worker tracing span: look up the
    job, log its details, tag the span with them, and generate the download."""
    trace_ctx = SubprocessTrace(
        name=f"job.{JOB_TYPE}.download",
        service="bulk-download",
        span_type=SpanTypes.WORKER,
    )
    with trace_ctx as span:
        job = _retrieve_download_job_from_db(download_job_id)
        job_log_details = download_job_to_log_dict(job)
        log_job_message(
            logger=logger,
            message="Starting processing of download request",
            job_type=JOB_TYPE,
            job_id=download_job_id,
            other_params=job_log_details,
        )
        span.set_tags(job_log_details)
        generate_download(download_job=job)
def download_service_app(download_job_id):
    """Process one download request inside a Datadog trace span: mark the span
    for App Analytics, look up the job, log and tag its details, then run the
    download generation."""
    trace_kwargs = {
        "name": f"job.{JOB_TYPE}.download",
        "service": "bulk-download",
        "span_type": SpanTypes.WORKER,
    }
    with tracer.trace(**trace_kwargs) as span:
        # Set True to add trace to App Analytics:
        # - https://docs.datadoghq.com/tracing/app_analytics/?tab=python#custom-instrumentation
        span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, 1.0)
        job = _retrieve_download_job_from_db(download_job_id)
        job_log_details = download_job_to_log_dict(job)
        log_job_message(
            logger=logger,
            message="Starting processing of download request",
            job_type=JOB_TYPE,
            job_id=download_job_id,
            other_params=job_log_details,
        )
        span.set_tags(job_log_details)
        generate_download(download_job=job)