    def download(
        self,
        file_name,
        prime_award_types=None,
        agency=None,
        sub_agency=None,
        date_type=None,
        start_date=None,
        end_date=None,
        columns=None,  # avoid a shared mutable default argument
        file_format="csv",
        monthly_download=False,
        cleanup=False,
        use_sqs=False,
    ):
        # Build the date_range filter from whichever bounds were supplied
        date_range = {}
        if start_date:
            date_range["start_date"] = start_date
        if end_date:
            date_range["end_date"] = end_date
        json_request = {
            "constraint_type": "year",
            "filters": {
                "prime_award_types": prime_award_types,
                "agency": str(agency),
                "date_type": date_type,
                "date_range": date_range,
            },
            "columns": columns,
            "file_format": file_format,
        }
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = validate_award_request(json_request)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT["ready"],
            file_name=file_name,
            json_request=json.dumps(order_nested_object(validated_request)),
            monthly_download=monthly_download,
        )

        if not use_sqs:
            # Note: the next line overrides the bulk download bucket setting for this
            #       process, so run this script on its own instance, not a shared API host.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            download_generation.generate_download(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # strip the trailing 'YYYYMMDD.zip'
                for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                    if key.key == file_name:
                        # skip the file we just uploaded
                        continue
                    logger.info("Deleting {} from bucket".format(key.key))
                    key.delete()
        else:
            queue = get_sqs_queue(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
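
    # A minimal usage sketch for download() above, written as comments because the owning
    # object (here called "downloader") and the exact award-type codes are not shown in
    # this snippet and are assumptions. The one hard constraint the code itself imposes:
    # when cleanup=True, file_name must end in "YYYYMMDD.zip", since the cleanup branch
    # strips exactly those last 12 characters to build the S3 prefix it deletes under.
    #
    #   downloader.download(
    #       file_name="all_prime_awards_20240131.zip",  # hypothetical name with required date suffix
    #       prime_award_types=["A", "B", "C", "D"],     # hypothetical award-type codes
    #       agency="all",
    #       date_type="action_date",
    #       start_date="2024-01-01",
    #       end_date="2024-01-31",
    #       use_sqs=True,  # enqueue the job for a worker instead of generating it in-process
    #   )
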
    def post(self, request: Request, request_type: str = "award", origination: Optional[str] = None):
        if request_type == "award":
            json_request = validate_award_request(request.data)
        elif request_type == "idv":
            json_request = validate_idv_request(request.data)
        elif request_type == "contract":
            json_request = validate_contract_request(request.data)
        elif request_type == "assistance":
            json_request = validate_assistance_request(request.data)
        else:
            json_request = validate_account_request(request.data)

        json_request["request_type"] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        # TODO!!! Use external_data_load_date to determine data freshness
        updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
        cached_download = (
            DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
            .exclude(job_status_id=JOB_STATUS_DICT["failed"])
            .values("download_job_id", "file_name")
            .first()
        )

        if cached_download and not settings.IS_LOCAL:
            # By returning the cached files, there should be no duplicates on a daily basis
            write_to_log(message=f"Generating file from cached download job ID: {cached_download['download_job_id']}")
            cached_filename = cached_download["file_name"]
            return self.get_download_response(file_name=cached_filename)

        final_output_zip_name = create_unique_filename(json_request, origination=origination)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT["ready"],
            file_name=final_output_zip_name,
            json_request=ordered_json_request,
        )

        log_new_download_job(request, download_job)
        self.process_request(download_job)

        return self.get_download_response(file_name=final_output_zip_name)
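
    # Why order_nested_object matters for the daily cache check above: two logically
    # identical requests must serialize to byte-identical JSON, or the DownloadJob lookup
    # keyed on json_request will miss. A self-contained sketch of the idea, using
    # json.dumps(..., sort_keys=True) as a stand-in for order_nested_object (which, per its
    # use here, is assumed to canonicalize nested structures the same way):
    #
    #   import json
    #   a = json.dumps({"filters": {"agency": "all", "date_type": "action_date"}}, sort_keys=True)
    #   b = json.dumps({"filters": {"date_type": "action_date", "agency": "all"}}, sort_keys=True)
    #   assert a == b  # same cache key, so the second request reuses the first job's file
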
    def post(self, request, request_type='award'):
        if request_type == 'award':
            json_request = validate_award_request(request.data)
        elif request_type == 'idv':
            json_request = validate_idv_request(request.data)
        else:
            json_request = validate_account_request(request.data)

        json_request['request_type'] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        # TODO!!! Use external_data_load_date to determine data freshness
        updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
        cached_download = (
            DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
            .exclude(job_status_id=JOB_STATUS_DICT["failed"])
            .values("download_job_id", "file_name")
            .first()
        )

        if cached_download and not settings.IS_LOCAL:
            # By returning the cached files, there should be no duplicates on a daily basis
            write_to_log(message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id']))
            cached_filename = cached_download['file_name']
            return self.get_download_response(file_name=cached_filename)

        request_agency = json_request.get('filters', {}).get('agency')
        final_output_zip_name = create_unique_filename(json_request, request_agency)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'],
            file_name=final_output_zip_name,
            json_request=ordered_json_request)

        log_new_download_job(request, download_job)
        self.process_request(download_job)

        return self.get_download_response(file_name=final_output_zip_name)
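
    # A sketch of a request body that would exercise the post() handlers above. The
    # "filters", "columns", and "file_format" keys mirror the json_request built in
    # download() earlier in this snippet; the route and the award-type values are
    # assumptions, not confirmed here.
    #
    #   POST <download endpoint for request_type="award">   # routing not shown in this snippet
    #   {
    #       "filters": {
    #           "agency": "all",
    #           "prime_award_types": ["A", "B"],
    #           "date_type": "action_date",
    #           "date_range": {"start_date": "2024-01-01", "end_date": "2024-01-31"}
    #       },
    #       "columns": [],
    #       "file_format": "csv"
    #   }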