def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
                 start_date=None, end_date=None, columns=[], file_format="csv", monthly_download=False, cleanup=False,
                 use_sqs=False):
        date_range = {}
        if start_date:
            date_range['start_date'] = start_date
        if end_date:
            date_range['end_date'] = end_date
        json_request = {
            'constraint_type': 'year',
            'award_levels': award_levels,
            'filters': {
                'award_types': award_types,
                'agency': str(agency),
                'date_type': date_type,
                'date_range': date_range,
            },
            'columns': columns,
            'file_format': file_format
        }
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = validate_award_request(json_request)
        download_job = DownloadJob.objects.create(job_status_id=JOB_STATUS_DICT['ready'], file_name=file_name,
                                                  json_request=json.dumps(order_nested_object(validated_request)),
                                                  monthly_download=True)

        if not use_sqs:
            # Note: The line below overwrites the process's bulk download bucket setting, so it's advised to run this
            #       script only on a separate instance.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            csv_generation.generate_csvs(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
                for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                    if key.key == file_name:
                        # ignore the one we just uploaded
                        continue
                    key.delete()
                    logger.info('Deleting {} from bucket'.format(key.key))
        else:
            queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
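
The file_name[:-12] slice in the cleanup branch above assumes monthly archives end with a 'YYYYMMDD.zip' suffix (12 characters); a quick check with a hypothetical file name:

# Hypothetical file name; only the trailing 'YYYYMMDD.zip' (12 characters) matters here.
file_name = "example_monthly_download_20200101.zip"
assert file_name[:-12] == "example_monthly_download_"
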
    def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
                 start_date=None, end_date=None, columns=[], file_format="csv", monthly_download=False, cleanup=False,
                 use_sqs=False):
        date_range = {}
        if start_date:
            date_range['start_date'] = start_date
        if end_date:
            date_range['end_date'] = end_date
        json_request = {
            'constraint_type': 'year',
            'award_levels': award_levels,
            'filters': {
                'award_types': award_types,
                'agency': str(agency),
                'date_type': date_type,
                'date_range': date_range,
            },
            'columns': columns,
            'file_format': file_format
        }
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = download_viewset.validate_award_request(json_request)
        download_job = DownloadJob.objects.create(job_status_id=JOB_STATUS_DICT['ready'], file_name=file_name,
                                                  json_request=json.dumps(order_nested_object(validated_request)),
                                                  monthly_download=True)

        if not use_sqs:
            # Note: The line below overwrites the process's bulk download bucket setting, so it's advised to run this
            #       script only on a separate instance.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            csv_generation.generate_csvs(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
                for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                    if key.key == file_name:
                        # ignore the one we just uploaded
                        continue
                    key.delete()
                    logger.info('Deleting {} from bucket'.format(key.key))
        else:
            queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
    def post(self, request, request_type='award'):
        if request_type == 'award':
            json_request = validate_award_request(request.data)
        elif request_type == 'idv':
            json_request = validate_idv_request(request.data)
        else:
            json_request = validate_account_request(request.data)

        json_request['request_type'] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        # TODO!!! Use external_data_load_date to determine data freshness
        updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
        cached_download = (
            DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
            .exclude(job_status_id=JOB_STATUS_DICT["failed"])
            .values("download_job_id", "file_name")
            .first()
        )

        if cached_download and not settings.IS_LOCAL:
            # Returning the cached file keeps an identical request from producing duplicate downloads within the same day
            write_to_log(
                message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id'])
            )
            cached_filename = cached_download['file_name']
            return self.get_download_response(file_name=cached_filename)

        request_agency = json_request.get('filters', {}).get('agency', None)
        final_output_zip_name = create_unique_filename(json_request,
                                                       request_agency)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'],
            file_name=final_output_zip_name,
            json_request=ordered_json_request)

        log_new_download_job(request, download_job)
        self.process_request(download_job)

        return self.get_download_response(file_name=final_output_zip_name)
    def post(self, request, request_type='award'):
        if request_type == 'award':
            json_request = self.validate_award_request(request.data)
        else:
            json_request = self.validate_account_request(request.data)

        json_request['request_type'] = request_type
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        # TODO!!! Use external_data_load_date to determine data freshness
        updated_date_timestamp = datetime.strftime(datetime.now(timezone.utc), "%Y-%m-%d")
        cached_download = (
            DownloadJob.objects.filter(json_request=ordered_json_request, update_date__gte=updated_date_timestamp)
            .exclude(job_status_id=JOB_STATUS_DICT["failed"])
            .values("download_job_id", "file_name")
            .first()
        )

        if cached_download and not settings.IS_LOCAL:
            # Returning the cached file keeps an identical request from producing duplicate downloads within the same day
            write_to_log(
                message='Generating file from cached download job ID: {}'.format(cached_download['download_job_id'])
            )
            cached_filename = cached_download['file_name']
            return self.get_download_response(file_name=cached_filename)

        request_agency = json_request.get('filters', {}).get('agency', None)
        final_output_zip_name = create_unique_filename(json_request["download_types"], request_agency)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'], file_name=final_output_zip_name, json_request=ordered_json_request
        )

        log_new_download_job(request, download_job)
        self.process_request(download_job)

        return self.get_download_response(file_name=final_output_zip_name)
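
Both post variants above depend on order_nested_object making the serialized request canonical before it is used as a cache lookup key: payloads that differ only in key or list order serialize to the same string, so the daily DownloadJob query treats them as the same request. A small illustration (the filter keys here are made up for the example):

# Illustrative payloads only; the keys are not taken from the real API contract.
payload_a = {"filters": {"award_type_codes": ["B", "A"]}, "columns": []}
payload_b = {"columns": [], "filters": {"award_type_codes": ["A", "B"]}}
assert json.dumps(order_nested_object(payload_a)) == json.dumps(order_nested_object(payload_b))
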
    def prepare_key(self, key_dict):
        # Order the key_dict using the order_nested_object function to make sure cache keys are always exactly the same
        ordered_key_dict = json.dumps(order_nested_object(key_dict))
        key_hex = hashlib.md5(ordered_key_dict.encode("utf-8")).hexdigest()
        return key_hex
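
Ordering the key dict before hashing means logically identical key dicts always yield the same MD5 digest, no matter how callers arrange their keys. A standalone sketch of the same recipe (make_cache_key is a hypothetical name, and order_nested_object is assumed to be importable from the project's helpers, as in the snippets above):

import hashlib
import json


def make_cache_key(key_dict):
    # Same recipe as prepare_key above, without the class context.
    ordered_key_dict = json.dumps(order_nested_object(key_dict))
    return hashlib.md5(ordered_key_dict.encode("utf-8")).hexdigest()


# Key dicts that differ only in key/list order hash to the same cache key.
assert make_cache_key({"b": [2, 1], "a": 1}) == make_cache_key({"a": 1, "b": [1, 2]})
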
def test_order_nested_object():
    assert order_nested_object("A") == "A"
    assert order_nested_object([2, 1]) == [1, 2]
    assert order_nested_object({"B": 1, "A": 2}) == OrderedDict((("A", 2), ("B", 1)))
    assert order_nested_object({"B": [2, 1], "A": [4, 3]}) == OrderedDict((("A", [3, 4]), ("B", [1, 2])))
    assert order_nested_object({"tas_codes": [2, 1]}) == {"tas_codes": [1, 2]}

    # Not "require": [[]] so should sort.
    assert order_nested_object({"tas_codes": {"require": {"A": [2, 1]}}}) == {"tas_codes": {"require": {"A": [1, 2]}}}

    # Inner lists for require and exclude for naics_codes, psc_codes, and tas_codes should not
    # be sorted.  Everything else should be.
    assert order_nested_object(
        {
            "tas_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "some_other_codes_we_dont_care_about": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "psc_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
            "naics_codes": {
                "whatever": [["Service", "B", "B5", "B502"], ["D", "C"]],
                "require": [["Service", "B", "B5"], ["D", "C", "A"]],
                "exclude": [["Service", "B", "B5", "B502"], ["D", "C"]],
            },
        }
    ) == OrderedDict(
        [
            (
                "naics_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "psc_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "some_other_codes_we_dont_care_about",
                OrderedDict(
                    [
                        ("exclude", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                        ("require", [["A", "C", "D"], ["B", "B5", "Service"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
            (
                "tas_codes",
                OrderedDict(
                    [
                        ("exclude", [["D", "C"], ["Service", "B", "B5", "B502"]]),
                        ("require", [["D", "C", "A"], ["Service", "B", "B5"]]),
                        ("whatever", [["B", "B5", "B502", "Service"], ["C", "D"]]),
                    ]
                ),
            ),
        ]
    )
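
The test above pins down the expected behavior: dict keys and list items are sorted recursively, except that the inner code paths under "require"/"exclude" for naics_codes, psc_codes, and tas_codes keep their original order (only the outer list of paths is sorted). Below is a minimal sketch that satisfies these assertions, written from the test alone; it is not necessarily the project's actual implementation, and the _keep_inner_order parameter is purely illustrative:

from collections import OrderedDict

# Keys whose "require"/"exclude" values are lists of code paths with a
# meaningful internal order (assumption taken from the test above).
_PATH_FILTER_KEYS = {"naics_codes", "psc_codes", "tas_codes"}


def order_nested_object(obj, _keep_inner_order=False):
    # Dicts become OrderedDicts with keys in sorted order.
    if isinstance(obj, dict):
        ordered = OrderedDict()
        for key in sorted(obj):
            value = obj[key]
            if key in _PATH_FILTER_KEYS and isinstance(value, dict):
                # Sort the require/exclude groups themselves, but flag their
                # list values so the code paths inside are left untouched.
                inner = OrderedDict()
                for inner_key in sorted(value):
                    keep = inner_key in ("require", "exclude") and isinstance(value[inner_key], list)
                    inner[inner_key] = order_nested_object(value[inner_key], _keep_inner_order=keep)
                ordered[key] = inner
            else:
                ordered[key] = order_nested_object(value)
        return ordered
    # Lists are sorted; their elements are ordered recursively unless they are
    # protected code paths. This is sufficient for the shapes the test
    # exercises (scalars, strings, and lists of strings).
    if isinstance(obj, list):
        if _keep_inner_order:
            # Sort the outer list of code paths; each path keeps its own order.
            return sorted(obj)
        return sorted(order_nested_object(item) for item in obj)
    # Scalars pass through unchanged.
    return obj
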