Пример #1
0
def remove_temp_cur_files(path):
    """Remove temporary cost usage report files.

    Every entry in ``path`` whose name ends in ``Manifest.json`` is read to
    learn the current ``assemblyId``.  Every other entry that has a
    last-completed datetime recorded through ``ReportStatsDBAccessor`` becomes
    a removal candidate, and is deleted when the UUID extracted from its file
    name differs from the current assembly ID.

    Args:
        path (String): Directory holding the downloaded report files.

    Returns:
        ([String]): Paths of the files that were actually removed.

    """
    victim_list = []
    current_assembly_id = None
    for file in os.listdir(path):
        file_path = '{}/{}'.format(path, file)
        if file.endswith('Manifest.json'):
            with open(file_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('assemblyId')
            continue

        stats = ReportStatsDBAccessor(file)
        completed_date = stats.get_last_completed_datetime()
        if not completed_date:
            continue

        uuids = utils.extract_uuids_from_string(file)
        if not uuids:
            # Without a UUID the file cannot be tied to an assembly, so leave
            # it in place instead of failing on an empty pop().
            continue

        victim_list.append({'file': file_path,
                            'completed_date': completed_date,
                            'assemblyId': uuids.pop()})

    # Deletion happens in a second pass because the manifest may be listed
    # after the report files it describes.
    removed_files = []
    for victim in victim_list:
        if victim['assemblyId'] == current_assembly_id:
            continue
        LOG.info('Removing %s, completed processing on date %s',
                 victim['file'], victim['completed_date'])
        try:
            os.remove(victim['file'])
        except FileNotFoundError:
            # The file vanished between listing and removal; keep cleaning up
            # the remaining candidates.
            LOG.warning('Unable to locate file: %s', victim['file'])
        else:
            removed_files.append(victim['file'])
    return removed_files
Пример #2
0
    def test_extract_uuids_from_string_capitals(self):
        """Check that a mixed-case UUID inside a report key is extracted as written."""
        expected_uuid = "882083B7-EA62-4AAB-aA6a-f0d08d65Ee2b"
        report_key = f"/koku/20180701-20180801/{expected_uuid}/koku-1.csv.gz"

        found = common_utils.extract_uuids_from_string(report_key)

        # Exactly one match is expected, and it must equal the embedded UUID.
        self.assertEqual(len(found), 1)
        self.assertEqual(found.pop(), expected_uuid)
Пример #3
0
    def test_extract_uuids_from_string(self):
        """Check that a lower-case UUID inside a report key is extracted."""
        expected_uuid = "882083b7-ea62-4aab-aa6a-f0d08d65ee2b"
        report_key = f"/koku/20180701-20180801/{expected_uuid}/koku-1.csv.gz"

        found = common_utils.extract_uuids_from_string(report_key)

        # Exactly one match is expected, and it must equal the embedded UUID.
        self.assertEqual(len(found), 1)
        self.assertEqual(found.pop(), expected_uuid)
    def get_billing_source(self):
        """
        Retrieve the name of the S3 bucket from the SNS notification.

        FIXME: Diving through the response body to extract values such as
        object_key and s3_bucket_name could be improved upon.  A concerted effort
        to determine whether or not there will always be one item in "Records"
        should be done to know that this method will be reliable.

        FIXME: The filtering for only top-level manifest files was done as a quick
        way to limit the number of download/processing requests.  If we can find a
        better way to ensure we are only responding to one notification when multiple
        files have changed in the bucket then the filtering could be removed.

        Args:
            None

        Returns:
            (String) Name of the billing source from the notification, or None
                     when the message is not a 'Notification', has no body, or
                     carries an empty bucket name.

        Raises:
            (NotificationInterfaceFilter): Is raised when the notification is not for
                                           a top-level manifest file.
            (AWSNotificationHandlerError): Is raised when the first entry of
                                           "Records" is missing or lacks the
                                           expected s3 object/bucket elements.

        Note:
            Errors raised by json.loads on the body or its "Message" element,
            and a missing "Message" key, propagate unchanged.

        """
        if self._msg_type != 'Notification' or not self._body:
            return None

        body_dict = json.loads(self._body)
        message_dict = json.loads(body_dict['Message'])

        # There must be a more reliable way of getting bucket name..
        try:
            s3_record = message_dict['Records'][0]['s3']
            object_key = s3_record['object']['key']
            s3_bucket_name = s3_record['bucket']['name']
        except (KeyError, IndexError, TypeError) as err:
            # IndexError covers an empty "Records" list; TypeError covers a
            # level that is not the expected dict/list.
            raise AWSNotificationHandlerError(
                'Unexpected \"Message\" element in body.') from err

        if not object_key.endswith('Manifest.json'):
            msg = 'Ignoring non-manifest file: {}'.format(object_key)
            raise NotificationInterfaceFilter(msg)

        # A manifest whose key contains a UUID is treated as non-top-level.
        if utils.extract_uuids_from_string(object_key):
            msg = 'Ignoring non-toplevel manifest file: {}'.format(
                object_key)
            raise NotificationInterfaceFilter(msg)

        return s3_bucket_name or None
    def _get_manifest(self, date_time):
        """
        Build and return the CUR manifest for the given date from local files.

        The most recently modified file in the local report directory is used
        as the report.  The resulting manifest is also written to
        ``Manifest.json`` in the exports data directory.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            (Dict): The manifest, or an empty dict when the local report
                    directory is missing or contains no files.

        Raises:
            (AzureReportDownloaderError): Is raised when no assembly ID can be
                                          extracted from the report file name.

        """
        report_path = self._get_report_path(date_time)
        manifest = {}

        local_path = '{}/{}/{}'.format(self.local_storage, self.container_name,
                                       report_path)

        if not os.path.exists(local_path):
            LOG.error('Unable to find manifest.')
            return manifest

        report_names = os.listdir(local_path)
        if not report_names:
            # An empty directory has no report to describe.
            LOG.error('Unable to find manifest.')
            return manifest

        # Pick the file with the newest modification time.
        report_name = max(
            report_names,
            key=lambda file: os.path.getmtime(f'{local_path}/{file}'))

        try:
            manifest['assemblyId'] = extract_uuids_from_string(
                report_name).pop()
        except IndexError as err:
            message = f'Unable to extract assemblyID from {report_name}'
            raise AzureReportDownloaderError(message) from err

        # The last path segment has the form "<start>-<end>".
        period_parts = report_path.split('/')[-1].split('-')
        billing_period = {
            'start': period_parts[0],
            'end': period_parts[1]
        }
        manifest['billingPeriod'] = billing_period
        manifest['reportKeys'] = [f'{local_path}/{report_name}']
        manifest['Compression'] = UNCOMPRESSED

        manifest_file = '{}/{}'.format(self._get_exports_data_directory(),
                                       'Manifest.json')
        with open(manifest_file, 'w') as manifest_hdl:
            manifest_hdl.write(json.dumps(manifest))

        return manifest
    def _get_manifest(self, date_time):
        """
        Build and return the CUR manifest for the given date from local files.

        The most recently modified file in the local report directory is used
        as the report.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            (Dict, DateTime): The manifest and the modification time of the
                              chosen report file.  When the local report
                              directory is missing or contains no files the
                              result is an empty dict and None.

        Raises:
            (AzureReportDownloaderError): Is raised when no assembly ID can be
                                          extracted from the report file name.

        """
        report_path = self._get_report_path(date_time)
        manifest = {}

        local_path = f"{self.local_storage}/{self.container_name}/{report_path}"

        if not os.path.exists(local_path):
            msg = f"Unable to find manifest: {local_path}."
            LOG.error(log_json(self.request_id, msg, self.context))
            # Same 2-tuple shape as the normal return so callers can unpack.
            return manifest, None

        report_names = os.listdir(local_path)
        if not report_names:
            # An empty directory has no report to describe.
            msg = f"Unable to find manifest: {local_path}."
            LOG.error(log_json(self.request_id, msg, self.context))
            return manifest, None

        # Pick the file with the newest modification time.
        report_name = max(
            report_names,
            key=lambda file: os.path.getmtime(f"{local_path}/{file}"))
        full_file_path = f"{local_path}/{report_name}"
        manifest_modified_timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(full_file_path))

        try:
            manifest["assemblyId"] = extract_uuids_from_string(
                report_name).pop()
        except IndexError as err:
            message = f"Unable to extract assemblyID from {report_name}"
            raise AzureReportDownloaderError(message) from err

        # The last path segment has the form "<start>-<end>".
        period_parts = report_path.split("/")[-1].split("-")
        billing_period = {
            "start": period_parts[0],
            "end": period_parts[1],
        }
        manifest["billingPeriod"] = billing_period
        manifest["reportKeys"] = [full_file_path]
        manifest["Compression"] = UNCOMPRESSED

        return manifest, manifest_modified_timestamp
Пример #7
0
def get_assembly_id_from_cur_key(key):
    """
    Pull the assembly ID out of a cost and usage report key.

    Args:
        key (String): Full key for a cost and usage report location.
        example: /koku/20180701-20180801/882083b7-ea62-4aab-aa6a-f0d08d65ee2b/koku-1.csv.gz

    Returns:
        (String): "Assembly ID UUID", or None when the key holds no UUID.
        example: "882083b7-ea62-4aab-aa6a-f0d08d65ee2b"

    """
    uuids = utils.extract_uuids_from_string(key)
    if not uuids:
        return None
    # When several UUIDs are present, the last extracted one is used.
    return uuids.pop()
Пример #8
0
    def _get_manifest(self, date_time):
        """
        Build and return the CUR manifest for the given date.

        The latest cost export for the report path is looked up through the
        Azure client, and the resulting manifest is also written to
        ``Manifest.json`` in the exports data directory.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            (Dict): The manifest, or an empty dict when the Azure client
                    reports that no cost export was found.

        Raises:
            (AzureReportDownloaderError): Is raised when no assembly ID can be
                                          extracted from the blob name.

        """
        report_path = self._get_report_path(date_time)
        manifest = {}
        try:
            blob = self._azure_client.get_latest_cost_export_for_path(
                report_path, self.container_name)
        except AzureCostReportNotFound as ex:
            LOG.error('Unable to find manifest. Error: %s', str(ex))
            return manifest
        report_name = blob.name

        try:
            manifest['assemblyId'] = extract_uuids_from_string(
                report_name).pop()
        except IndexError as err:
            # str.format() ignores %-style placeholders, so build the message
            # with '{}' to actually include the report name.
            message = 'Unable to extract assemblyID from {}'.format(
                report_name)
            raise AzureReportDownloaderError(message) from err

        # The last path segment has the form "<start>-<end>".
        period_parts = report_path.split('/')[-1].split('-')
        billing_period = {
            'start': period_parts[0],
            'end': period_parts[1]
        }
        manifest['billingPeriod'] = billing_period
        manifest['reportKeys'] = [report_name]
        manifest['Compression'] = UNCOMPRESSED

        manifest_file = '{}/{}'.format(self._get_exports_data_directory(),
                                       'Manifest.json')
        with open(manifest_file, 'w') as manifest_hdl:
            manifest_hdl.write(json.dumps(manifest))

        return manifest
Пример #9
0
    def _get_manifest(self, date_time):
        """
        Build and return the CUR manifest for the given date.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            (Dict, DateTime): The manifest and the blob's ``last_modified``
                              value; an empty dict and None when the Azure
                              client reports that no cost export was found.

        """
        report_path = self._get_report_path(date_time)
        try:
            blob = self._azure_client.get_latest_cost_export_for_path(
                report_path, self.container_name)
        except AzureCostReportNotFound as ex:
            msg = f"Unable to find manifest. Error: {str(ex)}"
            LOG.info(log_json(self.request_id, msg, self.context))
            return {}, None

        report_name = blob.name
        try:
            assembly_id = extract_uuids_from_string(report_name).pop()
        except IndexError:
            raise AzureReportDownloaderError(
                f"Unable to extract assemblyID from {report_name}")

        # The last path segment has the form "<start>-<end>".
        period_parts = report_path.split("/")[-1].split("-")
        manifest = {
            "assemblyId": assembly_id,
            "billingPeriod": {
                "start": period_parts[0],
                "end": period_parts[1],
            },
            "reportKeys": [report_name],
            "Compression": UNCOMPRESSED,
        }

        return manifest, blob.last_modified
Пример #10
0
    def remove_temp_cur_files(self, report_path, manifest_id):
        """Remove temporary cost usage report files.

        Every entry in ``report_path`` whose name ends in ``Manifest.json`` is
        read to learn the current ``assemblyId``.  Every other entry that has
        a last-completed datetime recorded through ``ReportStatsDBAccessor``
        becomes a removal candidate, and is deleted when the UUID extracted
        from its file name differs from the current assembly ID.

        Args:
            report_path (String): Directory holding the downloaded report files.
            manifest_id: Passed through to ``ReportStatsDBAccessor`` for each
                         report file.

        Returns:
            ([String]): Paths of the files that were actually removed.

        """
        files = listdir(report_path)

        LOG.info('Cleaning up temporary report files for %s',
                 self._report_name)
        victim_list = []
        current_assembly_id = None
        for file in files:
            file_path = '{}/{}'.format(report_path, file)
            if file.endswith('Manifest.json'):
                with open(file_path, 'r') as manifest_file_handle:
                    manifest_json = json.load(manifest_file_handle)
                current_assembly_id = manifest_json.get('assemblyId')
                continue

            with ReportStatsDBAccessor(file, manifest_id) as stats:
                completed_date = stats.get_last_completed_datetime()
            if not completed_date:
                continue

            uuids = extract_uuids_from_string(file)
            if not uuids:
                # Without a UUID the file cannot be tied to an assembly, so
                # leave it in place instead of failing on an empty pop().
                continue

            victim_list.append({
                'file': file_path,
                'completed_date': completed_date,
                'assemblyId': uuids.pop()
            })

        # Deletion happens in a second pass because the manifest may be listed
        # after the report files it describes.
        removed_files = []
        for victim in victim_list:
            if victim['assemblyId'] == current_assembly_id:
                continue
            try:
                LOG.info('Removing %s, completed processing on date %s',
                         victim['file'], victim['completed_date'])
                remove(victim['file'])
                removed_files.append(victim['file'])
            except FileNotFoundError:
                LOG.warning('Unable to locate file: %s', victim['file'])
        return removed_files