Exemplo n.º 1
0
    def test_today_with_timezone_error_raised(self):
        """Verify today_with_timezone raises DateAccessorError for an unknown timezone name."""
        bogus_tz_name = "Moon/Mare Tranquillitatis"
        date_accessor = DateAccessor()

        # Callable form of assertRaises: the call is made by the assertion itself.
        self.assertRaises(
            DateAccessorError, date_accessor.today_with_timezone, bogus_tz_name)
Exemplo n.º 2
0
class ReportManifestDBAccessor(KokuDBAccess):
    """Accessor for the cost usage report manifest table in the koku database."""

    def __init__(self):
        """Bind the accessor to the 'public' schema and the manifest table."""
        self._schema = 'public'
        super().__init__(self._schema)
        mapped_classes = self.get_base().classes
        self._table = mapped_classes.reporting_common_costusagereportmanifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_id):
        """Return the first manifest matching the provider and assembly id."""
        for_provider = self._get_db_obj_query().filter_by(
            provider_id=provider_id)
        for_assembly = for_provider.filter_by(assembly_id=assembly_id)
        return for_assembly.first()

    def get_manifest_by_id(self, manifest_id):
        """Return the first manifest whose id equals manifest_id."""
        return self._get_db_obj_query().filter_by(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Stamp the manifest's updated datetime with the current UTC time."""
        now_utc = self.date_accessor.today_with_timezone('UTC')
        manifest.manifest_updated_datetime = now_utc

    def add(self, use_savepoint=True, **kwargs):
        """
        Add a new row to the CUR stats database.

        Missing 'manifest_creation_datetime' is filled with the current UTC
        time and missing 'num_processed_files' is filled with 0.

        Args:
            use_savepoint (bool): Passed through to the parent add().
            kwargs (dict): Fields containing CUR Manifest attributes.

            Valid keys are: assembly_id,
                            billing_period_start_datetime,
                            num_processed_files (optional),
                            num_total_files,
                            provider_id,
        Returns:
            Whatever the parent class add() returns.

        """
        # Only ask for the current time when the caller did not supply one.
        if 'manifest_creation_datetime' not in kwargs:
            created_at = self.date_accessor.today_with_timezone('UTC')
            kwargs['manifest_creation_datetime'] = created_at

        kwargs.setdefault('num_processed_files', 0)

        return super().add(use_savepoint, **kwargs)
Exemplo n.º 3
0
    def test_get_bill_ids_from_provider_with_start_and_end_date(self):
        """Check get_bills_from_provider honors both start and end dates for AWS.

        The expected IDs are taken from the provider's bills whose
        billing_period_start lies between the first day of the previous
        month and the first day of the current month (UTC).
        """
        dates = DateAccessor()

        with ProviderDBAccessor(
                provider_uuid=self.aws_provider_uuid) as provider_accessor:
            provider = provider_accessor.get_provider()

        with AWSReportDBAccessor(schema=self.schema) as accessor:
            end_date = dates.today_with_timezone("utc").replace(day=1)
            # Step back 0 months, then 1 month: net effect is one month earlier.
            start_date = end_date
            for months_back in range(2):
                start_date -= relativedelta(months=months_back)

            provider_bills = accessor.get_cost_entry_bills_query_by_provider(
                provider.uuid)
            with schema_context(self.schema):
                from_start = provider_bills.filter(
                    billing_period_start__gte=start_date.date())
                in_window = from_start.filter(
                    billing_period_start__lte=end_date.date()).all()
                expected_bill_ids = [str(row.id) for row in in_window]

        found_bills = utils.get_bills_from_provider(
            self.aws_provider_uuid,
            self.schema,
            start_date=start_date,
            end_date=end_date)
        with schema_context(self.schema):
            bill_ids = [str(row.id) for row in found_bills]

        self.assertEqual(bill_ids, expected_bill_ids)
Exemplo n.º 4
0
    def test_get_bill_ids_from_provider_with_start_and_end_date(self):
        """Test that bill IDs are returned for an AWS provider with both dates.

        Two bills are created (first of the current month and first of the
        previous month, UTC); the IDs returned by get_bills_from_provider for
        that window must match the IDs found by filtering the provider's
        bills directly.
        """
        date_accessor = DateAccessor()

        with ProviderDBAccessor(provider_uuid=self.aws_test_provider_uuid
                                ) as provider_accessor:
            provider = provider_accessor.get_provider()
        with AWSReportDBAccessor(schema=self.test_schema,
                                 column_map=self.column_map) as accessor:
            report_schema = accessor.report_schema
            creator = ReportObjectCreator(accessor, self.column_map,
                                          report_schema.column_types)

            end_date = date_accessor.today_with_timezone('utc').replace(day=1)
            start_date = end_date
            for i in range(2):
                # i == 0 keeps the current month, i == 1 steps back one month.
                start_date = start_date - relativedelta(months=i)
                creator.create_cost_entry_bill(bill_date=start_date)

            bill_table_name = AWS_CUR_TABLE_MAP['bill']
            bill_obj = getattr(accessor.report_schema, bill_table_name)
            bills = accessor.get_cost_entry_bills_query_by_provider(
                provider.id)
            bills = bills.filter(
                bill_obj.billing_period_start >= start_date.date()
            ).filter(
                bill_obj.billing_period_start <= end_date.date()
            ).all()
            expected_bill_ids = [str(bill.id) for bill in bills]

        bills = utils.get_bills_from_provider(self.aws_test_provider_uuid,
                                              self.test_schema,
                                              start_date=start_date,
                                              end_date=end_date)
        bill_ids = [str(bill.id) for bill in bills]

        self.assertEqual(bill_ids, expected_bill_ids)
Exemplo n.º 5
0
    def test_remove_files_not_in_set_from_s3_bucket(self):
        """Cover remove_files_not_in_set_from_s3_bucket for three cases.

        1. No S3 path given: nothing is removed.
        2. Archiving enabled and one object listed: its key is reported.
        3. get_s3_resource raises ClientError: nothing is removed.
        """
        self.assertEqual(
            utils.remove_files_not_in_set_from_s3_bucket(
                "request_id", None, "manifest_id"),
            [])

        month_start = DateAccessor().today_with_timezone("utc").replace(day=1)
        s3_csv_path = get_path_prefix("account", Provider.PROVIDER_AWS,
                                      "provider_uuid", month_start,
                                      Config.CSV_DATA_TYPE)
        expected_key = "removed_key"
        s3_object = Mock(metadata={}, key=expected_key)
        object_summary = Mock()
        object_summary.Object.return_value = s3_object

        with patch("masu.util.aws.common.settings", ENABLE_S3_ARCHIVING=True), \
                patch("masu.util.aws.common.get_s3_resource") as mock_s3:
            bucket = mock_s3.return_value.Bucket.return_value
            bucket.objects.filter.return_value = [object_summary]
            removed = utils.remove_files_not_in_set_from_s3_bucket(
                "request_id", s3_csv_path, "manifest_id")
            self.assertEqual(removed, [expected_key])

        with patch("masu.util.aws.common.settings", ENABLE_S3_ARCHIVING=True), \
                patch("masu.util.aws.common.get_s3_resource") as mock_s3:
            mock_s3.side_effect = ClientError({}, "Error")
            removed = utils.remove_files_not_in_set_from_s3_bucket(
                "request_id", s3_csv_path, "manifest_id")
            self.assertEqual(removed, [])
Exemplo n.º 6
0
    def test_today_with_timezone_string(self):
        """Verify today_with_timezone accepts the timezone name "UTC".

        The result must agree with datetime.utcnow() on date, hour and minute
        and carry pytz.UTC as its tzinfo.
        """
        tz_name = "UTC"
        expected_now = datetime.utcnow()
        result_time = DateAccessor().today_with_timezone(tz_name)

        for field in ("hour", "minute"):
            if field == "hour":
                # Date is checked first, matching the original assertion order.
                self.assertEqual(expected_now.date(), result_time.date())
            self.assertEqual(
                getattr(expected_now, field), getattr(result_time, field))
        self.assertEqual(result_time.tzinfo, pytz.UTC)
Exemplo n.º 7
0
    def test_today_with_timezone_object(self):
        """Verify today_with_timezone accepts a pytz timezone object.

        A random timezone name from the faker is turned into a pytz timezone;
        the result must agree with datetime.now() in that zone on date, hour
        and minute, and its tzinfo must stringify to the same name.
        """
        tz_name = self.fake.timezone()
        tz_object = pytz.timezone(tz_name)

        expected_now = datetime.now(tz_object)
        result_time = DateAccessor().today_with_timezone(tz_object)

        self.assertEqual(expected_now.date(), result_time.date())
        self.assertEqual(expected_now.hour, result_time.hour)
        self.assertEqual(expected_now.minute, result_time.minute)
        self.assertEqual(str(result_time.tzinfo), tz_name)
Exemplo n.º 8
0
    def test_get_bill_ids_from_provider(self):
        """Check get_bills_from_provider returns the bills created for AWS.

        Two bills are created (first of this month and first of the previous
        month, UTC) and their IDs are compared, order-insensitively, with the
        IDs returned when no date window is given.
        """
        dates = DateAccessor()

        with AWSReportDBAccessor(schema=self.test_schema,
                                 column_map=self.column_map) as accessor:
            creator = ReportObjectCreator(
                accessor, self.column_map,
                accessor.report_schema.column_types)
            expected_bill_ids = []

            end_date = dates.today_with_timezone('utc').replace(day=1)
            bill_date = end_date
            for months_back in range(2):
                bill_date -= relativedelta(months=months_back)
                created = creator.create_cost_entry_bill(bill_date=bill_date)
                expected_bill_ids.append(str(created.id))

        returned = utils.get_bills_from_provider(self.aws_test_provider_uuid,
                                                 self.test_schema)
        bill_ids = [str(row.id) for row in returned]

        self.assertEqual(sorted(bill_ids), sorted(expected_bill_ids))
Exemplo n.º 9
0
    def test_get_bill_ids_from_provider(self):
        """Check get_bills_from_provider returns the bills created for AWS.

        Two bills are created for the AWS provider (first of this month and
        first of the previous month, UTC); the IDs returned without a date
        window must equal the created IDs, ignoring order.
        """
        dates = DateAccessor()
        creator = ReportObjectCreator(self.schema, self.column_map)
        expected_bill_ids = []

        end_date = dates.today_with_timezone("utc").replace(day=1)
        bill_date = end_date
        for months_back in range(2):
            bill_date -= relativedelta(months=months_back)
            created = creator.create_cost_entry_bill(
                provider_uuid=self.aws_provider_uuid, bill_date=bill_date)
            with schema_context(self.schema):
                expected_bill_ids.append(str(created.id))

        returned = utils.get_bills_from_provider(self.aws_provider_uuid,
                                                 self.schema)
        with schema_context(self.schema):
            bill_ids = [str(row.id) for row in returned]

        self.assertEqual(sorted(bill_ids), sorted(expected_bill_ids))
class ProviderStatusAccessorTest(MasuTestCase):
    """Exercise ProviderStatusAccessor against the AWS test provider."""

    FAKE = Faker()

    def setUp(self):
        """Create a DateAccessor and remember the AWS provider's uuid."""
        super().setUp()

        self.date_accessor = DateAccessor()

        with ProviderDBAccessor(self.aws_provider_uuid) as provider_accessor:
            self.provider_uuid = provider_accessor.get_provider().uuid

    def _setup_random_status(self):
        """Store a randomized status row and remember what was written.

        Kept out of setUp() so that tests can also run with no status
        present in the DB.
        """
        status_code = random.choice(list(ProviderStatusCode))
        message = self.FAKE.word()
        retry_count = random.randint(0, 10)
        now_utc = self.date_accessor.today_with_timezone('UTC')
        self.test_status = {
            'provider_id': self.provider_uuid,
            'status': status_code,
            'last_message': message,
            'retries': retry_count,
            'timestamp': now_utc,
        }

        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            saved = accessor.add(**self.test_status)
            saved.save()
            # Read the timestamp back from the saved object for later comparison.
            self.time_stamp = saved.timestamp

    def test_init(self):
        """__init__() populates _table and _obj when a status exists."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            for attribute in (accessor._table, accessor._obj):
                self.assertIsNotNone(attribute)

    def test_init_wo_provider(self):
        """__init__() raises MasuProviderError for an unknown provider uuid."""
        unknown_uuid = str(uuid.uuid4())
        with self.assertRaises(MasuProviderError):
            ProviderStatusAccessor(unknown_uuid)

    def test_get_status(self):
        """get_status() returns the stored status code."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            self.assertEqual(accessor.get_status(),
                             self.test_status['status'])

    def test_get_last_message(self):
        """get_last_message() returns the stored message."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            self.assertEqual(accessor.get_last_message(),
                             self.test_status['last_message'])

    def test_get_retries(self):
        """get_retries() returns the stored retry count."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            self.assertEqual(accessor.get_retries(),
                             self.test_status['retries'])

    def test_get_provider_uuid(self):
        """get_provider_uuid() returns the uuid the accessor was built with."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            self.assertEqual(accessor.get_provider_uuid(),
                             self.aws_provider_uuid)

    def test_get_timestamp(self):
        """get_timestamp() returns the timestamp of the saved status."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            self.assertEqual(accessor.get_timestamp(), self.time_stamp)
class AzureReportSummaryUpdater:
    """Class to update Azure report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: Provider object; its ``id`` and ``uuid`` attributes are
                read by the update methods.
            manifest: Report manifest object, or a falsy value when the
                update is not tied to a manifest.

        """
        self._schema_name = schema
        self._provider = provider
        self._manifest = manifest
        with ReportingCommonDBAccessor() as reporting_common:
            self._column_map = reporting_common.column_map
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month update is needed, the
        dates are overridden to span the manifest's whole billing month.

        Args:
            start_date (str) The requested start date.
            end_date   (str) The requested end date.

        Returns
            (str, str): The start date and end date to use.

        """
        with AzureReportDBAccessor(self._schema_name, self._column_map) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                bills = accessor.get_cost_entry_bills_query_by_provider(
                    self._provider.id
                )
                bills = bills.filter(billing_period_start=bill_date).all()

                do_month_update = False
                with schema_context(self._schema_name):
                    # There may be no bill for this billing period yet; in
                    # that case keep the caller's dates instead of crashing.
                    try:
                        first_bill = bills[0]
                    except IndexError:
                        first_bill = None
                        LOG.warning(
                            'No bill found for billing period start %s; '
                            'keeping requested dates.', str(bill_date))
                    if first_bill is not None:
                        do_month_update = \
                            self._determine_if_full_summary_update_needed(
                                first_bill
                            )
                if do_month_update:
                    last_day_of_month = calendar.monthrange(
                        bill_date.year,
                        bill_date.month
                    )[1]
                    start_date = bill_date.strftime('%Y-%m-%d')
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime('%Y-%m-%d')
                    LOG.info('Overriding start and end date to process full month.')

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        No tables are written here; only the effective date range is
        computed and returned.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        LOG.info('update_daily_tables for: %s-%s', str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        LOG.info('update_summary_tables for: %s-%s', str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema_name,
            datetime.datetime.strptime(start_date, '%Y-%m-%d'),
            datetime.datetime.strptime(end_date, '%Y-%m-%d')
        )
        with schema_context(self._schema_name):
            bill_ids = [str(bill.id) for bill in bills]

        with AzureReportDBAccessor(self._schema_name, self._column_map) as accessor:
            # Need these bills on the session to update dates after processing
            bills = accessor.bills_for_provider_id(self._provider.id, start_date)
            LOG.info('Updating Azure report summary tables: \n\tSchema: %s'
                     '\n\tProvider: %s \n\tDates: %s - %s',
                     self._schema_name, self._provider.uuid, start_date, end_date)
            accessor.populate_line_item_daily_summary_table(start_date, end_date, bill_ids)
            accessor.populate_tags_summary_table()
            for bill in bills:
                # The creation timestamp is set once; the updated timestamp
                # is refreshed on every run.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = \
                        self._date_accessor.today_with_timezone('UTC')
                bill.summary_data_updated_datetime = \
                    self._date_accessor.today_with_timezone('UTC')
                bill.save()

            accessor.commit()
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, bill):
        """Decide whether to update summary tables for full billing period.

        Args:
            bill: Bill object; its ``summary_data_creation_datetime`` is read.

        Returns
            (bool): True when every manifest file has been processed and the
                bill has never been summarized, otherwise False.

        """
        processed_files = self._manifest.num_processed_files
        total_files = self._manifest.num_total_files

        is_done_processing = processed_files == total_files
        is_new_bill = bill.summary_data_creation_datetime is None

        # Do a full month update only when we just finished processing a
        # bill that has not been summarized before.
        if is_done_processing and is_new_bill:
            return True

        return False
Exemplo n.º 12
0
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None):
        """
        Initializer.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid (str): The uuid of the provider to summarize.
            manifest_id: Optional manifest id; when given, the manifest is
                looked up and handed to the provider-specific updaters.

        Raises:
            ReportSummaryUpdaterError: If creating the updaters fails or the
                provider type is not supported.

        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(
                    manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()
        try:
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            # Keep the original exception attached as the cause.
            raise ReportSummaryUpdaterError(err) from err

        if not self._updater:
            raise ReportSummaryUpdaterError('Invalid provider type specified.')
        LOG.info('Starting report data summarization for provider uuid: %s.',
                 self._provider.uuid)

    def _set_updater(self):
        """
        Create the report summary updater objects.

        Objects are specific to the report provider.

        Args:
            None

        Returns:
            (Object, Object) : Provider-specific report summary updater and
                the OCP-on-cloud updater, or (None, None) when the provider
                type is not recognized.

        """
        if self._provider.type in (AMAZON_WEB_SERVICES,
                                   AWS_LOCAL_SERVICE_PROVIDER):
            return (AWSReportSummaryUpdater(self._schema, self._provider,
                                            self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                                 self._manifest))
        if self._provider.type in (AZURE, AZURE_LOCAL_SERVICE_PROVIDER):
            return (AzureReportSummaryUpdater(self._schema, self._provider,
                                              self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                                 self._manifest))
        if self._provider.type in (OPENSHIFT_CONTAINER_PLATFORM, ):
            return (OCPReportSummaryUpdater(self._schema, self._provider,
                                            self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                                 self._manifest))

        # The caller unpacks two values; a bare None would raise TypeError
        # there and mask the 'Invalid provider type' error.
        return (None, None)

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater.

        date/datetime values are formatted as YYYY-MM-DD, other values are
        passed through, and an end_date of None becomes the current UTC date.
        """
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime('%Y-%m-%d')
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime('%Y-%m-%d')
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone('UTC')
            end_date = end_date.strftime('%Y-%m-%d')
        return start_date, end_date

    def manifest_is_ready(self):
        """Check if processing should continue.

        Logs an error when the manifest's processed file count differs from
        its total file count.

        Returns:
            (bool): Always True.

        """
        # NOTE(review): the log message says summary is deferred, yet True is
        # returned even for an incomplete manifest - confirm this is intended.
        if self._manifest and self._manifest.num_processed_files != self._manifest.num_total_files:
            LOG.error(
                'Not all manifest files have completed processing.'
                'Summary deferred. Processed Files: %s, Total Files: %s',
                str(self._manifest.num_processed_files),
                str(self._manifest.num_total_files))
        return True

    def update_daily_tables(self, start_date, end_date):
        """
        Update report daily rollup tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            (str, str): The start and end date strings used in the daily SQL.

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        start_date, end_date = self._updater.update_daily_tables(
            start_date, end_date)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """
        Update report summary tables.

        The provider-specific updater runs first; the dates it returns are
        then passed to the OCP-on-cloud updater.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        LOG.info('Using start date: %s', start_date)
        LOG.info('Using end date: %s', end_date)

        start_date, end_date = self._updater.update_summary_tables(
            start_date, end_date)

        self._ocp_cloud_updater.update_summary_tables(start_date, end_date)

    def update_cost_summary_table(self, start_date, end_date):
        """
        Update cost summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        self._ocp_cloud_updater.update_cost_summary_table(start_date, end_date)
Exemplo n.º 13
0
class AWSReportDBAccessor(SQLScriptAtomicExecutorMixin, ReportDBAccessorBase):
    """Class to interact with customer reporting tables."""

    def __init__(self, schema):
        """Initialize the accessor for one customer schema.

        Besides the parent initialization this stores the AWS datetime
        string format, a DateAccessor, a JinjaSql instance and the AWS CUR
        table map.

        Args:
            schema (str): The customer schema to associate with
        """
        super().__init__(schema)
        self._table_map = AWS_CUR_TABLE_MAP
        self._datetime_format = Config.AWS_DATETIME_STR_FORMAT
        self.date_accessor = DateAccessor()
        self.jinja_sql = JinjaSql()

    @property
    def line_item_daily_summary_table(self):
        """Return the AWSCostEntryLineItemDailySummary class."""
        return AWSCostEntryLineItemDailySummary

    @property
    def ocpall_line_item_daily_summary_table(self):
        """Return what get_model resolves for "OCPAllCostLineItemDailySummaryP"."""
        return get_model("OCPAllCostLineItemDailySummaryP")

    @property
    def ocpall_line_item_project_daily_summary_table(self):
        """Return what get_model resolves for "OCPAllCostLineItemProjectDailySummaryP"."""
        return get_model("OCPAllCostLineItemProjectDailySummaryP")

    @property
    def line_item_table(self):
        """Return the AWSCostEntryLineItem class."""
        return AWSCostEntryLineItem

    @property
    def cost_entry_table(self):
        """Return the AWSCostEntry class."""
        return AWSCostEntry

    @property
    def line_item_daily_table(self):
        """Return the AWSCostEntryLineItemDaily class."""
        return AWSCostEntryLineItemDaily

    def get_cost_entry_bills(self):
        """Map each bill's identifying fields to its id.

        Returns
            (dict): Keys are (bill_type, payer_account_id,
                billing_period_start, provider_id) tuples, values are bill ids.

        """
        with schema_context(self.schema):
            bill_rows = self._get_db_obj_query(AWSCostEntryBill).values(
                "id", "bill_type", "payer_account_id", "billing_period_start", "provider_id"
            )
            ids_by_bill_key = {}
            for row in bill_rows:
                bill_key = (
                    row["bill_type"],
                    row["payer_account_id"],
                    row["billing_period_start"],
                    row["provider_id"],
                )
                ids_by_bill_key[bill_key] = row["id"]
            return ids_by_bill_key

    def get_cost_entry_bills_by_date(self, start_date):
        """Return the bill query filtered to billing_period_start == start_date."""
        with schema_context(self.schema):
            all_bills = self._get_db_obj_query(AWSCostEntryBill)
            return all_bills.filter(billing_period_start=start_date)

    def get_cost_entry_bills_query_by_provider(self, provider_uuid):
        """Return the bill query filtered to the given provider."""
        with schema_context(self.schema):
            all_bills = self._get_db_obj_query(AWSCostEntryBill)
            return all_bills.filter(provider_id=provider_uuid)

    def bills_for_provider_uuid(self, provider_uuid, start_date=None):
        """Return the provider's bills, optionally narrowed to one billing month.

        When start_date is given (a string is parsed first), only bills whose
        billing_period_start equals the first day of that date's month are kept.
        """
        provider_bills = self.get_cost_entry_bills_query_by_provider(provider_uuid)
        if not start_date:
            return provider_bills

        if isinstance(start_date, str):
            start_date = parse(start_date)
        month_start = start_date.replace(day=1)
        return provider_bills.filter(billing_period_start=month_start)

    def get_bill_query_before_date(self, date, provider_uuid=None):
        """Return bills whose billing period starts on or before the given date.

        When provider_uuid is truthy the query is also limited to that provider.
        """
        with schema_context(self.schema):
            all_bills = self._get_db_obj_query(AWSCostEntryBill)
            if not provider_uuid:
                return all_bills.filter(billing_period_start__lte=date)
            return all_bills.filter(billing_period_start__lte=date, provider_id=provider_uuid)

    def get_lineitem_query_for_billid(self, bill_id):
        """Return the AWSCostEntryLineItem query filtered by cost_entry_bill_id."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryLineItem).filter(cost_entry_bill_id=bill_id)

    def get_daily_query_for_billid(self, bill_id):
        """Return the AWSCostEntryLineItemDaily query filtered by cost_entry_bill_id."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryLineItemDaily).filter(cost_entry_bill_id=bill_id)

    def get_summary_query_for_billid(self, bill_id):
        """Return the AWSCostEntryLineItemDailySummary query filtered by cost_entry_bill_id."""
        with schema_context(self.schema):
            all_summaries = self._get_db_obj_query(AWSCostEntryLineItemDailySummary)
            return all_summaries.filter(cost_entry_bill_id=bill_id)

    def get_ocp_aws_summary_query_for_billid(self, bill_id):
        """Return the OCP-on-AWS daily summary query filtered by cost_entry_bill_id.

        The table is looked up in the table map under "ocp_on_aws_daily_summary".
        """
        ocp_aws_table = self._table_map["ocp_on_aws_daily_summary"]
        return self._get_db_obj_query(ocp_aws_table).filter(cost_entry_bill_id=bill_id)

    def get_ocp_aws_project_summary_query_for_billid(self, bill_id):
        """Return the OCP-on-AWS project daily summary query filtered by cost_entry_bill_id.

        The table is looked up in the table map under
        "ocp_on_aws_project_daily_summary".
        """
        ocp_aws_project_table = self._table_map["ocp_on_aws_project_daily_summary"]
        return self._get_db_obj_query(ocp_aws_project_table).filter(cost_entry_bill_id=bill_id)

    def get_cost_entry_query_for_billid(self, bill_id):
        """Return the AWSCostEntry query filtered by bill_id."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntry).filter(bill_id=bill_id)

    def get_cost_entries(self):
        """Map (bill_id, formatted interval_start) to cost entry id.

        interval_start is formatted with the accessor's datetime format.
        """
        with schema_context(self.schema):
            ids_by_entry_key = {}
            for entry in self._get_db_obj_query(AWSCostEntry).all():
                interval_label = entry.interval_start.strftime(self._datetime_format)
                ids_by_entry_key[(entry.bill_id, interval_label)] = entry.id
            return ids_by_entry_key

    def get_products(self):
        """Map (sku, product_name, region) to product id."""
        with schema_context(self.schema):
            product_rows = self._get_db_obj_query(
                AWSCostEntryProduct, columns=["id", "sku", "product_name", "region"]
            ).all()

            ids_by_product = {}
            for row in product_rows:
                product_key = (row["sku"], row["product_name"], row["region"])
                ids_by_product[product_key] = row["id"]
            return ids_by_product

    def get_pricing(self):
        """Map "<term>-<unit>" strings to pricing ids."""
        with schema_context(self.schema):
            ids_by_pricing = {}
            for price in self._get_db_obj_query(AWSCostEntryPricing).all():
                ids_by_pricing[f"{price.term}-{price.unit}"] = price.id
            return ids_by_pricing

    def get_reservations(self):
        """Map reservation ARN to reservation id."""
        with schema_context(self.schema):
            reservation_rows = self._get_db_obj_query(
                AWSCostEntryReservation, columns=["id", "reservation_arn"]
            ).all()

            ids_by_arn = {}
            for row in reservation_rows:
                ids_by_arn[row["reservation_arn"]] = row["id"]
            return ids_by_arn

    def populate_line_item_daily_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregate of line items table.

        Loads the packaged SQL template, renders it through JinjaSql and
        executes it against the "line_item_daily" table from the table map.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) Passed to the SQL template as "bill_ids".

        Returns
            (None)

        """
        target_table = self._table_map["line_item_daily"]

        sql_template = pkgutil.get_data(
            "masu.database", "sql/reporting_awscostentrylineitem_daily.sql"
        ).decode("utf-8")
        # Fresh uuid with dashes swapped for underscores, handed to the template.
        run_uuid = str(uuid.uuid4()).replace("-", "_")
        template_params = {
            "uuid": run_uuid,
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        rendered_sql, bind_values = self.jinja_sql.prepare_query(sql_template, template_params)
        self._execute_raw_sql_query(
            target_table, rendered_sql, start_date, end_date, bind_params=list(bind_values)
        )

    def populate_line_item_daily_summary_table(self, start_date, end_date, bill_ids):
        """Run the SQL template that fills the daily summary of line items table.

        The template ``sql/reporting_awscostentrylineitem_daily_summary.sql``
        is loaded from the ``masu.database`` package, prepared through
        ``self.jinja_sql.prepare_query`` and handed to
        ``self._execute_raw_sql_query``.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) Bill identifiers passed to the SQL template.

        Returns
            (None)

        """
        target_table = self._table_map["line_item_daily_summary"]
        template_bytes = pkgutil.get_data("masu.database", "sql/reporting_awscostentrylineitem_daily_summary.sql")
        template = template_bytes.decode("utf-8")
        # Fresh identifier for every run; dashes are swapped for underscores.
        run_id = str(uuid.uuid4()).replace("-", "_")
        template_params = {
            "uuid": run_id,
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, start_date, end_date, bind_params=list(bind_values))

    def populate_ui_summary_tables(self, start_date, end_date, source_uuid, tables=UI_SUMMARY_TABLES):
        """Populate our UI summary tables (formerly materialized views).

        For every name in ``tables`` the template ``sql/aws/<name>.sql`` is
        loaded from the ``masu.database`` package, prepared and executed with
        the operation label ``"DELETE/INSERT"``.

        Args:
            start_date (datetime.date) The date to start populating the tables.
            end_date (datetime.date) The date to end on.
            source_uuid (str) Source identifier passed to the SQL templates.
            tables (iterable) Table names to process.

        Returns
            (None)

        """
        for ui_table in tables:
            template = pkgutil.get_data("masu.database", f"sql/aws/{ui_table}.sql").decode("utf-8")
            # A new parameter mapping is built for each table, as before.
            template_params = {
                "start_date": start_date,
                "end_date": end_date,
                "schema": self.schema,
                "source_uuid": source_uuid,
            }
            query, bind_values = self.jinja_sql.prepare_query(template, template_params)
            self._execute_raw_sql_query(
                ui_table, query, start_date, end_date, bind_params=list(bind_values), operation="DELETE/INSERT"
            )

    def populate_line_item_daily_summary_table_presto(self, start_date, end_date, source_uuid, bill_id, markup_value):
        """Run the Presto SQL template for the daily summary of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            source_uuid (str) Source identifier passed to the SQL template.
            bill_id (int) Bill identifier passed to the SQL template.
            markup_value (number) Markup passed to the SQL template; any falsy
                value is replaced by 0.

        Returns
            (None)

        """
        template = pkgutil.get_data(
            "masu.database", "presto_sql/reporting_awscostentrylineitem_daily_summary.sql"
        ).decode("utf-8")
        # Fresh identifier for every run; dashes are swapped for underscores.
        run_id = str(uuid.uuid4()).replace("-", "_")
        template_params = {
            "uuid": run_id,
            "start_date": start_date,
            "end_date": end_date,
            "schema": self.schema,
            "table": PRESTO_LINE_ITEM_DAILY_TABLE,
            "source_uuid": source_uuid,
            # Year and month are both taken from start_date only.
            "year": start_date.strftime("%Y"),
            "month": start_date.strftime("%m"),
            "markup": markup_value or 0,
            "bill_id": bill_id,
        }
        # NOTE(review): the bind values returned here are not forwarded to the
        # executor below -- presumably the template inlines every value; confirm.
        query, _bind_values = self.jinja_sql.prepare_query(template, template_params)

        self._execute_presto_raw_sql_query(
            self.schema, query, log_ref="reporting_awscostentrylineitem_daily_summary.sql"
        )

    def mark_bill_as_finalized(self, bill_id):
        """Stamp a bill with a finalized datetime unless it already has one.

        Args:
            bill_id (int) The id of the AWSCostEntryBill to look up.

        Returns
            (None)

        """
        with schema_context(self.schema):
            bill = self._get_db_obj_query(AWSCostEntryBill).get(id=bill_id)

            # An existing finalized timestamp is never overwritten.
            if bill.finalized_datetime is not None:
                return

            bill.finalized_datetime = self.date_accessor.today_with_timezone("UTC")
            bill.save()

    def populate_tags_summary_table(self, bill_ids, start_date, end_date):
        """Run the SQL template that fills the tags summary table.

        Args:
            bill_ids (list) Bill identifiers passed to the SQL template.
            start_date (datetime.date) Start date passed to the SQL template.
            end_date (datetime.date) End date passed to the SQL template.

        Returns
            (None)

        """
        target_table = self._table_map["tags_summary"]

        template = pkgutil.get_data("masu.database", "sql/reporting_awstags_summary.sql").decode("utf-8")
        template_params = {
            "schema": self.schema,
            "bill_ids": bill_ids,
            "start_date": start_date,
            "end_date": end_date,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, bind_params=list(bind_values))

    def populate_ocp_on_aws_cost_daily_summary(self, start_date, end_date, cluster_id, bill_ids, markup_value):
        """Run the SQL template for the OCP on AWS daily cost summary.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            cluster_id (str) Cluster identifier passed to the SQL template.
            bill_ids (list) Bill identifiers passed to the SQL template.
            markup_value (number) Markup passed unchanged to the SQL template.

        Returns
            (None)

        """
        target_table = self._table_map["ocp_on_aws_daily_summary"]
        template_bytes = pkgutil.get_data("masu.database", "sql/reporting_ocpawscostlineitem_daily_summary.sql")
        template = template_bytes.decode("utf-8")
        # Fresh identifier for every run; dashes are swapped for underscores.
        run_id = str(uuid.uuid4()).replace("-", "_")
        template_params = {
            "uuid": run_id,
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "cluster_id": cluster_id,
            "schema": self.schema,
            "markup": markup_value,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)

        self._execute_raw_sql_query(target_table, query, start_date, end_date, bind_params=list(bind_values))

    def populate_ocp_on_aws_ui_summary_tables(self, sql_params, tables=OCPAWS_UI_SUMMARY_TABLES):
        """Populate our OCP on AWS UI summary tables (formerly materialized views).

        For every name in ``tables`` the template
        ``sql/aws/openshift/<name>.sql`` is loaded from the ``masu.database``
        package, prepared with ``sql_params`` and executed.

        Args:
            sql_params (dict) Parameters passed to every SQL template.
            tables (iterable) Table names to process.

        Returns
            (None)

        """
        for ui_table in tables:
            template = pkgutil.get_data("masu.database", f"sql/aws/openshift/{ui_table}.sql").decode("utf-8")
            query, bind_values = self.jinja_sql.prepare_query(template, sql_params)
            self._execute_raw_sql_query(ui_table, query, bind_params=list(bind_values))

    def delete_ocp_on_aws_hive_partition_by_day(self, days, aws_source, ocp_source, year, month):
        """Delete rows of the OCP on AWS project summary Hive table one day at a time.

        Nothing is done when ``self.table_exists_trino`` reports that the table
        is missing. Each day's DELETE is attempted up to
        ``settings.HIVE_PARTITION_DELETE_RETRIES`` times: a
        ``TrinoExternalError`` named ``HIVE_METASTORE_ERROR`` leads to another
        attempt while attempts remain; any other such error, or one raised on
        the final attempt, is re-raised.

        Args:
            days (iterable) Values compared against the ``day`` column.
            aws_source (str) Value compared against the ``aws_source`` column.
            ocp_source (str) Value compared against the ``ocp_source`` column.
            year (str) Value compared against the ``year`` column.
            month (str) Value compared against the ``month`` column, both as
                given and with its leading zeros stripped.

        Returns
            (None)

        """
        table = "reporting_ocpawscostlineitem_project_daily_summary"
        retries = settings.HIVE_PARTITION_DELETE_RETRIES
        if self.table_exists_trino(table):
            LOG.info(
                "Deleting Hive partitions for the following: \n\tSchema: %s "
                "\n\tOCP Source: %s \n\tAWS Source: %s \n\tTable: %s \n\tYear-Month: %s-%s \n\tDays: %s",
                self.schema,
                ocp_source,
                aws_source,
                table,
                year,
                month,
                days,
            )
            final_attempt = retries - 1
            for day in days:
                for attempt in range(retries):
                    try:
                        # NOTE(review): values are interpolated straight into the
                        # statement -- assumes they are trusted internal identifiers.
                        delete_sql = f"""
                            DELETE FROM hive.{self.schema}.{table}
                                WHERE aws_source = '{aws_source}'
                                AND ocp_source = '{ocp_source}'
                                AND year = '{year}'
                                AND (month = replace(ltrim(replace('{month}', '0', ' ')),' ', '0') OR month = '{month}')
                                AND day = '{day}'"""
                        self._execute_presto_raw_sql_query(
                            self.schema,
                            delete_sql,
                            log_ref=f"delete_ocp_on_aws_hive_partition_by_day for {year}-{month}-{day}",
                            attempts_left=final_attempt - attempt,
                        )
                        break
                    except TrinoExternalError as err:
                        can_retry = err.error_name == "HIVE_METASTORE_ERROR" and attempt < final_attempt
                        if not can_retry:
                            raise err

    def populate_ocp_on_aws_cost_daily_summary_presto(
        self,
        start_date,
        end_date,
        openshift_provider_uuid,
        aws_provider_uuid,
        report_period_id,
        bill_id,
        markup_value,
        distribution,
    ):
        """Run the Presto SQL template for the OCP on AWS daily cost summary.

        The per-day Hive partitions for the date range are deleted first via
        ``delete_ocp_on_aws_hive_partition_by_day``; the multipart template is
        executed afterwards.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            openshift_provider_uuid (str) OCP source passed to the SQL.
            aws_provider_uuid (str) AWS source passed to the SQL.
            report_period_id (int) Report period passed to the SQL.
            bill_id (int) Bill identifier passed to the SQL.
            markup_value (number) Markup passed unchanged to the SQL.
            distribution (str) ``"memory"`` selects the memory columns; any
                other value selects the cpu columns.

        Returns
            (None)

        """
        # Year and month are both taken from start_date only.
        year = start_date.strftime("%Y")
        month = start_date.strftime("%m")
        day_numbers = [str(day.day) for day in DateHelper().list_days(start_date, end_date)]
        quoted_days = "','".join(day_numbers)
        self.delete_ocp_on_aws_hive_partition_by_day(
            day_numbers, aws_provider_uuid, openshift_provider_uuid, year, month
        )

        # Default to cpu distribution
        if distribution == "memory":
            pod_column = "pod_effective_usage_memory_gigabyte_hours"
            node_column = "node_capacity_memory_gigabyte_hours"
        else:
            pod_column = "pod_effective_usage_cpu_core_hours"
            node_column = "node_capacity_cpu_core_hours"

        template = pkgutil.get_data(
            "masu.database", "presto_sql/reporting_ocpawscostlineitem_daily_summary.sql"
        ).decode("utf-8")
        template_params = {
            "schema": self.schema,
            "start_date": start_date,
            "year": year,
            "month": month,
            "days": quoted_days,
            "end_date": end_date,
            "aws_source_uuid": aws_provider_uuid,
            "ocp_source_uuid": openshift_provider_uuid,
            "bill_id": bill_id,
            "report_period_id": report_period_id,
            "markup": markup_value,
            "pod_column": pod_column,
            "node_column": node_column,
        }
        LOG.info("Running OCP on AWS SQL with params:")
        LOG.info(template_params)
        self._execute_presto_multipart_sql_query(self.schema, template, bind_params=template_params)

    def back_populate_ocp_on_aws_daily_summary(self, start_date, end_date, report_period_id):
        """Run the back-populate SQL template for the OCP on AWS daily summary.

        Args:
            start_date (datetime.date) Start date passed to the SQL template.
            end_date (datetime.date) End date passed to the SQL template.
            report_period_id (int) Report period passed to the SQL template.

        Returns
            (None)

        """
        target_table = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]

        template_bytes = pkgutil.get_data(
            "masu.database", "sql/reporting_ocpawscostentrylineitem_daily_summary_back_populate.sql"
        )
        template = template_bytes.decode("utf-8")
        template_params = {
            "schema": self.schema,
            "start_date": start_date,
            "end_date": end_date,
            "report_period_id": report_period_id,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, bind_params=list(bind_values))

    def populate_ocp_on_aws_tags_summary_table(self, bill_ids, start_date, end_date):
        """Run the SQL template that fills the OCP on AWS tags summary table.

        Args:
            bill_ids (list) Bill identifiers passed to the SQL template.
            start_date (datetime.date) Start date passed to the SQL template.
            end_date (datetime.date) End date passed to the SQL template.

        Returns
            (None)

        """
        target_table = self._table_map["ocp_on_aws_tags_summary"]

        template = pkgutil.get_data("masu.database", "sql/reporting_ocpawstags_summary.sql").decode("utf-8")
        template_params = {
            "schema": self.schema,
            "bill_ids": bill_ids,
            "start_date": start_date,
            "end_date": end_date,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, bind_params=list(bind_values))

    def populate_markup_cost(self, provider_uuid, markup, start_date, end_date, bill_ids=None):
        """Set markup costs in the database.

        Each markup column is written as the matching cost column multiplied
        by ``markup``.

        Args:
            provider_uuid (str) Source UUID used to select rows in the
                OCP on AWS and OCP on All tables.
            markup (number) Multiplier applied to the cost columns.
            start_date (datetime.date) Inclusive lower bound on ``usage_start``.
            end_date (datetime.date) Inclusive upper bound on ``usage_start``.
                The date bounds are applied only when both are truthy;
                otherwise rows are not restricted by date.
            bill_ids (list) Bill ids whose AWS rows are updated. With ``None``
                or an empty list nothing is updated.

        Returns
            (None)

        """
        # The parameter defaults to None, which used to raise TypeError in the
        # loop below. An empty list already meant "update nothing", so None is
        # treated the same way.
        if not bill_ids:
            return

        with schema_context(self.schema):
            if start_date and end_date:
                date_filters = {"usage_start__gte": start_date, "usage_start__lte": end_date}
            else:
                date_filters = {}

            # Bill-scoped tables: one update per bill.
            for bill_id in bill_ids:
                AWSCostEntryLineItemDailySummary.objects.filter(cost_entry_bill_id=bill_id, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup),
                    markup_cost_blended=(F("blended_cost") * markup),
                    markup_cost_savingsplan=(F("savingsplan_effective_cost") * markup),
                )

                OCPAWSCostLineItemDailySummaryP.objects.filter(cost_entry_bill_id=bill_id, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

            # Provider-scoped tables: these filters do not depend on the bill,
            # so the updates run once instead of once per bill. Each update
            # recomputes the same value, so the stored result is unchanged.
            for ocpaws_model in OCP_ON_AWS_PERSPECTIVES:
                ocpaws_model.objects.filter(source_uuid=provider_uuid, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

            OCPAllCostLineItemProjectDailySummaryP.objects.filter(
                source_uuid=provider_uuid, source_type="AWS", **date_filters
            ).update(project_markup_cost=(F("pod_cost") * markup))

            for markup_model in (OCPAllCostLineItemDailySummaryP, *OCP_ON_ALL_PERSPECTIVES):
                markup_model.objects.filter(source_uuid=provider_uuid, source_type="AWS", **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

    def populate_enabled_tag_keys(self, start_date, end_date, bill_ids):
        """Run the SQL template that fills the enabled tag key table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        target_table = self._table_map["enabled_tag_keys"]
        template = pkgutil.get_data("masu.database", "sql/reporting_awsenabledtagkeys.sql").decode("utf-8")
        template_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, start_date, end_date, bind_params=list(bind_values))

    def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, bill_ids):
        """Run the enabled-tags update SQL against the daily summary table.

        The template
        ``sql/reporting_awscostentryline_item_daily_summary_update_enabled_tags.sql``
        is executed for the ``line_item_daily_summary`` table.

        Args:
            start_date (datetime.date) The date to start updating from.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        target_table = self._table_map["line_item_daily_summary"]
        template_bytes = pkgutil.get_data(
            "masu.database", "sql/reporting_awscostentryline_item_daily_summary_update_enabled_tags.sql"
        )
        template = template_bytes.decode("utf-8")
        template_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        self._execute_raw_sql_query(target_table, query, start_date, end_date, bind_params=list(bind_values))

    def get_openshift_on_cloud_matched_tags(self, aws_bill_id, ocp_report_period_id):
        """Return a list of matched tags.

        Args:
            aws_bill_id (int) Bill identifier passed to the SQL template.
            ocp_report_period_id (int) Report period passed to the SQL template.

        Returns
            (list) The first column of every result row, decoded from JSON.

        """
        template = pkgutil.get_data("masu.database", "sql/reporting_ocpaws_matched_tags.sql").decode("utf-8")
        template_params = {
            "bill_id": aws_bill_id,
            "report_period_id": ocp_report_period_id,
            "schema": self.schema,
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        with connection.cursor() as cursor:
            cursor.db.set_schema(self.schema)
            cursor.execute(query, params=bind_values)
            rows = cursor.fetchall()

        matched_tags = []
        for row in rows:
            matched_tags.append(json.loads(row[0]))
        return matched_tags

    def get_openshift_on_cloud_matched_tags_trino(self, aws_source_uuid, ocp_source_uuid, start_date, end_date):
        """Return a list of matched tags, queried through Presto/Trino.

        Args:
            aws_source_uuid (str) AWS source passed to the SQL template.
            ocp_source_uuid (str) OCP source passed to the SQL template.
            start_date (datetime.date) Start of the range; also supplies the
                year and month parameters.
            end_date (datetime.date) End of the range.

        Returns
            (list) The first column of every result row, decoded from JSON.

        """
        template = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpaws_matched_tags.sql").decode("utf-8")

        day_numbers = [str(day.day) for day in DateHelper().list_days(start_date, end_date)]

        template_params = {
            "start_date": start_date,
            "end_date": end_date,
            "schema": self.schema,
            "aws_source_uuid": aws_source_uuid,
            "ocp_source_uuid": ocp_source_uuid,
            "year": start_date.strftime("%Y"),
            "month": start_date.strftime("%m"),
            "days": "','".join(day_numbers),
        }
        query, bind_values = self.jinja_sql.prepare_query(template, template_params)
        rows = self._execute_presto_raw_sql_query(
            self.schema, query, bind_params=bind_values, log_ref="reporting_ocpaws_matched_tags.sql"
        )

        matched_tags = []
        for row in rows:
            matched_tags.append(json.loads(row[0]))
        return matched_tags
Exemplo n.º 14
0
class OCPReportSummaryUpdater(PartitionHandlerMixin):
    """Class to update OCP report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: The provider being summarized; its ``uuid`` is used to
                look up the cluster id and report periods.
            manifest: The report manifest, or a falsy value when there is none.

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._date_accessor = DateAccessor()

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        for start, end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating OpenShift report daily tables for \n\tSchema: %s "
                "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                self._cluster_id,
                start,
                end,
            )
            with OCPReportDBAccessor(self._schema) as accessor:
                accessor.populate_node_label_line_item_daily_table(start, end, self._cluster_id)
                accessor.populate_line_item_daily_table(start, end, self._cluster_id)
                accessor.populate_storage_line_item_daily_table(start, end, self._cluster_id)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        When no report period is found for the provider and start date,
        a warning is logged and nothing is summarized.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        with schema_context(self._schema):
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

        with OCPReportDBAccessor(self._schema) as accessor:
            report_period = accessor.report_periods_for_provider_uuid(self._provider.uuid, start_date)
            if report_period is None:
                # Without this guard the attribute access below raised
                # AttributeError when no report period was found.
                LOG.warning(
                    "No report period for provider %s with start date %s; skipping summary update.",
                    self._provider.uuid,
                    start_date,
                )
                return start_date, end_date

            with schema_context(self._schema):
                report_period_ids = [report_period.id]
            for start, end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    self._cluster_id,
                    start,
                    end,
                )
                accessor.populate_line_item_daily_summary_table(start, end, self._cluster_id, self._provider.uuid)
                accessor.populate_storage_line_item_daily_summary_table(
                    start, end, self._cluster_id, self._provider.uuid
                )
                accessor.populate_ui_summary_tables(start, end, self._provider.uuid)
            # The label summaries and tag update run once over the whole range.
            accessor.populate_pod_label_summary_table(report_period_ids, start_date, end_date)
            accessor.populate_volume_label_summary_table(report_period_ids, start_date, end_date)
            accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids)

            # The creation timestamp is set only the first time; the updated
            # timestamp is refreshed on every run.
            if report_period.summary_data_creation_datetime is None:
                report_period.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            report_period.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            report_period.save()

        self.check_cluster_infrastructure(start_date, end_date)

        return start_date, end_date

    def check_cluster_infrastructure(self, start_date, end_date):
        """Log which cloud source, if any, this OpenShift cluster runs on.

        The infrastructure map is taken from the providers first and, when
        that is empty, generated from SQL for the given date range.

        Args:
            start_date (str) Start of the range used for the SQL lookup.
            end_date   (str) End of the range used for the SQL lookup.

        Returns
            (None)

        """
        LOG.info("Checking if OpenShift cluster %s is running on cloud infrastructure.", self._provider.uuid)
        updater_base = OCPCloudUpdaterBase(self._schema, self._provider, self._manifest)
        infra_map = updater_base.get_infra_map_from_providers()
        if not infra_map:
            # Check the cluster to see if it is running on cloud infrastructure
            infra_map = updater_base._generate_ocp_infra_map_from_sql(start_date, end_date)
        if infra_map:
            for ocp_source, infra_tuple in infra_map.items():
                LOG.info(
                    "OpenShift cluster %s is running on %s source %s.", ocp_source, infra_tuple[1], infra_tuple[0]
                )

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        Without a manifest the dates are returned unchanged. With a manifest
        they are replaced by the first and last day of the manifest's billing
        month when a full-month update is needed.

        Args:
            start_date (str) The requested start date.
            end_date   (str) The requested end date.

        Returns
            (str, str) The start date and end date to use.

        """
        with OCPReportDBAccessor(self._schema) as accessor:
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(self._provider.uuid)
                report_periods = report_periods.filter(report_period_start=bill_date).all()
                # With no existing report period the full month is processed.
                do_month_update = True
                with schema_context(self._schema):
                    if report_periods is not None and len(report_periods) > 0:
                        do_month_update = self._determine_if_full_summary_update_needed(report_periods[0])
                if do_month_update:
                    last_day_of_month = calendar.monthrange(bill_date.year, bill_date.month)[1]
                    start_date = bill_date.strftime("%Y-%m-%d")
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime("%Y-%m-%d")
                    LOG.info("Overriding start and end date to process full month.")
                LOG.info("Returning start: %s, end: %s", str(start_date), str(end_date))
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, report_period):
        """Decide whether to update summary tables for full billing period.

        Args:
            report_period: The report period whose summary creation timestamp
                is inspected.

        Returns
            (bool) True when the manifest is ready for summary and the report
                period has never been summarized.

        """
        with ReportManifestDBAccessor() as manifest_accesor:
            is_done_processing = manifest_accesor.manifest_ready_for_summary(self._manifest.id)
        is_new_period = report_period.summary_data_creation_datetime is None

        # Run the full month if this is the first time we've seen this report
        # period
        return bool(is_done_processing and is_new_period)
class AzureReportSummaryUpdater(PartitionHandlerMixin):
    """Class to update Azure report summary data."""

    def __init__(self, schema, provider, manifest):
        """Store the inputs needed to summarize one provider's reports.

        Args:
            schema (str): The customer schema to associate with
            provider: The provider being summarized; its ``uuid`` is used to
                look up bills.
            manifest: The report manifest, or a falsy value when there is none.

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Work out the date range to use for the summary SQL.

        Without a manifest the dates are returned unchanged. With a manifest
        they are replaced by the first and last day of the manifest's billing
        month when a bill exists for that month and a full update is needed.

        Args:
            start_date (str) The requested start date.
            end_date   (str) The requested end date.

        Returns
            (str, str) The start date and end date to use.

        """
        with AzureReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # The manifest's billing period decides which bill is inspected.
                bill_date = self._manifest.billing_period_start_datetime.date()
                provider_bills = accessor.get_cost_entry_bills_query_by_provider(self._provider.uuid)
                month_bills = provider_bills.filter(billing_period_start=bill_date).all()
                first_bill = month_bills.filter(billing_period_start=bill_date).first()
                with schema_context(self._schema):
                    if first_bill:
                        full_month_needed = self._determine_if_full_summary_update_needed(first_bill)
                    else:
                        full_month_needed = False
                if full_month_needed:
                    _, days_in_month = calendar.monthrange(bill_date.year, bill_date.month)
                    start_date = bill_date.strftime("%Y-%m-%d")
                    end_date = bill_date.replace(day=days_in_month).strftime("%Y-%m-%d")
                    LOG.info("Overriding start and end date to process full month.")

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Resolve the date range for the daily tables.

        No daily table is populated here; only the date range is computed.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        LOG.info("update_daily_tables for: %s-%s", str(start_date), str(end_date))
        return self._get_sql_inputs(start_date, end_date)

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        LOG.info("update_summary_tables for: %s-%s", str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with schema_context(self._schema):
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

        range_start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        range_end = datetime.datetime.strptime(end_date, "%Y-%m-%d")
        provider_bills = get_bills_from_provider(self._provider.uuid, self._schema, range_start, range_end)
        bill_ids = []
        with schema_context(self._schema):
            for provider_bill in provider_bills:
                bill_ids.append(str(provider_bill.id))

        with AzureReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            session_bills = accessor.bills_for_provider_uuid(self._provider.uuid, start_date)
            for start, end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating Azure report summary tables: \n\tSchema: %s\n\tProvider: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                )
                accessor.populate_line_item_daily_summary_table(start, end, bill_ids)
                accessor.populate_ui_summary_tables(start, end, self._provider.uuid)
            # The tags summary runs once over the whole range.
            accessor.populate_tags_summary_table(bill_ids, start_date, end_date)
            for bill in session_bills:
                # Creation is stamped only once; updated is refreshed every run.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
                bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
                bill.save()

        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, bill):
        """Decide whether to update summary tables for full billing period.

        Args:
            bill: The bill whose summary creation timestamp is inspected.

        Returns
            (bool) True when the manifest is ready for summary and the bill
                has never been summarized.

        """
        summary_creation = bill.summary_data_creation_datetime

        with ReportManifestDBAccessor() as manifest_accesor:
            manifest_ready = manifest_accesor.manifest_ready_for_summary(self._manifest.id)

        never_summarized = summary_creation is None

        # A full month update happens only for a bill that is both ready and
        # being summarized for the first time.
        return True if (manifest_ready and never_summarized) else False
Exemplo n.º 16
0
class AWSReportDBAccessor(ReportDBAccessorBase):
    """Class to interact with customer reporting tables."""

    def __init__(self, schema):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
        """
        super().__init__(schema)
        self._datetime_format = Config.AWS_DATETIME_STR_FORMAT
        self.date_accessor = DateAccessor()
        self.jinja_sql = JinjaSql()

    def _render_packaged_sql(self, sql_file, sql_params):
        """Load a SQL template packaged in ``masu.database`` and prepare it.

        Args:
            sql_file (str): Resource path relative to the masu.database package.
            sql_params (dict): Values passed to ``JinjaSql.prepare_query``.

        Returns:
            (tuple): The (sql, bind parameters) pair returned by
                ``JinjaSql.prepare_query``.

        """
        template = pkgutil.get_data("masu.database", sql_file).decode("utf-8")
        return self.jinja_sql.prepare_query(template, sql_params)

    def get_cost_entry_bills(self):
        """Map (bill_type, payer_account_id, billing_period_start, provider_id) to bill id."""
        columns = ["id", "bill_type", "payer_account_id", "billing_period_start", "provider_id"]
        with schema_context(self.schema):
            bills = self._get_db_obj_query(AWSCostEntryBill).values(*columns)
            return {
                (
                    bill["bill_type"],
                    bill["payer_account_id"],
                    bill["billing_period_start"],
                    bill["provider_id"],
                ): bill["id"]
                for bill in bills
            }

    def get_cost_entry_bills_by_date(self, start_date):
        """Return the cost entry bill query filtered to the given billing period start."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryBill).filter(billing_period_start=start_date)

    def get_cost_entry_bills_query_by_provider(self, provider_uuid):
        """Return all cost entry bills for the specified provider."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryBill).filter(provider_id=provider_uuid)

    def bills_for_provider_uuid(self, provider_uuid, start_date=None):
        """Return cost entry bills for provider_uuid, optionally for one billing month.

        Args:
            provider_uuid: The provider whose bills are wanted.
            start_date (str | date | datetime | None): When given, only bills
                whose billing_period_start equals the first day of this date's
                month are returned. Strings are converted with ``parse``.

        """
        bills = self.get_cost_entry_bills_query_by_provider(provider_uuid)
        if start_date:
            if isinstance(start_date, str):
                start_date = parse(start_date)
            # Bills are matched on the first day of the month.
            bills = bills.filter(billing_period_start=start_date.replace(day=1))
        return bills

    def get_bill_query_before_date(self, date, provider_uuid=None):
        """Get the cost entry bill objects with billing period on or before the provided date.

        Args:
            date: Upper bound (inclusive, ``__lte``) for billing_period_start.
            provider_uuid: Optional provider to restrict the query to.

        """
        filters = {"billing_period_start__lte": date}
        if provider_uuid:
            filters["provider_id"] = provider_uuid
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryBill).filter(**filters)

    def get_lineitem_query_for_billid(self, bill_id):
        """Get the AWS cost entry line item for a given bill query."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryLineItem).filter(cost_entry_bill_id=bill_id)

    def get_daily_query_for_billid(self, bill_id):
        """Get the AWS cost daily item for a given bill query."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryLineItemDaily).filter(cost_entry_bill_id=bill_id)

    def get_summary_query_for_billid(self, bill_id):
        """Get the AWS cost summary item for a given bill query."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntryLineItemDailySummary).filter(cost_entry_bill_id=bill_id)

    def get_ocp_aws_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]
        return self._get_db_obj_query(table_name).filter(cost_entry_bill_id=bill_id)

    def get_ocp_aws_project_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report project summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_project_daily_summary"]
        return self._get_db_obj_query(table_name).filter(cost_entry_bill_id=bill_id)

    def get_cost_entry_query_for_billid(self, bill_id):
        """Get the AWS cost entry data for a given bill query."""
        with schema_context(self.schema):
            return self._get_db_obj_query(AWSCostEntry).filter(bill_id=bill_id)

    def get_cost_entries(self):
        """Map (bill_id, formatted interval_start) to cost entry id."""
        with schema_context(self.schema):
            cost_entries = self._get_db_obj_query(AWSCostEntry).all()

            return {
                (ce.bill_id, ce.interval_start.strftime(self._datetime_format)): ce.id
                for ce in cost_entries
            }

    def get_products(self):
        """Map (sku, product_name, region) to product id."""
        with schema_context(self.schema):
            columns = ["id", "sku", "product_name", "region"]
            products = self._get_db_obj_query(AWSCostEntryProduct, columns=columns).all()

            return {
                (product["sku"], product["product_name"], product["region"]): product["id"]
                for product in products
            }

    def get_pricing(self):
        """Map the "<term>-<unit>" pricing string to pricing id."""
        with schema_context(self.schema):
            pricing = self._get_db_obj_query(AWSCostEntryPricing).all()

            return {f"{p.term}-{p.unit}": p.id for p in pricing}

    def get_reservations(self):
        """Map reservation ARN to reservation id."""
        with schema_context(self.schema):
            columns = ["id", "reservation_arn"]
            reservs = self._get_db_obj_query(AWSCostEntryReservation, columns=columns).all()

            return {res["reservation_arn"]: res["id"] for res in reservs}

    def populate_line_item_daily_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregate of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily"]
        daily_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        daily_sql, daily_sql_params = self._render_packaged_sql(
            "sql/reporting_awscostentrylineitem_daily.sql", daily_sql_params
        )
        self._execute_raw_sql_query(
            table_name, daily_sql, start_date, end_date, bind_params=list(daily_sql_params)
        )

    def populate_line_item_daily_summary_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregated summary of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily_summary"]
        summary_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self._render_packaged_sql(
            "sql/reporting_awscostentrylineitem_daily_summary.sql", summary_sql_params
        )
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def populate_line_item_daily_summary_table_presto(
        self, start_date, end_date, source_uuid, bill_id, markup_value
    ):
        """Populate the daily aggregated summary of line items table via Presto.

        Args:
            start_date (datetime.date) The date to start populating the table;
                its year and month are also passed to the SQL template.
            end_date (datetime.date) The date to end on.
            source_uuid The source/provider identifier passed to the template.
            bill_id The bill identifier passed to the template.
            markup_value The markup passed to the template; falsy values become 0.

        Returns
            (None)

        """
        summary_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "schema": self.schema,
            "table": PRESTO_LINE_ITEM_TABLE,
            "source_uuid": source_uuid,
            "year": start_date.strftime("%Y"),
            "month": start_date.strftime("%m"),
            "markup": markup_value or 0,
            "bill_id": bill_id,
        }
        summary_sql, summary_sql_params = self._render_packaged_sql(
            "presto_sql/reporting_awscostentrylineitem_daily_summary.sql", summary_sql_params
        )

        # Lazy %-formatting: the message is only built if INFO is enabled.
        LOG.info("Summary SQL: %s", summary_sql)
        # NOTE(review): the prepared bind parameters are not forwarded here,
        # unlike the _execute_raw_sql_query callers - confirm this is intended.
        self._execute_presto_raw_sql_query(self.schema, summary_sql)

    def mark_bill_as_finalized(self, bill_id):
        """Mark a bill in the database as finalized.

        The finalized timestamp is only written when it is currently unset, so
        an already-finalized bill keeps its original timestamp.
        """
        with schema_context(self.schema):
            bill = self._get_db_obj_query(AWSCostEntryBill).get(id=bill_id)

            if bill.finalized_datetime is None:
                bill.finalized_datetime = self.date_accessor.today_with_timezone("UTC")
                bill.save()

    def populate_tags_summary_table(self, bill_ids):
        """Populate the line item aggregated totals data table.

        Args:
            bill_ids (list) A list of bill IDs.

        """
        table_name = AWS_CUR_TABLE_MAP["tags_summary"]
        agg_sql_params = {"schema": self.schema, "bill_ids": bill_ids}
        agg_sql, agg_sql_params = self._render_packaged_sql(
            "sql/reporting_awstags_summary.sql", agg_sql_params
        )
        self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params))

    def populate_ocp_on_aws_cost_daily_summary(
        self, start_date, end_date, cluster_id, bill_ids, markup_value
    ):
        """Populate the daily cost aggregated summary for OCP on AWS.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            cluster_id The cluster identifier passed to the SQL template.
            bill_ids (list) A list of bill IDs.
            markup_value The markup passed to the SQL template.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]
        summary_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "cluster_id": cluster_id,
            "schema": self.schema,
            "markup": markup_value,
        }
        summary_sql, summary_sql_params = self._render_packaged_sql(
            "sql/reporting_ocpawscostlineitem_daily_summary.sql", summary_sql_params
        )
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def populate_ocp_on_aws_tags_summary_table(self):
        """Populate the OCP-on-AWS tags summary table."""
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_tags_summary"]
        agg_sql_params = {"schema": self.schema}
        agg_sql, agg_sql_params = self._render_packaged_sql(
            "sql/reporting_ocpawstags_summary.sql", agg_sql_params
        )
        self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params))

    def populate_markup_cost(self, markup, start_date, end_date, bill_ids=None):
        """Set markup costs in the database.

        Args:
            markup: Multiplier applied to ``unblended_cost``.
            start_date: Inclusive lower bound on usage_start.
            end_date: Inclusive upper bound on usage_start.
            bill_ids (list | None): Bills to update. Nothing is updated when
                this is empty or None.

        The date bounds are applied only when both are provided; otherwise
        every summary row of each bill is updated.
        """
        with schema_context(self.schema):
            if not bill_ids:
                return

            date_filters = {}
            if start_date and end_date:
                date_filters = {"usage_start__gte": start_date, "usage_start__lte": end_date}

            for bill_id in bill_ids:
                AWSCostEntryLineItemDailySummary.objects.filter(
                    cost_entry_bill_id=bill_id, **date_filters
                ).update(markup_cost=(F("unblended_cost") * markup))

    def populate_enabled_tag_keys(self, start_date, end_date, bill_ids):
        """Populate the enabled tag key table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        table_name = AWS_CUR_TABLE_MAP["enabled_tag_keys"]
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self._render_packaged_sql(
            "sql/reporting_awsenabledtagkeys.sql", summary_sql_params
        )
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, bill_ids):
        """Run the enabled-tags update SQL against the line item daily summary table.

        Args:
            start_date (datetime.date) The date to start updating from.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily_summary"]
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self._render_packaged_sql(
            "sql/reporting_awscostentryline_item_daily_summary_update_enabled_tags.sql",
            summary_sql_params,
        )
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )
Exemplo n.º 17
0
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = 'public'
        super().__init__(self._schema)
        self._table = CostUsageReportManifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_uuid):
        """Get the manifest associated with the provided provider and id.

        Returns the first match, or None when no manifest matches.
        """
        query = self._get_db_obj_query()
        return query.filter(provider_id=provider_uuid)\
            .filter(assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id, or None when it does not exist."""
        with schema_context(self._schema):
            query = self._get_db_obj_query()
            return query.filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Set the manifest's updated timestamp to now (UTC) and save it."""
        manifest.manifest_updated_datetime = \
            self.date_accessor.today_with_timezone('UTC')
        manifest.save()

    # pylint: disable=arguments-differ
    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_uuid,
        Returns:
            Whatever ``KokuDBAccess.add`` returns for the new row.

        """
        if 'manifest_creation_datetime' not in kwargs:
            kwargs['manifest_creation_datetime'] = \
                self.date_accessor.today_with_timezone('UTC')

        kwargs.setdefault('num_processed_files', 0)

        # The Django model insists on calling this field provider_id
        if 'provider_uuid' in kwargs:
            kwargs['provider_id'] = kwargs.pop('provider_uuid')

        return super().add(**kwargs)

    # pylint: disable=no-self-use
    def get_last_report_completed_datetime(self, manifest_id):
        """Get the most recent report processing completion time for a manifest.

        Returns:
            The last_completed_datetime of the first status row when ordered
            by descending completion time, or None when the manifest has no
            status rows at all.

        """
        result = CostUsageReportStatus.objects.\
            filter(manifest_id=manifest_id).order_by('-last_completed_datetime').first()
        # .first() yields None on an empty queryset; do not dereference it.
        if result is None:
            return None
        return result.last_completed_datetime

    def reset_manifest(self, manifest_id):
        """Return the manifest to a state as if it had not been processed.

        This sets the number of processed files to zero and
        nullifies the started and completed times on the reports.
        """
        manifest = self.get_manifest_by_id(manifest_id)
        manifest.num_processed_files = 0
        manifest.save()

        # Select the status rows that belong to this manifest. Filtering on
        # the status row's own primary key (id) would hit an unrelated row.
        report_statuses = CostUsageReportStatus.objects.filter(manifest_id=manifest_id).all()
        for report_status in report_statuses:
            report_status.last_completed_datetime = None
            report_status.last_started_datetime = None
            report_status.save()

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid, bill_date):
        """Return all manifests for a provider and bill date."""
        filters = {
            'provider_id': provider_uuid,
            'billing_period_start_datetime__date': bill_date
        }
        return CostUsageReportManifest.objects.\
            filter(**filters).all()
Exemplo n.º 18
0
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = "public"
        super().__init__(self._schema)
        self._table = CostUsageReportManifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_uuid):
        """Get the manifest associated with the provided provider and id.

        Returns the first match, or None when no manifest matches.
        """
        query = self._get_db_obj_query()
        return query.filter(provider_id=provider_uuid).filter(assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id, or None when it does not exist."""
        with schema_context(self._schema):
            query = self._get_db_obj_query()
            return query.filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Set the manifest's updated timestamp to now (UTC); no-op for a falsy manifest."""
        if manifest:
            manifest.manifest_updated_datetime = self.date_accessor.today_with_timezone("UTC")
            manifest.save()

    def mark_manifest_as_completed(self, manifest):
        """Set the manifest's completed timestamp to now (UTC); no-op for a falsy manifest."""
        if manifest:
            manifest.manifest_completed_datetime = self.date_accessor.today_with_timezone("UTC")
            manifest.save()

    # pylint: disable=arguments-differ
    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_uuid,
        Returns:
            Whatever ``KokuDBAccess.add`` returns for the new row.

        """
        if "manifest_creation_datetime" not in kwargs:
            kwargs["manifest_creation_datetime"] = self.date_accessor.today_with_timezone("UTC")

        kwargs.setdefault("num_processed_files", 0)

        # The Django model insists on calling this field provider_id
        if "provider_uuid" in kwargs:
            kwargs["provider_id"] = kwargs.pop("provider_uuid")

        return super().add(**kwargs)

    # pylint: disable=no-self-use
    def get_last_report_completed_datetime(self, manifest_id):
        """Get the most recent report processing completion time for a manifest.

        Returns None when the manifest has no status rows.
        """
        result = (
            CostUsageReportStatus.objects.filter(manifest_id=manifest_id)
            .order_by("-last_completed_datetime")
            .first()
        )
        if result:
            return result.last_completed_datetime
        return None

    def reset_manifest(self, manifest_id):
        """Return the manifest to a state as if it had not been processed.

        This sets the number of processed files to zero and
        nullifies the started and completed times on the reports.
        """
        manifest = self.get_manifest_by_id(manifest_id)
        manifest.num_processed_files = 0
        manifest.save()

        # Select the status rows that belong to this manifest. Filtering on
        # the status row's own primary key (id) would hit an unrelated row.
        report_statuses = CostUsageReportStatus.objects.filter(manifest_id=manifest_id).all()
        for report_status in report_statuses:
            report_status.last_completed_datetime = None
            report_status.last_started_datetime = None
            report_status.save()

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid, bill_date):
        """Return all manifests for a provider and bill date."""
        filters = {
            "provider_id": provider_uuid,
            "billing_period_start_datetime__date": bill_date,
        }
        return CostUsageReportManifest.objects.filter(**filters).all()

    def get_last_seen_manifest_ids(self, bill_date):
        """Return assembly ids of the latest, not fully processed manifests.

        For the given billing period start, the most recently created manifest
        of each provider is considered. Its assembly_id is included only when
        num_total_files differs from num_processed_files.

        Returns:
            (list): assembly_id values of the unfinished latest manifests.

        """
        # The following query uses a window function to rank the manifests for all the providers,
        # and then just pulls out the top ranked (most recent) manifests
        manifests = (
            CostUsageReportManifest.objects.filter(billing_period_start_datetime=bill_date)
            .annotate(
                row_number=Window(
                    expression=RowNumber(),
                    partition_by=F("provider_id"),
                    order_by=F("manifest_creation_datetime").desc(),
                )
            )
            .order_by("row_number")
        )
        # A manifest whose files have all been processed is safe to delete, so
        # it is deliberately left out of the result.
        return [
            manifest.assembly_id
            for manifest in manifests
            if manifest.row_number == 1 and manifest.num_total_files != manifest.num_processed_files
        ]

    def purge_expired_report_manifest(self, provider_type, expired_date):
        """
        Deletes Cost usage Report Manifests older than expired_date.

        Args:
            provider_type   (String) the provider type to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.
        """
        delete_count = CostUsageReportManifest.objects.filter(
            provider__type=provider_type, billing_period_start_datetime__lt=expired_date
        ).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider type %s that had a billing period start date before %s",
            delete_count,
            provider_type,
            expired_date,
        )

    def purge_expired_report_manifest_provider_uuid(self, provider_uuid, expired_date):
        """
        Delete cost usage reports older than expired_date and provider_uuid.

        Args:
            provider_uuid (uuid) The provider uuid to use to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.
        """
        # QuerySet.delete() returns (total_deleted, per_model_counts); log only
        # the total so the message reads as a count rather than a tuple.
        delete_count = CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid, billing_period_start_datetime__lt=expired_date
        ).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider_uuid %s that had a billing period start date before %s",
            delete_count,
            provider_uuid,
            expired_date,
        )
Exemplo n.º 19
0
class ReportManifestDBAccessor(KokuDBAccess):
    """Accessor for CUR manifest processing statistics in the koku database."""

    def __init__(self):
        """Bind the accessor to the report manifest table of the public schema."""
        self._schema = 'public'
        super().__init__(self._schema)
        mapped_classes = self.get_base().classes
        self._table = mapped_classes.reporting_common_costusagereportmanifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_id):
        """Return the first manifest matching the provider and assembly id."""
        by_provider = self._get_db_obj_query().filter_by(provider_id=provider_id)
        by_assembly = by_provider.filter_by(assembly_id=assembly_id)
        return by_assembly.first()

    def get_manifest_by_id(self, manifest_id):
        """Return the first manifest whose id equals manifest_id."""
        return self._get_db_obj_query().filter_by(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Stamp the manifest's updated timestamp with the current UTC time."""
        now_utc = self.date_accessor.today_with_timezone('UTC')
        manifest.manifest_updated_datetime = now_utc

    def add(self, use_savepoint=True, **kwargs):
        """
        Insert a new CUR manifest row.

        Args:
            use_savepoint (bool): Forwarded unchanged to the parent ``add``.
            kwargs (dict): CUR Manifest attributes.

            Valid keys are: assembly_id,
                            billing_period_start_datetime,
                            num_processed_files (optional, defaults to 0),
                            num_total_files,
                            provider_id,
        Returns:
            The value returned by the parent class's ``add``.

        """
        if 'manifest_creation_datetime' not in kwargs:
            created_at = self.date_accessor.today_with_timezone('UTC')
            kwargs['manifest_creation_datetime'] = created_at

        kwargs.setdefault('num_processed_files', 0)

        return super().add(use_savepoint, **kwargs)

    def get_last_report_completed_datetime(self, manifest_id):
        """Return the maximum last_completed_datetime over a manifest's report statuses."""
        status_table = self.get_base().classes.reporting_common_costusagereportstatus
        latest_completed = func.max(status_table.last_completed_datetime)
        row = (
            self._session.query(latest_completed)
            .filter(status_table.manifest_id == manifest_id)
            .first()
        )

        return row[0]

    def reset_manifest(self, manifest_id):
        """Put the manifest back into an unprocessed state.

        The processed-file counter is zeroed and the started/completed
        timestamps of every report status of the manifest are cleared, then
        the changes are committed.
        """
        target = self.get_manifest_by_id(manifest_id)
        target.num_processed_files = 0

        status_table = self.get_base().classes.reporting_common_costusagereportstatus
        status_query = self._session.query(status_table)
        report_statuses = status_query.filter(status_table.manifest_id == manifest_id).all()
        for report_status in report_statuses:
            report_status.last_completed_datetime = None
            report_status.last_started_datetime = None

        self.commit()
Exemplo n.º 20
0
class OCPReportParquetSummaryUpdater:
    """Summarize OpenShift report data that was loaded through Presto/Parquet."""

    def __init__(self, schema, provider, manifest):
        """Store the processing context and resolve the cluster identity.

        Args:
            schema (str): The customer schema to associate with
            provider: Provider object; its ``uuid`` drives every lookup here.
            manifest: Report manifest, or a falsy value when none applies.

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._cluster_id = cluster_id
        self._cluster_alias = get_cluster_alias_from_cluster_id(cluster_id)
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Work out the date window the summary SQL should cover.

        With a manifest present, the window may be widened to the whole
        billing month when ``determine_if_full_summary_update_needed`` says
        so for the matching report period.  String dates are parsed into
        ``date`` objects; any other value is passed through untouched.

        Returns:
            (start_date, end_date)

        """
        with OCPReportDBAccessor(self._schema) as db_accessor:
            # This is the normal processing route
            if self._manifest:
                # The manifest's billing period decides which month is in play
                month_start = self._manifest.billing_period_start_datetime.date()
                period_query = db_accessor.get_usage_period_query_by_provider(
                    self._provider.uuid)
                matching_period = period_query.filter(
                    report_period_start=month_start).all().first()
                with schema_context(self._schema):
                    needs_full_month = (
                        determine_if_full_summary_update_needed(matching_period)
                        if matching_period else False
                    )
                if needs_full_month:
                    days_in_month = calendar.monthrange(
                        month_start.year, month_start.month)[1]
                    start_date = month_start
                    end_date = month_start.replace(day=days_in_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        start_date, end_date = (
            ciso8601.parse_datetime(value).date() if isinstance(value, str) else value
            for value in (start_date, end_date)
        )

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Resolve the date window; no daily tables are written on this path.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date produced by ``_get_sql_inputs``.

        """
        first_day, last_day = self._get_sql_inputs(start_date, end_date)
        LOG.info("NO-OP update_daily_tables for: %s-%s", str(first_day),
                 str(last_day))

        return first_day, last_day

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Runs the Presto line item summary for each report period, refreshes
        the label summaries and enabled tags, then stamps every report
        period with its summary creation/update times.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date produced by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        provider_uuid = self._provider.uuid

        with OCPReportDBAccessor(self._schema) as db_accessor:
            with schema_context(self._schema):
                periods = db_accessor.report_periods_for_provider_uuid(
                    provider_uuid, start_date)
                period_ids = [period.id for period in periods]

            for period in periods:
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                    self._schema,
                    provider_uuid,
                    self._cluster_id,
                    period.id,
                    start_date,
                    end_date,
                )
                # This will process POD and STORAGE together
                db_accessor.populate_line_item_daily_summary_table_presto(
                    start_date,
                    end_date,
                    period.id,
                    self._cluster_id,
                    self._cluster_alias,
                    provider_uuid,
                )

            # This will process POD and STORAGE together
            LOG.info(
                "Updating OpenShift label summary tables for \n\tSchema: %s "
                "\n\tReport Period IDs: %s",
                self._schema,
                period_ids,
            )
            db_accessor.populate_pod_label_summary_table(period_ids)
            db_accessor.populate_volume_label_summary_table(period_ids)
            db_accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, period_ids)

            LOG.info("Updating OpenShift report periods")
            current_time = self._date_accessor.today_with_timezone
            for period in periods:
                # The creation stamp is written once; the update stamp every run.
                if period.summary_data_creation_datetime is None:
                    period.summary_data_creation_datetime = current_time("UTC")
                period.summary_data_updated_datetime = current_time("UTC")
                period.save()

        return start_date, end_date
Exemplo n.º 21
0
class AWSReportDBAccessor(ReportDBAccessorBase):
    """Class to interact with customer reporting tables."""

    def __init__(self, schema, column_map):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            column_map (dict): A mapping of report columns to database columns

        """
        super().__init__(schema, column_map)
        self._datetime_format = Config.AWS_DATETIME_STR_FORMAT
        self.column_map = column_map
        self._schema_name = schema
        self.date_accessor = DateAccessor()

    def get_cost_entry_bills(self):
        """Get all cost entry bill objects.

        Returns:
            (dict): Bill id keyed on the tuple
                (bill_type, payer_account_id, billing_period_start, provider_id).

        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            columns = [
                'id', 'bill_type', 'payer_account_id', 'billing_period_start',
                'provider_id'
            ]
            bills = self._get_db_obj_query(table_name).values(*columns)
            return {(bill['bill_type'], bill['payer_account_id'],
                     bill['billing_period_start'], bill['provider_id']):
                    bill['id']
                    for bill in bills}

    def get_cost_entry_bills_by_date(self, start_date):
        """Return a query of cost entry bills whose billing period starts on start_date."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name)\
                .filter(billing_period_start=start_date)

    # pylint: disable=invalid-name
    def get_cost_entry_bills_query_by_provider(self, provider_id):
        """Return all cost entry bills for the specified provider."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name)\
                .filter(provider_id=provider_id)

    def bills_for_provider_id(self, provider_id, start_date=None):
        """Return all cost entry bills for provider_id on date.

        Args:
            provider_id: Provider whose bills are wanted.
            start_date: Optional date handed to ``parse``; when given, only
                bills whose billing period starts on the first day of that
                month are returned.
                NOTE(review): presumably a date string - confirm with callers.

        """
        bills = self.get_cost_entry_bills_query_by_provider(provider_id)
        if start_date:
            # Billing periods are matched on the first day of the month.
            bill_date = parse(start_date).replace(day=1)
            bills = bills.filter(billing_period_start=bill_date)
        return bills

    def get_bill_query_before_date(self, date):
        """Get the cost entry bill objects with billing period on or before provided date."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            cost_entry_bill_query = base_query.filter(
                billing_period_start__lte=date)
            return cost_entry_bill_query

    def get_lineitem_query_for_billid(self, bill_id):
        """Get the AWS cost entry line item for a given bill query."""
        table_name = AWSCostEntryLineItem
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return line_item_query

    def get_daily_query_for_billid(self, bill_id):
        """Get the AWS cost daily item for a given bill query."""
        table_name = AWSCostEntryLineItemDaily
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            daily_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return daily_item_query

    def get_summary_query_for_billid(self, bill_id):
        """Get the AWS cost summary item for a given bill query."""
        table_name = AWSCostEntryLineItemDailySummary
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return summary_item_query

    def get_ocp_aws_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_daily_summary']
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_ocp_aws_project_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report project summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_project_daily_summary']
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_cost_entry_query_for_billid(self, bill_id):
        """Get the AWS cost entry data for a given bill query."""
        table_name = AWSCostEntry
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(bill_id=bill_id)
            return line_item_query

    def get_cost_entries(self):
        """Make a mapping of cost entries by start time.

        Returns:
            (dict): Cost entry id keyed on (bill_id, formatted interval_start).

        """
        table_name = AWSCostEntry
        with schema_context(self.schema):
            cost_entries = self._get_db_obj_query(table_name).all()

            return {(ce.bill_id,
                     ce.interval_start.strftime(self._datetime_format)): ce.id
                    for ce in cost_entries}

    def get_products(self):
        """Make a mapping of product sku to product objects.

        Returns:
            (dict): Product id keyed on (sku, product_name, region).

        """
        table_name = AWSCostEntryProduct
        with schema_context(self.schema):
            columns = ['id', 'sku', 'product_name', 'region']
            products = self._get_db_obj_query(table_name,
                                              columns=columns).all()

            return {(product['sku'], product['product_name'],
                     product['region']): product['id']
                    for product in products}

    def get_pricing(self):
        """Make a mapping of pricing values string to pricing objects.

        Returns:
            (dict): Pricing id keyed on the string '<term>-<unit>'.

        """
        table_name = AWSCostEntryPricing
        with schema_context(self.schema):
            pricing = self._get_db_obj_query(table_name).all()

            return {
                '{term}-{unit}'.format(term=p.term, unit=p.unit): p.id
                for p in pricing
            }

    def get_reservations(self):
        """Make a mapping of reservation ARN to reservation objects.

        Returns:
            (dict): Reservation id keyed on reservation_arn.

        """
        table_name = AWSCostEntryReservation
        with schema_context(self.schema):
            columns = ['id', 'reservation_arn']
            reservs = self._get_db_obj_query(table_name, columns=columns).all()

            return {res['reservation_arn']: res['id'] for res in reservs}

    def populate_line_item_daily_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregate of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) Cost entry bill ids to restrict the SQL to.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP['line_item_daily']
        daily_sql = pkgutil.get_data(
            'masu.database', 'sql/reporting_awscostentrylineitem_daily.sql')

        daily_sql = daily_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            # str() each id so integer ids are accepted as well as strings.
            cost_entry_bill_ids=','.join(str(bill_id) for bill_id in bill_ids),
            schema=self.schema)
        self._commit_and_vacuum(table_name, daily_sql, start_date, end_date)

    # pylint: disable=invalid-name
    def populate_line_item_daily_summary_table(self, start_date, end_date,
                                               bill_ids):
        """Populate the daily aggregated summary of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) Cost entry bill ids to restrict the SQL to.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP['line_item_daily_summary']
        summary_sql = pkgutil.get_data(
            'masu.database',
            'sql/reporting_awscostentrylineitem_daily_summary.sql')
        summary_sql = summary_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            # str() each id so integer ids are accepted as well as strings.
            cost_entry_bill_ids=','.join(str(bill_id) for bill_id in bill_ids),
            schema=self.schema)
        self._commit_and_vacuum(table_name, summary_sql, start_date, end_date)

    def mark_bill_as_finalized(self, bill_id):
        """Mark a bill in the database as finalized.

        A bill that already has a finalized_datetime is left untouched.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            bill = self._get_db_obj_query(table_name)\
                .get(id=bill_id)

            if bill.finalized_datetime is None:
                bill.finalized_datetime = self.date_accessor.today_with_timezone(
                    'UTC')
                bill.save()

    # pylint: disable=invalid-name
    def populate_tags_summary_table(self):
        """Populate the line item aggregated totals data table."""
        table_name = AWS_CUR_TABLE_MAP['tags_summary']

        agg_sql = pkgutil.get_data('masu.database',
                                   'sql/reporting_awstags_summary.sql')
        agg_sql = agg_sql.decode('utf-8').format(schema=self.schema)
        self._commit_and_vacuum(table_name, agg_sql)

    def populate_ocp_on_aws_cost_daily_summary(self, start_date, end_date,
                                               cluster_id, bill_ids):
        """Populate the daily cost aggregated summary for OCP on AWS.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            cluster_id (str) When truthy, restricts the OCP side to this cluster.
            bill_ids (list) When non-empty, restricts the AWS side to these bills.

        Returns
            (None)

        """
        aws_where_clause = ''
        ocp_where_clause = ''
        if bill_ids:
            # str() each id so integer ids are accepted as well as strings.
            ids = ','.join(str(bill_id) for bill_id in bill_ids)
            aws_where_clause = f'AND cost_entry_bill_id IN ({ids})'
        if cluster_id:
            # NOTE(review): cluster_id is interpolated straight into the SQL
            # text - confirm it can never carry untrusted input.
            ocp_where_clause = f"AND cluster_id = '{cluster_id}'"

        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_daily_summary']
        summary_sql = pkgutil.get_data(
            'masu.database',
            'sql/reporting_ocpawscostlineitem_daily_summary.sql')
        summary_sql = summary_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            aws_where_clause=aws_where_clause,
            ocp_where_clause=ocp_where_clause,
            schema=self.schema)
        self._commit_and_vacuum(table_name, summary_sql, start_date, end_date)
class OCPReportParquetSummaryUpdater(PartitionHandlerMixin):
    """Class to update OCP report summary data from Presto/Parquet data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: Provider object; its ``uuid`` is used for all lookups.
            manifest: Report manifest, or a falsy value when none applies.

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._cluster_alias = get_cluster_alias_from_cluster_id(
            self._cluster_id)
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        With a manifest present the window may be widened to the manifest's
        whole billing month.  String dates are parsed into ``date`` objects.

        Returns:
            (start_date, end_date)

        """
        with OCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(
                    self._provider.uuid)
                report_periods = report_periods.filter(
                    report_period_start=bill_date).all()
                first_period = report_periods.first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_period:
                        do_month_update = determine_if_full_summary_update_needed(
                            first_period)
                if do_month_update:
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date
                    end_date = bill_date.replace(day=last_day_of_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        if isinstance(start_date, str):
            start_date = ciso8601.parse_datetime(start_date).date()
        if isinstance(end_date, str):
            end_date = ciso8601.parse_datetime(end_date).date()

        return start_date, end_date

    def _check_parquet_date_range(self, start_date, end_date):
        """Make sure we don't summarize for a date range we don't have data for.

        The start date is moved forward to the earliest parquet timestamp
        when that timestamp is later than the requested start.

        Returns:
            (start_date, end_date)

        """
        # Midnight of the requested start day, for comparison with a timestamp.
        start_datetime = datetime(start_date.year, start_date.month,
                                  start_date.day)
        with OCPReportDBAccessor(self._schema) as accessor:
            min_timestamp, __ = accessor.get_max_min_timestamp_from_parquet(
                self._provider.uuid, start_date, end_date)
            if min_timestamp > start_datetime:
                start_date = min_timestamp.date()
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Nothing is written on this path; only the date window is resolved.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date produced by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info("NO-OP update_daily_tables for: %s-%s", str(start_date),
                 str(end_date))

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Summarization is skipped (after partition handling) when no report
        period exists for the provider and start date.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date actually used for summarization.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        start_date, end_date = self._check_parquet_date_range(
            start_date, end_date)

        with schema_context(self._schema):
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES,
                                    start_date, end_date)

        with OCPReportDBAccessor(self._schema) as accessor:
            with schema_context(self._schema):
                report_period = accessor.report_periods_for_provider_uuid(
                    self._provider.uuid, start_date)
                if not report_period:
                    # Without a report period there is nothing to summarize;
                    # bail out rather than fail on ``report_period.id``.
                    LOG.warning(
                        "No report period for %s with start date %s. Skipping summarization",
                        self._provider.uuid,
                        start_date,
                    )
                    return start_date, end_date
                report_period_id = report_period.id

            # Work through the window in chunks of settings.TRINO_DATE_STEP.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    self._cluster_id,
                    report_period_id,
                    start,
                    end,
                )
                # This will process POD and STORAGE together
                filters = {
                    "report_period_id": report_period_id
                }  # Use report_period_id to leverage DB index on DELETE
                accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                    self._provider.uuid, start, end, filters)
                accessor.populate_line_item_daily_summary_table_presto(
                    start, end, report_period_id, self._cluster_id,
                    self._cluster_alias, self._provider.uuid)
                accessor.populate_ui_summary_tables(start, end,
                                                    self._provider.uuid)

            # This will process POD and STORAGE together
            LOG.info(
                "Updating OpenShift label summary tables for \n\tSchema: %s "
                "\n\tReport Period IDs: %s",
                self._schema,
                [report_period_id],
            )
            accessor.populate_pod_label_summary_table([report_period_id],
                                                      start_date, end_date)
            accessor.populate_volume_label_summary_table([report_period_id],
                                                         start_date, end_date)
            accessor.populate_openshift_cluster_information_tables(
                self._provider, self._cluster_id, self._cluster_alias,
                start_date, end_date)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, [report_period_id])

            LOG.info("Updating OpenShift report periods")
            # The creation stamp is written once; the update stamp every run.
            if report_period.summary_data_creation_datetime is None:
                report_period.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
            report_period.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                "UTC")
            report_period.save()

            self.check_cluster_infrastructure(start_date, end_date)

        return start_date, end_date

    def check_cluster_infrastructure(self, start_date, end_date):
        """Log which cloud source, if any, this OpenShift cluster runs on.

        The infrastructure map is taken from the providers first and, when
        that is empty, generated through the Trino SQL path for the given
        date range.

        Args:
            start_date: Start of the range passed to the Trino lookup.
            end_date: End of the range passed to the Trino lookup.

        Returns:
            None

        """
        LOG.info(
            "Checking if OpenShift cluster %s is running on cloud infrastructure.",
            self._provider.uuid)
        updater_base = OCPCloudUpdaterBase(self._schema, self._provider,
                                           self._manifest)
        infra_map = updater_base.get_infra_map_from_providers()
        if not infra_map:
            # Check the cluster to see if it is running on cloud infrastructure
            infra_map = updater_base._generate_ocp_infra_map_from_sql_trino(
                start_date, end_date)
        if infra_map:
            for ocp_source, infra_tuple in infra_map.items():
                LOG.info("OpenShift cluster %s is running on %s source %s.",
                         ocp_source, infra_tuple[1], infra_tuple[0])
class GCPReportParquetSummaryUpdater:
    """Class to update GCP report parquet summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish parquet summary processor.

        Args:
            schema (str): The customer schema to associate with
            provider: Provider object; its ``uuid`` is used for all lookups.
            manifest: Report manifest, or a falsy value when none applies.

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        With a manifest present the window may be widened to the manifest's
        whole billing month.  String dates are parsed into ``date`` objects.

        Returns:
            (start_date, end_date)

        """
        with GCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                bills = accessor.get_cost_entry_bills_query_by_provider(
                    self._provider.uuid)
                # One filter is enough; applying the same condition twice
                # only repeats the WHERE clause.
                first_bill = bills.filter(
                    billing_period_start=bill_date).first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_bill:
                        do_month_update = determine_if_full_summary_update_needed(
                            first_bill)
                if do_month_update:
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date
                    end_date = bill_date.replace(day=last_day_of_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        if isinstance(start_date, str):
            start_date = ciso8601.parse_datetime(start_date).date()
        if isinstance(end_date, str):
            end_date = ciso8601.parse_datetime(end_date).date()

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Nothing is written on this path; only the date window is resolved.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date produced by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info("update_daily_tables for: %s-%s", str(start_date),
                 str(end_date))

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Summarization is skipped when no bill exists for the start date.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            The start and end date produced by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with CostModelDBAccessor(self._schema,
                                 self._provider.uuid) as cost_model_accessor:
            markup = cost_model_accessor.markup
            # The stored value is divided by 100 before being handed to the SQL.
            markup_value = float(markup.get("value", 0)) / 100

        with GCPReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            with schema_context(self._schema):
                bills = accessor.bills_for_provider_uuid(
                    self._provider.uuid, start_date)
                bill_ids = [str(bill.id) for bill in bills]
                current_bill_id = bills.first().id if bills else None

            if current_bill_id is None:
                LOG.info("No bill was found for %s. Skipping summarization",
                         start_date)
                return start_date, end_date

            # Work through the window in chunks of settings.TRINO_DATE_STEP.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating GCP report summary tables from parquet: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                )
                accessor.delete_line_item_daily_summary_entries_for_date_range(
                    self._provider.uuid, start, end)
                accessor.populate_line_item_daily_summary_table_presto(
                    start, end, self._provider.uuid, current_bill_id,
                    markup_value)
                accessor.populate_enabled_tag_keys(start, end, bill_ids)
            accessor.populate_tags_summary_table(bill_ids)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, bill_ids)
            for bill in bills:
                # The creation stamp is written once; the update stamp every run.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                        "UTC")
                bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
                bill.save()

        return start_date, end_date
Exemplo n.º 24
0
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None):
        """
        Initializer.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid (str): UUID of the provider to summarize.
            manifest_id: Optional id of the manifest to load; when None no
                manifest is looked up.

        Raises:
            ReportSummaryUpdaterError: The provider was not found, building
                the updaters failed, or the provider type has no updater.

        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(
                    manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()

        if not self._provider:
            raise ReportSummaryUpdaterError("Provider not found.")

        try:
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            # Chain explicitly so the original failure stays attached as the cause.
            raise ReportSummaryUpdaterError(err) from err

        if not self._updater:
            raise ReportSummaryUpdaterError("Invalid provider type specified.")
        LOG.info("Starting report data summarization for provider uuid: %s.",
                 self._provider.uuid)

    def _set_updater(self):
        """
        Create the report summary updater objects.

        Objects are specific to the report provider.

        Args:
            None

        Returns:
            (tuple): The provider-specific report summary updater and the
                OCP-on-cloud summary updater, or (None, None) when the
                provider type is not handled here.

        """
        if self._provider.type in (Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL):
            return (
                AWSReportSummaryUpdater(self._schema, self._provider,
                                        self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )
        if self._provider.type in (Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL):
            return (
                AzureReportSummaryUpdater(self._schema, self._provider,
                                          self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )
        if self._provider.type == Provider.PROVIDER_OCP:
            return (
                OCPReportSummaryUpdater(self._schema, self._provider,
                                        self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )

        return (None, None)

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater.

        Date/datetime values become "YYYY-MM-DD" strings.  An end date of
        None is replaced by the date accessor's current UTC day.  Any other
        value (e.g. an already formatted string) is returned unchanged.

        Returns:
            (start_date, end_date)

        """
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime("%Y-%m-%d")
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime("%Y-%m-%d")
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone("UTC")
            end_date = end_date.strftime("%Y-%m-%d")
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """
        Update report daily rollup tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            (tuple): The start and end date handed back by the
                provider-specific updater.

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        start_date, end_date = self._updater.update_daily_tables(
            start_date, end_date)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """
        Update report summary tables.

        The provider-specific updater runs first; the dates it returns are
        then passed to the OCP-on-cloud updater.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        LOG.info("Using start date: %s", start_date)
        LOG.info("Using end date: %s", end_date)

        start_date, end_date = self._updater.update_summary_tables(
            start_date, end_date)

        self._ocp_cloud_updater.update_summary_tables(start_date, end_date)

    def update_cost_summary_table(self, start_date, end_date):
        """
        Update cost summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        self._ocp_cloud_updater.update_cost_summary_table(start_date, end_date)
Exemplo n.º 25
0
class ReportProcessorBase:
    """
    Base class for cost report processors.

    Holds the state shared by report processors (schema, report location,
    compression, provider and manifest) together with helpers for deciding
    which rows to process, deleting stale line items and bulk loading
    processed rows into the database.
    """
    def __init__(self, schema_name, report_path, compression, provider_uuid,
                 manifest_id, processed_report):
        """Initialize the report processor base class.

        Args:
            schema_name (str): The name of the customer schema to process into
            report_path (str): Where the report file lives in the file system
            compression (CONST): How the report file is compressed.
                Accepted values: UNCOMPRESSED, GZIP_COMPRESSED
            provider_uuid: Identifier of the provider the report belongs to
            manifest_id: Identifier of the report manifest; may be falsy when
                no manifest is available
            processed_report: Container exposing ``line_items``, the processed
                rows waiting to be saved

        Raises:
            MasuProcessingError: If the upper-cased compression is not in
                ALLOWED_COMPRESSIONS.

        """
        normalized_compression = compression.upper()
        if normalized_compression not in ALLOWED_COMPRESSIONS:
            err_msg = f"Compression {compression} is not supported."
            raise MasuProcessingError(err_msg)

        self._schema = schema_name
        self._report_path = report_path
        self._compression = normalized_compression
        self._provider_uuid = provider_uuid
        self._manifest_id = manifest_id
        self.processed_report = processed_report
        self.date_accessor = DateAccessor()

    @property
    def data_cutoff_date(self):
        """Determine the date we should use to process and delete data.

        Returns:
            (date): Two days before today (UTC). When that falls in a
                different month than today, the first day of today's month
                is returned instead.

        """
        today = self.date_accessor.today_with_timezone("UTC").date()
        data_cutoff_date = today - relativedelta(days=2)
        if today.month != data_cutoff_date.month:
            # Never reach back across a month boundary.
            data_cutoff_date = today.replace(day=1)
        return data_cutoff_date

    def _get_data_for_table(self, row, table_name):
        """Extract the data from a row for a specific table.

        Column names are matched case-insensitively against
        ``REPORT_COLUMN_MAP[table_name]``; row keys without a mapping are
        dropped.

        Args:
            row (dict): A dictionary representation of a CSV file row
            table_name (str): The DB table fields are required for

        Returns:
            (dict): The data from the row keyed on the DB table's column names

        """
        column_map = REPORT_COLUMN_MAP[table_name]
        lower_case_column_map = {
            key.lower(): value
            for key, value in column_map.items()
        }

        return {
            lower_case_column_map[key.lower()]: value
            for key, value in row.items()
            if key.lower() in lower_case_column_map
        }

    @staticmethod
    def _get_file_opener(compression):
        """Get the file opener for the file's compression.

        Args:
            compression (str): The compression format for the file.

        Returns:
            (file opener, str): The proper file stream handler for the
                compression and the read mode for the file

        """
        if compression == GZIP_COMPRESSED:
            return gzip.open, "rt"
        return open, "r"  # assume uncompressed by default

    def _write_processed_rows_to_csv(self):
        """Write the processed line items to an in-memory CSV stream.

        Only the values of each line item are written (no header row).

        Returns:
            (io.StringIO): The CSV content, rewound to the beginning.

        """
        values = [
            tuple(item.values()) for item in self.processed_report.line_items
        ]

        file_obj = io.StringIO()
        writer = csv.writer(file_obj,
                            delimiter=",",
                            quoting=csv.QUOTE_MINIMAL,
                            quotechar='"')
        writer.writerows(values)
        file_obj.seek(0)

        return file_obj

    def _save_to_db(self, temp_table, report_db_accessor):
        """Save current batch of records to the database.

        The column names are taken from the keys of the first line item, so
        at least one line item must be present.

        Args:
            temp_table: The table passed through to ``bulk_insert_rows``
            report_db_accessor: Accessor providing ``bulk_insert_rows``

        """
        columns = tuple(self.processed_report.line_items[0].keys())
        csv_file = self._write_processed_rows_to_csv()

        report_db_accessor.bulk_insert_rows(csv_file, temp_table, columns)

    def _should_process_row(self,
                            row,
                            date_column,
                            is_full_month,
                            is_finalized=None):
        """Determine if we want to process this row.

        Args:
            row (dict): The line item entry from the report file
            date_column (str): The name of date column to check
            is_full_month (boolean): If this is the first time we've processed this bill

        Kwargs:
            is_finalized (boolean): If this is a finalized bill

        Returns:
            (bool): Whether this row should be processed

        """
        if is_finalized or is_full_month:
            return True
        # Mid-month update: only rows on or after the cutoff are processed.
        row_date = ciso8601.parse_datetime(row[date_column]).date()
        return row_date >= self.data_cutoff_date

    def _should_process_full_month(self):
        """Determine if we should process the full month of data.

        Returns:
            (bool): True when there is no manifest, when the manifest's bill
                is not for the cutoff date's month, when this is the only
                manifest for the bill, or when no manifest for the bill is
                ready for summary. False once any manifest for the bill is
                ready for summary.

        """
        if not self._manifest_id:
            log_statement = (
                f"No manifest provided, processing as a new billing period.\n"
                f" Processing entire month.\n"
                f" schema_name: {self._schema},\n"
                f" provider_uuid: {self._provider_uuid},\n"
                f" manifest_id: {self._manifest_id}")
            LOG.info(log_statement)
            return True

        # Read the cutoff once so every comparison and log line agrees.
        cutoff_date = self.data_cutoff_date

        # All manifest lookups happen while the accessor's context is active.
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self._manifest_id)
            bill_date = manifest.billing_period_start_datetime.date()
            provider_uuid = manifest.provider_id

            log_statement = (f"Processing bill starting on {bill_date}.\n"
                             f" Processing entire month.\n"
                             f" schema_name: {self._schema},\n"
                             f" provider_uuid: {self._provider_uuid},\n"
                             f" manifest_id: {self._manifest_id}")

            # A bill for any month other than the cutoff's is always
            # processed in full.
            if (bill_date.year, bill_date.month) != (cutoff_date.year,
                                                     cutoff_date.month):
                LOG.info(log_statement)
                return True

            manifest_list = manifest_accessor.get_manifest_list_for_provider_and_bill_date(
                provider_uuid, bill_date)

            if len(manifest_list) == 1:
                # This is the first manifest for this bill and we are currently
                # processing it
                LOG.info(log_statement)
                return True

            for bill_manifest in manifest_list:
                if manifest_accessor.manifest_ready_for_summary(
                        bill_manifest.id):
                    log_statement = (
                        f"Processing bill starting on {bill_date}.\n"
                        f" Processing data on or after {cutoff_date}.\n"
                        f" schema_name: {self._schema},\n"
                        f" provider_uuid: {self._provider_uuid},\n"
                        f" manifest_id: {self._manifest_id}")
                    LOG.info(log_statement)
                    # We have fully processed a manifest for this provider
                    return False

        return True

    def _delete_line_items(self, db_accessor, is_finalized=None):
        """Delete stale data for the report being processed, if necessary.

        Args:
            db_accessor: Callable that takes the schema name and returns a
                context-managed report DB accessor
            is_finalized (boolean): If this is a finalized bill; ``None`` is
                treated as False

        Returns:
            (bool): False when there is no manifest or when files of the
                manifest have already been processed; True once the delete
                pass has run.

        """
        if not self._manifest_id:
            return False

        if is_finalized is None:
            is_finalized = False
        is_full_month = self._should_process_full_month()

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self._manifest_id)
            num_processed_files = manifest_accessor.number_of_files_processed(
                self._manifest_id)
            if num_processed_files != 0:
                # Only the first file of a manifest triggers the delete.
                return False
            # Override the bill date to correspond with the manifest
            bill_date = manifest.billing_period_start_datetime.date()
            provider_uuid = manifest.provider_id

        date_filter = self.get_date_column_filter()

        # This means we are processing a mid-month update
        # and only need to delete a small window of data
        partial_delete = not is_finalized and not is_full_month
        delete_date = self.data_cutoff_date if partial_delete else bill_date

        with db_accessor(self._schema) as accessor:
            bills = accessor.get_cost_entry_bills_query_by_provider(
                provider_uuid)
            bills = bills.filter(billing_period_start=bill_date).all()
            with schema_context(self._schema):
                for bill in bills:
                    line_item_query = accessor.get_lineitem_query_for_billid(
                        bill.id)
                    if partial_delete:
                        line_item_query = line_item_query.filter(**date_filter)
                    log_statement = (f"Deleting data for:\n"
                                     f" schema_name: {self._schema}\n"
                                     f" provider_uuid: {provider_uuid}\n"
                                     f" bill date: {str(bill_date)}\n"
                                     f" bill ID: {bill.id}\n"
                                     f" on or after {delete_date}.")
                    LOG.info(log_statement)
                    line_item_query.delete()

        return True

    def get_date_column_filter(self):
        """Return a filter using the provider-appropriate column.

        Returns:
            (dict): A ``<column>__gte`` filter on the data cutoff date, using
                ``usage_date`` for Azure provider types and ``usage_start``
                for every other type.

        """
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            provider_type = provider_accessor.get_type()
        if provider_type in (Provider.PROVIDER_AZURE,
                             Provider.PROVIDER_AZURE_LOCAL):
            return {"usage_date__gte": self.data_cutoff_date}
        return {"usage_start__gte": self.data_cutoff_date}

    @staticmethod
    def remove_temp_cur_files(report_path):
        """Remove temporary report files.

        This base implementation removes nothing.

        Args:
            report_path: Unused here.

        Returns:
            (list): The removed files; always empty in this implementation.

        """
        removed_files = []
        return removed_files
Exemplo n.º 26
0
class GCPReportSummaryUpdater:
    """Refresh the GCP daily and summary reporting tables for one provider."""

    def __init__(self, schema, provider, manifest):
        """Store the context needed to run the GCP summary SQL.

        Args:
            schema (str): The customer schema to associate with
            provider: The provider being summarized; its ``uuid`` is read
            manifest: The manifest being processed, or a falsy value if none

        """
        self._manifest = manifest
        self._provider = provider
        self._schema = schema
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is set, the scan range recorded for it replaces the
        given dates; keys missing from that range fall back to the arguments.

        Returns:
            (start, end): The date range to use.

        """
        with GCPReportDBAccessor(self._schema) as gcp_accessor:
            # This is the normal processing route
            if self._manifest:
                scan_range = gcp_accessor.get_gcp_scan_range_from_report_name(
                    manifest_id=self._manifest.id)
                start_date = scan_range.get("start", start_date)
                end_date = scan_range.get("end", end_date)

        return start_date, end_date

    def _bill_ids_for_range(self, start_date, end_date):
        """Return the ids, as strings, of the provider's bills in the range.

        Args:
            start_date (str) Range start formatted as ``%Y-%m-%d``.
            end_date   (str) Range end formatted as ``%Y-%m-%d``.

        """
        provider_bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema,
            datetime.datetime.strptime(start_date, "%Y-%m-%d"),
            datetime.datetime.strptime(end_date, "%Y-%m-%d"),
        )
        bill_ids = []
        with schema_context(self._schema):
            bill_ids = [str(bill.id) for bill in provider_bills]
        return bill_ids

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bill_ids = self._bill_ids_for_range(start_date, end_date)

        with GCPReportDBAccessor(self._schema) as gcp_accessor:
            for chunk_start, chunk_end in date_range_pair(start_date,
                                                          end_date):
                LOG.info(
                    "Updating GCP report daily tables for \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                    self._schema,
                    self._provider.uuid,
                    chunk_start,
                    chunk_end,
                    str(bill_ids),
                )
                gcp_accessor.populate_line_item_daily_table(
                    chunk_start, chunk_end, bill_ids)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        After the summary and tag tables are populated, every bill returned
        for the provider gets its summary timestamps refreshed.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bill_ids = self._bill_ids_for_range(start_date, end_date)

        with GCPReportDBAccessor(self._schema) as gcp_accessor:
            # Need these bills on the session to update dates after processing
            session_bills = gcp_accessor.bills_for_provider_uuid(
                self._provider.uuid, start_date)
            for chunk_start, chunk_end in date_range_pair(start_date,
                                                          end_date):
                LOG.info(
                    "Updating GCP report summary tables: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                    self._schema,
                    self._provider.uuid,
                    chunk_start,
                    chunk_end,
                    str(bill_ids),
                )
                gcp_accessor.populate_line_item_daily_summary_table(
                    chunk_start, chunk_end, bill_ids)
            gcp_accessor.populate_tags_summary_table(bill_ids, start_date,
                                                     end_date)
            for bill in session_bills:
                # The creation stamp is only written the first time.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = (
                        self._date_accessor.today_with_timezone("UTC"))
                bill.summary_data_updated_datetime = (
                    self._date_accessor.today_with_timezone("UTC"))
                bill.save()

        return start_date, end_date
Exemplo n.º 27
0
class OCPReportSummaryUpdater:
    """Refresh the OpenShift daily and summary reporting tables."""

    def __init__(self, schema, provider, manifest):
        """Store the context needed to run the OCP summary SQL.

        Besides keeping the arguments, this looks up the cluster id for the
        provider and reads the column map from the reporting common accessor.

        Args:
            schema (str): The customer schema to associate with
            provider: The provider being summarized; ``uuid`` and ``id`` are read
            manifest: The manifest being processed, or a falsy value if none

        """
        self._schema_name = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        with ReportingCommonDBAccessor() as common_accessor:
            self._column_map = common_accessor.column_map
        self._date_accessor = DateAccessor()

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info(
            'Updating OpenShift report daily tables for \n\tSchema: %s '
            '\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s',
            self._schema_name, self._provider.uuid, self._cluster_id,
            start_date, end_date)

        # Usage and storage tables are populated in separate accessor contexts.
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as usage_accessor:
            usage_accessor.populate_line_item_daily_table(
                start_date, end_date, self._cluster_id)
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as storage_accessor:
            storage_accessor.populate_storage_line_item_daily_table(
                start_date, end_date, self._cluster_id)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        After the summary and label tables are populated, every report period
        returned for the provider gets its summary timestamps refreshed.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info(
            'Updating OpenShift report summary tables for \n\tSchema: %s '
            '\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s',
            self._schema_name, self._provider.uuid, self._cluster_id,
            start_date, end_date)

        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as ocp_accessor:
            provider_periods = ocp_accessor.report_periods_for_provider_id(
                self._provider.id, start_date)
            ocp_accessor.populate_line_item_daily_summary_table(
                start_date, end_date, self._cluster_id)
            ocp_accessor.populate_pod_label_summary_table()
            ocp_accessor.populate_storage_line_item_daily_summary_table(
                start_date, end_date, self._cluster_id)
            ocp_accessor.populate_volume_claim_label_summary_table()
            ocp_accessor.populate_volume_label_summary_table()

            for period in provider_periods:
                # The creation stamp is only written the first time.
                if period.summary_data_creation_datetime is None:
                    period.summary_data_creation_datetime = (
                        self._date_accessor.today_with_timezone('UTC'))
                period.summary_data_updated_datetime = (
                    self._date_accessor.today_with_timezone('UTC'))
                period.save()

        return start_date, end_date

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        With a manifest set, the range is widened to the manifest's whole
        billing month unless a matching report period exists and does not
        need a full summary update. Without a manifest the dates are
        returned unchanged.

        Returns:
            (start, end): The date range to use.

        """
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as ocp_accessor:
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                period_query = ocp_accessor.get_usage_period_query_by_provider(
                    self._provider.id)
                matching_periods = period_query.filter(
                    report_period_start=bill_date).all()

                # Default to the full month when no period exists yet.
                do_month_update = True
                with schema_context(self._schema_name):
                    if matching_periods:
                        do_month_update = self._determine_if_full_summary_update_needed(
                            matching_periods[0])

                if do_month_update:
                    _, last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)
                    month_end = bill_date.replace(day=last_day_of_month)
                    start_date = bill_date.strftime('%Y-%m-%d')
                    end_date = month_end.strftime('%Y-%m-%d')
                    LOG.info(
                        'Overriding start and end date to process full month.')
                LOG.info('Returning start: %s, end: %s', str(start_date),
                         str(end_date))
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, report_period):
        """Decide whether to update summary tables for full billing period.

        Args:
            report_period: The report period whose
                ``summary_data_creation_datetime`` is inspected.

        Returns:
            (bool): True only when every file of the manifest has been
                processed and the period has never been summarized.

        """
        all_files_processed = (self._manifest.num_processed_files ==
                               self._manifest.num_total_files)
        never_summarized = report_period.summary_data_creation_datetime is None

        # Run the full month if this is the first time we've seen this report
        # period
        return bool(all_files_processed and never_summarized)
Exemplo n.º 28
0
class ProviderDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for Provider Data."""

    def __init__(self, provider_uuid=None, auth_id=None):
        """
        Establish Provider database connection.

        Args:
            provider_uuid  (String) the uuid of the provider
            auth_id        (String) provider authentication database id

        """
        super().__init__("public")
        self._uuid = provider_uuid
        self._auth_id = auth_id
        self._table = Provider
        self._provider = None
        self.date_accessor = DateAccessor()

    @property
    def provider(self):
        """Return the provider this accessor is instantiated for.

        The provider is looked up on first access and cached once found;
        ``None`` is returned (and the lookup retried next time) when no
        provider matches.
        """
        if self._provider is None:
            # ``first()`` already yields None for an empty queryset, so there
            # is no need to evaluate the whole queryset beforehand.
            self._provider = self._get_db_obj_query().first()
        return self._provider

    @property
    def infrastructure(self):
        """Return the infrastructure object for the provider."""
        if self.provider:
            return self.provider.infrastructure
        return None

    def _get_db_obj_query(self):
        """
        Return the queryset selecting the provider object.

        The queryset is filtered by whichever of the authentication id and
        uuid were given; with neither, an empty queryset is returned.

        Args:
            None
        Returns:
            (QuerySet): Provider rows matching this accessor's identifiers.

        """
        if not self._auth_id and not self._uuid:
            return self._table.objects.none()
        query = self._table.objects.all()
        if self._auth_id:
            query = query.filter(authentication_id=self._auth_id)
        if self._uuid:
            query = query.filter(uuid=self._uuid)

        return query

    def get_provider(self):
        """Return the provider."""
        return self.provider

    def get_uuid(self):
        """
        Return the provider uuid.

        Args:
            None
        Returns:
            (String): "UUID v4", or None when there is no provider.
                    example: "edf94475-235e-4b64-ba18-0b81f2de9c9e"

        """
        return str(self.provider.uuid) if self.provider else None

    def get_provider_name(self):
        """
        Return the provider name.

        Args:
            None
        Returns:
            (String): "Provider Name assigned by the customer", or None when
                    there is no provider.
                    example: "Test Provider"

        """
        return self.provider.name if self.provider else None

    def get_type(self):
        """
        Return the provider type.

        Args:
            None
        Returns:
            (String): "Provider type.  Cloud backend name", or None when
                    there is no provider.
                    example: "AWS"

        """
        return self.provider.type if self.provider else None

    def get_additional_context(self):
        """
        Return additional context information.

        Args:
            None
        Returns:
            (dict): The provider's additional context, or an empty dict when
                    there is no provider.
                    example: { 'crawl_hierarchy': True }
        """
        return self.provider.additional_context if self.provider else {}

    def get_credentials(self):
        """
        Return the credential information.

        Args:
            None
        Returns:
            (dict): {"credentials": "Provider Resource Name.  i.e. AWS: RoleARN"},
                    or None when the provider or its authentication is missing.
                    example: {"role_arn": "arn:aws:iam::111111111111:role/CostManagement"}

        """
        credentials = None
        if self.provider and self.provider.authentication:
            credentials = self.provider.authentication.credentials
        return credentials

    def get_data_source(self):
        """
        Return the data_source information.

        Args:
            None
        Returns:
            (dict): "Identifier for cost usage report.  i.e. AWS: S3 Bucket",
                    or None when the provider or its billing source is missing.
                    example: {"bucket": "my-s3-cur-bucket"}

        """
        data_source = None
        if self.provider and self.provider.billing_source:
            data_source = self.provider.billing_source.data_source
        return data_source

    def get_setup_complete(self):
        """
        Return whether or not a report has been processed.

        Args:
            None
        Returns:
            (Boolean): "True if a report has been processed for the provider.",
                    or None when there is no provider.

        """
        return self.provider.setup_complete if self.provider else None

    def setup_complete(self):
        """
        Set setup_complete to True.

        The provider is saved and the sources view cache is invalidated.

        Args:
            None
        Returns:
            None

        """
        self.provider.setup_complete = True
        self.provider.save()
        invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def get_customer_uuid(self):
        """
        Return the provider's customer uuid.

        Args:
            None
        Returns:
            (String): "UUID v4",
                    example: "edf94475-235e-4b64-ba18-0b81f2de9c9e"

        """
        return str(self.provider.customer.uuid)

    def get_customer_name(self):
        """
        Return the provider's customer name.

        This is the same value as ``get_schema()``.

        Args:
            None
        Returns:
            (String): "Name of the customer",
                    example: "Customer 1 Inc."

        """
        return self.get_schema()

    def get_schema(self):
        """
        Return the schema for the customer.

        Args:
            None
        Returns:
            (String): "Name of the database schema",

        """
        return self.provider.customer.schema_name

    def get_infrastructure_type(self):
        """Return the infrastructure type for an OpenShift provider."""
        if self.infrastructure:
            return self.infrastructure.infrastructure_type
        return None

    def get_infrastructure_provider_uuid(self):
        """Return the UUID of the infrastructure provider an OpenShift cluster is installed on."""
        if self.infrastructure:
            infra_uuid = self.infrastructure.infrastructure_provider.uuid
            return str(infra_uuid) if infra_uuid else None
        return None

    @transaction.atomic()
    def set_infrastructure(self, infrastructure_provider_uuid, infrastructure_type):
        """Create an infrastructure mapping for an OpenShift provider.

        An existing mapping for the same provider and type is reused. The
        provider is saved and the sources view cache is invalidated.

        Args:
            infrastructure_type (str): The provider type this cluster is installed on.
                Ex. AWS, AZURE, GCP
            infrastructure_provider_uuid (str): The UUID of the provider this cluster
                is installed on.

        Returns:
            None

        """
        mapping, _ = ProviderInfrastructureMap.objects.get_or_create(
            infrastructure_provider_id=infrastructure_provider_uuid, infrastructure_type=infrastructure_type
        )

        self.provider.infrastructure = mapping
        self.provider.save()
        invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def get_associated_openshift_providers(self):
        """Return the OpenShift clusters associated with the cloud provider.

        Only the first infrastructure mapping found for this provider is
        used; an empty list is returned when there is none.
        """
        associated_openshift_providers = []

        mapping = ProviderInfrastructureMap.objects.filter(infrastructure_provider_id=self.provider.uuid).first()

        if mapping:
            associated_openshift_providers = Provider.objects.filter(infrastructure=mapping).all()

        return associated_openshift_providers

    def set_data_updated_timestamp(self):
        """Set the data updated timestamp to the current time.

        Does nothing when there is no provider; otherwise the provider is
        saved and the sources view cache is invalidated.
        """
        if self.provider:
            self.provider.data_updated_timestamp = self.date_accessor.today_with_timezone("UTC")
            self.provider.save()
            invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def set_additional_context(self, new_value):
        """Set the additional context value.

        Does nothing when there is no provider; otherwise the provider is
        saved and the sources view cache is invalidated.
        """
        if self.provider:
            self.provider.additional_context = new_value
            self.provider.save()
            invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""
    def __init__(self):
        """Set up access to the report manifest table in the public schema."""
        self._schema = "public"
        super().__init__(self._schema)
        # Used to timestamp manifests.
        self.date_accessor = DateAccessor()
        self._table = CostUsageReportManifest

    def get_manifest(self, assembly_id, provider_uuid):
        """Return the first manifest matching the provider and assembly id.

        Returns whatever ``first()`` yields on the filtered query.
        """
        provider_manifests = self._get_db_obj_query().filter(
            provider_id=provider_uuid)
        matching_manifests = provider_manifests.filter(assembly_id=assembly_id)
        return matching_manifests.first()

    def get_manifest_by_id(self, manifest_id):
        """Return the first manifest with the given id.

        The lookup runs inside this accessor's schema context and returns
        whatever ``first()`` yields on the filtered query.
        """
        with schema_context(self._schema):
            return self._get_db_obj_query().filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Stamp the manifest's updated datetime with the current UTC time.

        A falsy manifest is ignored; otherwise the manifest is saved.
        """
        if not manifest:
            return
        timestamp = self.date_accessor.today_with_timezone("UTC")
        manifest.manifest_updated_datetime = timestamp
        manifest.save()

    def mark_manifest_as_completed(self, manifest):
        """Stamp the manifest's completed datetime with the current UTC time.

        A falsy manifest is ignored; otherwise the manifest is saved.
        """
        if not manifest:
            return
        timestamp = self.date_accessor.today_with_timezone("UTC")
        manifest.manifest_completed_datetime = timestamp
        manifest.save()

    def update_number_of_files_for_manifest(self, manifest):
        """Update the number of files for manifest.

        Sets ``num_total_files`` to the number of report status records that
        reference the manifest and saves it. A falsy manifest is ignored.

        Args:
            manifest: The manifest to update, or None.

        Returns:
            None

        """
        # Guard first: the count below needs ``manifest.id``.
        if not manifest:
            return
        manifest.num_total_files = CostUsageReportStatus.objects.filter(
            manifest_id=manifest.id).count()
        manifest.save()

    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        ``manifest_creation_datetime`` defaults to the current UTC time when
        it is not supplied.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_total_files,
                                provider_uuid,
        Returns:
            Whatever the parent class ``add`` returns.

        """
        if "manifest_creation_datetime" not in kwargs:
            created_at = self.date_accessor.today_with_timezone("UTC")
            kwargs["manifest_creation_datetime"] = created_at

        # The Django model insists on calling this field provider_id
        if "provider_uuid" in kwargs:
            kwargs["provider_id"] = kwargs.pop("provider_uuid")

        return super().add(**kwargs)

    def manifest_ready_for_summary(self, manifest_id):
        """Return True when the manifest has no incomplete report files.

        This is the negation of ``is_last_completed_datetime_null``.
        """
        has_incomplete_files = self.is_last_completed_datetime_null(
            manifest_id)
        return not has_incomplete_files

    def number_of_files_processed(self, manifest_id):
        """Count the manifest's report status records that have a completed datetime."""
        completed_files = CostUsageReportStatus.objects.filter(
            manifest_id=manifest_id,
            last_completed_datetime__isnull=False)
        return completed_files.count()

    def is_last_completed_datetime_null(self, manifest_id):
        """Determine if nulls exist in last_completed_datetime for manifest_id.

        If the record does not exist, that is equivalent to a null completed datetime.
        Return True if record either doesn't exist or if null `last_completed_datetime`.
        Return False otherwise.

        """
        records = CostUsageReportStatus.objects.filter(manifest_id=manifest_id)
        # ``exists()`` answers the emptiness question without loading every
        # status row the way a truthiness test on the queryset would.
        if not records.exists():
            return True
        return records.filter(last_completed_datetime__isnull=True).exists()

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid,
                                                     bill_date):
        """Return all manifests for a provider whose billing period starts on bill_date."""
        return CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid,
            billing_period_start_datetime__date=bill_date,
        ).all()

    def get_last_seen_manifest_ids(self, bill_date):
        """Return the assembly ids of the latest manifests still being processed.

        For each provider, the most recently created manifest for the bill
        date is considered. Its assembly_id is included only when the manifest
        is not yet ready for summary; fully processed manifests are left out
        because they are safe to delete.

        Returns:
            (list): The assembly ids of the unprocessed latest manifests.

        """
        # The following query uses a window function to rank the manifests for all the providers,
        # and then just pulls out the top ranked (most recent) manifests
        newest_first = Window(
            expression=RowNumber(),
            partition_by=F("provider_id"),
            order_by=F("manifest_creation_datetime").desc(),
        )
        ranked_manifests = (CostUsageReportManifest.objects.filter(
            billing_period_start_datetime=bill_date).annotate(
                row_number=newest_first).order_by("row_number"))
        latest_manifests = [
            candidate for candidate in ranked_manifests
            if candidate.row_number == 1
        ]
        return [
            latest.assembly_id for latest in latest_manifests
            if not self.manifest_ready_for_summary(latest.id)
        ]

    def purge_expired_report_manifest(self, provider_type, expired_date):
        """
        Delete the manifests of a provider type whose billing period predates expired_date.

        Args:
            provider_type   (String) the provider type to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.
        """
        expired_manifests = CostUsageReportManifest.objects.filter(
            provider__type=provider_type,
            billing_period_start_datetime__lt=expired_date,
        )
        deletion_result = expired_manifests.delete()
        # The first element of the delete() result is the number of rows removed.
        removed_total = deletion_result[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider type %s that had a billing period start date before %s",
            removed_total,
            provider_type,
            expired_date,
        )

    def purge_expired_report_manifest_provider_uuid(self, provider_uuid,
                                                    expired_date):
        """
        Delete the manifests of provider_uuid whose billing period predates expired_date.

        Args:
            provider_uuid (uuid) The provider uuid to use to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.

        Returns:
            None
        """
        # QuerySet.delete() returns a (total, per-model breakdown) tuple; only the
        # total belongs in the log message, matching purge_expired_report_manifest.
        delete_count = CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid,
            billing_period_start_datetime__lt=expired_date).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider_uuid %s that had a billing period start date before %s",
            delete_count,
            provider_uuid,
            expired_date,
        )

    def get_s3_csv_cleared(self, manifest):
        """Tell whether the CSV files in S3 were cleared for this manifest.

        A falsy manifest (for example None) yields False; otherwise the
        manifest's ``s3_csv_cleared`` value is returned.
        """
        if not manifest:
            return False
        return manifest.s3_csv_cleared

    def mark_s3_csv_cleared(self, manifest):
        """Record on the manifest that its CSV files were cleared from S3.

        Sets ``s3_csv_cleared`` to True and saves the manifest. A falsy manifest
        (for example None) is ignored.
        """
        if not manifest:
            return
        manifest.s3_csv_cleared = True
        manifest.save()

    def get_s3_parquet_cleared(self, manifest):
        """Tell whether the Parquet files in S3 were cleared for this manifest.

        A falsy manifest (for example None) yields False; otherwise the
        manifest's ``s3_parquet_cleared`` value is returned.
        """
        if not manifest:
            return False
        return manifest.s3_parquet_cleared

    def mark_s3_parquet_cleared(self, manifest):
        """Record on the manifest that its Parquet files were cleared from S3.

        Sets ``s3_parquet_cleared`` to True and saves the manifest. A falsy
        manifest (for example None) is ignored.
        """
        if not manifest:
            return
        manifest.s3_parquet_cleared = True
        manifest.save()

    def get_max_export_time_for_manifests(self, provider_uuid, bill_date):
        """Find the latest ``export_time`` among a provider's manifests for a bill date.

        Manifests are matched on ``provider_id`` and on the date part of
        ``billing_period_start_datetime``; the ``export_time__max`` entry of the
        aggregate result is returned.
        """
        matching_manifests = CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid,
            billing_period_start_datetime__date=bill_date,
        ).all()
        return matching_manifests.aggregate(Max("export_time")).get("export_time__max")
Exemplo n.º 30
0
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None, tracing_id=None):
        """
        Initializer.

        Looks up the optional manifest and the provider, then builds the
        provider-specific updaters via ``_set_updater``.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid: The uuid of the provider whose report data is summarized.
            manifest_id: Optional id of the manifest handed to the updaters. When
                None, no manifest is looked up.
            tracing_id: Optional id passed to ``log_json`` for this object's log messages.

        Raises:
            ReportSummaryUpdaterProviderNotFoundError: No provider was found for provider_uuid.
            ReportSummaryUpdaterError: Creating the updaters failed, or the provider
                type has no updater.

        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        self._tracing_id = tracing_id
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()

        if not self._provider:
            raise ReportSummaryUpdaterProviderNotFoundError(
                f"Provider data for uuid '{self._provider_uuid}' not found."
            )

        try:
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            # Chain explicitly so the original failure is recorded as the cause.
            raise ReportSummaryUpdaterError(err) from err

        # _set_updater returns (None, None) for provider types it does not handle.
        if not self._updater:
            raise ReportSummaryUpdaterError("Invalid provider type specified.")
        msg = f"Starting report data summarization for provider uuid: {self._provider.uuid}."
        LOG.info(log_json(self._tracing_id, msg))

    @cached_property
    def trino_enabled(self):
        """Tell whether Trino processing is enabled for this source.

        The answer comes from ``enable_trino_processing`` called with the
        provider uuid, the provider type and the schema.
        """
        source_uuid, source_type = self._provider_uuid, self._provider.type
        return enable_trino_processing(source_uuid, source_type, self._schema)

    def _set_updater(self):
        """
        Build the summary updater objects for this provider.

        The provider type selects the updater class and ``trino_enabled`` selects
        between its Parquet and non-Parquet variant. The OpenShift-on-cloud
        updater class is chosen by ``trino_enabled`` alone.

        Args:
            None

        Returns:
            (tuple) : The provider-specific report summary updater and the
                OpenShift-on-cloud summary updater, or (None, None) when the
                provider type is not handled here.

        """
        # Each entry: (handled provider types, Parquet updater, standard updater).
        updater_choices = (
            (
                (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL),
                AWSReportParquetSummaryUpdater,
                AWSReportSummaryUpdater,
            ),
            (
                (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL),
                AzureReportParquetSummaryUpdater,
                AzureReportSummaryUpdater,
            ),
            (
                (Provider.PROVIDER_OCP,),
                OCPReportParquetSummaryUpdater,
                OCPReportSummaryUpdater,
            ),
            (
                (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL),
                GCPReportParquetSummaryUpdater,
                GCPReportSummaryUpdater,
            ),
        )

        provider_type = self._provider.type
        for handled_types, parquet_updater, standard_updater in updater_choices:
            if provider_type in handled_types:
                # trino_enabled is only consulted once a provider type matched.
                report_summary_updater = parquet_updater if self.trino_enabled else standard_updater
                break
        else:
            # No entry handles this provider type.
            return (None, None)

        if self.trino_enabled:
            ocp_cloud_updater = OCPCloudParquetReportSummaryUpdater
        else:
            ocp_cloud_updater = OCPCloudReportSummaryUpdater

        LOG.info(f"Set report_summary_updater = {report_summary_updater.__name__}")
        provider_updater = report_summary_updater(self._schema, self._provider, self._manifest)
        cloud_updater = ocp_cloud_updater(self._schema, self._provider, self._manifest)
        return (provider_updater, cloud_updater)

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater."""
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime("%Y-%m-%d")
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime("%Y-%m-%d")
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone("UTC")
            end_date = end_date.strftime("%Y-%m-%d")
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """
        Run the provider updater's daily rollup for a date range.

        The dates are normalized with ``_format_dates`` first, and the view cache
        for this schema and provider type is invalidated afterwards.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            (str, str): The start and end date returned by the provider updater.

        """
        LOG.info(log_json(self._tracing_id, f"Daily summary starting for source {self._provider_uuid}"))

        formatted_start, formatted_end = self._format_dates(start_date, end_date)
        used_start, used_end = self._updater.update_daily_tables(formatted_start, formatted_end)

        invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)
        LOG.info(log_json(self._tracing_id, f"Daily summary completed for source {self._provider_uuid}"))
        return used_start, used_end

    def update_summary_tables(self, start_date, end_date, tracing_id):
        """
        Run the provider updater's summary step for a date range.

        The dates are normalized with ``_format_dates`` first, and the view cache
        for this schema and provider type is invalidated afterwards.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            tracing_id (str): The tracing_id used for the date log lines.

        Returns:
            (tuple): The start and end date returned by the provider updater.

        """
        LOG.info(log_json(self._tracing_id, f"Summary processing starting for source {self._provider_uuid}"))

        formatted_start, formatted_end = self._format_dates(start_date, end_date)
        LOG.info(log_json(tracing_id, f"Using start date: {formatted_start}"))
        LOG.info(log_json(tracing_id, f"Using end date: {formatted_end}"))

        used_start, used_end = self._updater.update_summary_tables(formatted_start, formatted_end)

        LOG.info(log_json(self._tracing_id, f"Summary processing completed for source {self._provider_uuid}"))
        invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)

        return used_start, used_end

    def get_openshift_on_cloud_infra_map(self, start_date, end_date, tracing_id):
        """Fetch the infra map from the OpenShift-on-cloud updater.

        Returns an empty dict when the provider type is not listed in
        ``Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST``. Any exception raised along
        the way is re-raised as ``ReportSummaryUpdaterCloudError``.
        """
        try:
            if self._provider.type not in Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST:
                return {}

            LOG.info(
                log_json(
                    self._tracing_id,
                    f"Getting OpenShift on Cloud infrastructure map for {self._provider_uuid}",
                )
            )
            formatted_start, formatted_end = self._format_dates(start_date, end_date)
            LOG.info(log_json(tracing_id, f"Using start date: {formatted_start}"))
            LOG.info(log_json(tracing_id, f"Using end date: {formatted_end}"))
            return self._ocp_cloud_updater.get_infra_map(formatted_start, formatted_end)
        except Exception as ex:
            raise ReportSummaryUpdaterCloudError(str(ex))

    def update_openshift_on_cloud_summary_tables(
        self, start_date, end_date, ocp_provider_uuid, infra_provider_uuid, infra_provider_type, tracing_id
    ):
        """
        Update the OpenShift on Cloud report summary tables.

        Nothing is updated unless this provider's type is listed in
        ``Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST``.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            ocp_provider_uuid: Passed through to the OpenShift-on-cloud updater.
            infra_provider_uuid: Passed through to the OpenShift-on-cloud updater.
            infra_provider_type: Passed through to the OpenShift-on-cloud updater
                and used in the log messages.
            tracing_id (str): The tracing_id used for the date log lines.

        Returns:
            None

        Raises:
            ReportSummaryUpdaterCloudError: The summary update, the completion log
                call or the cache invalidation raised an exception.

        """

        if self._provider.type in Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST:
            msg = f"OpenShift on {infra_provider_type} summary processing starting for source {self._provider_uuid}"
            LOG.info(log_json(self._tracing_id, msg))
            start_date, end_date = self._format_dates(start_date, end_date)
            LOG.info(log_json(tracing_id, f"Using start date: {start_date}"))
            LOG.info(log_json(tracing_id, f"Using end date: {end_date}"))
            try:
                self._ocp_cloud_updater.update_summary_tables(
                    start_date, end_date, ocp_provider_uuid, infra_provider_uuid, infra_provider_type
                )
                # Adjacent string literals (no commas) so msg is one string, not a tuple.
                msg = (
                    f"OpenShift on {infra_provider_type} summary processing completed"
                    f" for source {self._provider_uuid}"
                )
                LOG.info(log_json(self._tracing_id, msg))
                invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)
            except Exception as ex:
                # Chain explicitly so the original failure is recorded as the cause.
                raise ReportSummaryUpdaterCloudError(str(ex)) from ex
        else:
            # NOTE(review): the check above is on self._provider.type, while this
            # message reports infra_provider_type - confirm which one is intended.
            msg = (
                f"{infra_provider_type} is not in {Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST}."
                " Not running OpenShift on Cloud summary."
            )
            LOG.info(log_json(self._tracing_id, msg))