Example #1
    def test_task_is_running_false(self):
        """Test that a task is not running."""
        task_list = [1, 2, 3]
        _cache = WorkerCache()
        _cache.set_host_specific_task_list(task_list)

        self.assertFalse(_cache.task_is_running(4))
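
The WorkerCache implementation itself never appears in this listing, and the snippets clearly span more than one revision of the class: in some, worker_cache is a flat list of the current host's tasks (Example #5), while in others it is a dict keyed by hostname stored under settings.WORKER_CACHE_KEY (Example #12). A minimal sketch of the dict-keyed variant that this test exercises, with the cache layout assumed from Example #12:

from django.conf import settings
from django.core.cache import cache


class WorkerCache:
    """Sketch only: every host's task list lives in one shared cache entry."""

    def __init__(self):
        # Assumed: each worker identifies itself via settings.HOSTNAME.
        self.hostname = settings.HOSTNAME

    @property
    def worker_cache(self):
        # All hosts' task lists, keyed by hostname.
        return cache.get(settings.WORKER_CACHE_KEY, {})

    @property
    def host_specific_worker_cache(self):
        return self.worker_cache.get(self.hostname, [])

    def set_host_specific_task_list(self, task_list):
        _cache = self.worker_cache
        _cache[self.hostname] = task_list
        cache.set(settings.WORKER_CACHE_KEY, _cache, timeout=None)

    def get_all_running_tasks(self):
        # Flatten every host's list into one.
        return [task for tasks in self.worker_cache.values() for task in tasks]

    def task_is_running(self, task_key):
        return task_key in self.get_all_running_tasks()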
Example #2
    def __init__(self, task, download_path=None, **kwargs):
        """
        Create a downloader.

        Args:
            task          (Object) bound celery object
            download_path (String) filesystem path to store downloaded files

        Kwargs:
            customer_name     (String) customer name
            access_credential (Dict) provider access credentials
            report_source     (String) cost report source
            provider_type     (String) cloud provider type
            provider_uuid     (String) cloud provider uuid
            report_name       (String) cost report name

        """
        self._task = task

        if download_path:
            self.download_path = download_path
        else:
            self.download_path = mkdtemp(prefix="masu")
        self.worker_cache = WorkerCache()
        self._cache_key = kwargs.get("cache_key")
        self._provider_uuid = kwargs.get("provider_uuid")
        self.request_id = kwargs.get("request_id")
        self.account = kwargs.get("account")
        self.context = {"request_id": self.request_id, "provider_uuid": self._provider_uuid, "account": self.account}
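
For orientation, constructing a downloader might look like the following; every value is hypothetical, and the first argument is normally the bound celery task:

# Hypothetical values throughout.
downloader = ReportDownloaderBase(
    bound_celery_task,
    cache_key="00000000-0000-0000-0000-000000000000:report.csv",
    provider_uuid="00000000-0000-0000-0000-000000000000",
    request_id="req-1",
    account="10001",
)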
Example #3
    def test_task_is_running_false(self):
        """Test that a task is not running."""
        task_list = [1, 2, 3]
        _cache = WorkerCache()
        for task in task_list:
            _cache.add_task_to_cache(task)

        self.assertFalse(_cache.task_is_running(4))
Example #4
    def test_set_host_specific_task_list(self):
        """Test that setting a task list works."""
        task_list = [1, 2, 3]
        _cache = WorkerCache()

        self.assertEqual(_cache.host_specific_worker_cache, [])

        _cache.set_host_specific_task_list(task_list)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)
Example #5
    def test_add_task_to_cache(self):
        """Test that a single task is added."""
        task_key = "task_key"
        _cache = WorkerCache()

        self.assertEqual(_cache.worker_cache, [])

        _cache.add_task_to_cache(task_key)
        self.assertEqual(_cache.worker_cache, [task_key])
Example #6
    def test_check_if_manifest_should_be_downloaded_task_currently_running(
            self):
        """Test that a manifest being processed should not be reprocessed."""
        _cache = WorkerCache()
        _cache.add_task_to_cache(self.cache_key)

        result = self.downloader.check_if_manifest_should_be_downloaded(
            self.assembly_id)
        self.assertFalse(result)
Example #7
    def test_get_all_running_tasks(self, mock_inspect):
        """Test that multiple hosts' task lists are combined."""

        second_host = "koku-worker-2-sdfsdff"
        first_host_list = [1, 2, 3]
        second_host_list = [4, 5, 6]
        expected = first_host_list + second_host_list

        mock_worker_list = {
            "celery@kokuworker": "",
            f"celery@{second_host}": ""
        }
        mock_inspect.reserved.return_value = mock_worker_list

        _cache = WorkerCache()
        for task in first_host_list:
            _cache.add_task_to_cache(task)

        with override_settings(HOSTNAME=second_host):
            _cache = WorkerCache()
            for task in second_host_list:
                _cache.add_task_to_cache(task)

        self.assertEqual(sorted(_cache.get_all_running_tasks()),
                         sorted(expected))
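
Unlike Example #12's single shared dict, this revision lets each host write under its own cache key, which is why a second WorkerCache built under a different HOSTNAME does not clobber the first. A sketch of get_all_running_tasks under that assumed key layout:

from django.core.cache import cache


class WorkerCache:  # sketch fragment; the key layout is an assumption
    def get_all_running_tasks(self):
        # Assumed layout: one "<hostname>:worker-tasks" entry per active worker.
        tasks = []
        for host in self.active_workers:
            tasks.extend(cache.get(f"{host}:worker-tasks", []))
        return tasks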
Example #8
    def __init__(self, billing_source=None, provider_uuid=None):
        """
        Orchestrator for processing.

        Args:
            billing_source (String): Individual account to retrieve.

        """
        self._accounts, self._polling_accounts = self.get_accounts(
            billing_source, provider_uuid)
        self.worker_cache = WorkerCache()
Example #9
    def test_task_is_running_true(self, mock_inspect):
        """Test that a task is running."""
        mock_worker_list = {"celery@kokuworker": ""}
        mock_inspect.reserved.return_value = mock_worker_list

        task_list = [1, 2, 3]

        _cache = WorkerCache()
        for task in task_list:
            _cache.add_task_to_cache(task)

        self.assertTrue(_cache.task_is_running(1))
Example #10
def refresh_materialized_views(  # noqa: C901
    schema_name,
    provider_type,
    manifest_id=None,
    provider_uuid="",
    synchronous=False,
    queue_name=None,
    tracing_id=None,
):
    """Refresh the database's materialized views for reporting."""
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name, provider_type, provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            refresh_materialized_views.s(
                schema_name,
                provider_type,
                manifest_id=manifest_id,
                provider_uuid=provider_uuid,
                synchronous=synchronous,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)
    materialized_views = ()
    try:
        with schema_context(schema_name):
            for view in materialized_views:
                table_name = view._meta.db_table
                with connection.cursor() as cursor:
                    cursor.execute(
                        f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                    LOG.info(log_json(tracing_id, f"Refreshed {table_name}."))

        invalidate_view_cache_for_tenant_and_source_type(
            schema_name, provider_type)

        if provider_uuid:
            ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
        if manifest_id:
            # Processing for this manifest should be complete after this step
            with ReportManifestDBAccessor() as manifest_accessor:
                manifest = manifest_accessor.get_manifest_by_id(manifest_id)
                manifest_accessor.mark_manifest_as_completed(manifest)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
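
The except/release/raise block plus the trailing release can be collapsed into a single finally, the shape update_openshift_on_cloud uses in Example #29. A behavior-preserving variant of this task's tail, where refresh_views_and_update_manifest is a stand-in for the body shown above:

    try:
        refresh_views_and_update_manifest()  # stand-in for the body above
    finally:
        # Runs on success and on any exception, so the lock cannot leak.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)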
Example #11
def update_cost_model_costs(schema_name,
                            provider_uuid,
                            start_date=None,
                            end_date=None,
                            queue_name=None,
                            synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.

    Returns:
        None

    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_cost_model_costs.s(
                schema_name,
                provider_uuid,
                start_date=start_date,
                end_date=end_date,
                queue_name=queue_name,
                synchronous=synchronous,
            ).apply_async(queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=600)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (f"update_cost_model_costs called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
    LOG.info(stmt)

    try:
        updater = CostModelCostUpdater(schema_name, provider_uuid)
        if updater:
            updater.update_cost_model_costs(start_date, end_date)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
Example #12
    def test_get_all_running_tasks(self):
        """Test that multiple hosts' task lists are combined."""
        second_host = "test"
        first_host_list = [1, 2, 3]
        second_host_list = [4, 5, 6]
        expected = first_host_list + second_host_list

        _cache = WorkerCache()
        _cache.set_host_specific_task_list(first_host_list)
        _worker_cache = _cache.worker_cache

        _worker_cache[second_host] = second_host_list
        cache.set(settings.WORKER_CACHE_KEY, _worker_cache, timeout=None)

        self.assertEqual(_cache.get_all_running_tasks(), expected)
Example #13
    def test_active_worker_property(self, mock_inspect):
        """Test the active_workers property."""
        test_matrix = [
            {"hostname": "celery@kokuworker", "expected_workers": ["kokuworker"]},
            {"hostname": "kokuworker", "expected_workers": ["kokuworker"]},
            {"hostname": "kokuworker&63)", "expected_workers": ["kokuworker&63)"]},
            {"hostname": "koku@worker&63)", "expected_workers": ["worker&63)"]},
            {"hostname": "", "expected_workers": [""]},
        ]
        for test in test_matrix:
            with self.subTest(test=test):
                mock_worker_list = {test.get("hostname"): ""}
                mock_inspect.reserved.return_value = mock_worker_list
                _cache = WorkerCache()
                self.assertEqual(_cache.active_workers, test.get("expected_workers"))
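
Every expected_workers value above is consistent with taking the text after the last "@" in each reserved-queue hostname, which suggests an implementation along these lines (the celery inspect plumbing is assumed; the tests patch it via mock_inspect):

class WorkerCache:  # sketch fragment; celery_inspect is the patched inspect API
    @property
    def active_workers(self):
        # Iterating the reserved() dict yields the worker hostnames.
        running_celery_tasks = celery_inspect.reserved() or {}
        return [hostname.split("@")[-1] for hostname in running_celery_tasks]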
Example #14
def clear_worker_cache_on_shutdown(sender, **kwargs):  # pragma: no cover
    from masu.processor.worker_cache import WorkerCache

    LOGGER.info("Clearing worker task cache.")
    try:
        WorkerCache().invalidate_host()
    except Exception:
        LOGGER.info("Cache not cleared on shutdown.")
Example #15
def clear_worker_cache(sender, instance, **kwargs):  # pragma: no cover
    """Clear WorkerCache after worker is up and running."""
    from .database import check_migrations
    from masu.processor.worker_cache import WorkerCache

    while not check_migrations():
        LOGGER.warning("Migrations not done. Sleeping")
        time.sleep(5)
    LOGGER.info("Clearing worker task cache.")
    WorkerCache().invalidate_host()
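
Neither handler shows its registration; as celery signal receivers they would presumably be wired up like this (the exact signals are an assumption inferred from the handler signatures):

from celery.signals import celeryd_after_setup, worker_shutting_down

# Assumed wiring: celeryd_after_setup passes the worker instance, which
# matches clear_worker_cache(sender, instance, **kwargs).
celeryd_after_setup.connect(clear_worker_cache)
worker_shutting_down.connect(clear_worker_cache_on_shutdown)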
Example #16
    def test_get_all_running_tasks(self):
        """Test that multiple hosts' task lists are combined."""
        second_host = "test"
        first_host_list = [1, 2, 3]
        second_host_list = [4, 5, 6]
        expected = first_host_list + second_host_list

        _cache = WorkerCache()
        for task in first_host_list:
            _cache.add_task_to_cache(task)

        with patch.object(settings, "HOSTNAME", second_host):
            _cache = WorkerCache()
            for task in second_host_list:
                _cache.add_task_to_cache(task)

        self.assertEqual(sorted(_cache.get_all_running_tasks()), sorted(expected))
Example #17
    def test_single_task_caching(self, mock_inspect):
        """Test that single task cache creates and deletes a cache entry."""
        cache = WorkerCache()

        task_name = "test_task"
        task_args = ["schema1", "OCP"]

        self.assertFalse(cache.single_task_is_running(task_name, task_args))
        cache.lock_single_task(task_name, task_args)
        self.assertTrue(cache.single_task_is_running(task_name, task_args))
        cache.release_single_task(task_name, task_args)
        self.assertFalse(cache.single_task_is_running(task_name, task_args))
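
The lock/release semantics above map naturally onto Django's cache.add, which only writes when the key is absent; a minimal sketch of the three methods with an assumed key scheme:

from django.core.cache import cache


class WorkerCache:  # sketch fragment; the key scheme is an assumption
    def _single_task_key(self, task_name, task_args=None):
        args = ":".join(str(arg) for arg in task_args or [])
        return f"single-task:{task_name}:{args}"

    def single_task_is_running(self, task_name, task_args=None):
        return cache.get(self._single_task_key(task_name, task_args)) is not None

    def lock_single_task(self, task_name, task_args=None, timeout=None):
        # cache.add is a no-op when the key already exists, giving lock semantics.
        cache.add(self._single_task_key(task_name, task_args), "true", timeout)

    def release_single_task(self, task_name, task_args=None):
        cache.delete(self._single_task_key(task_name, task_args))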
Example #18
    def test_remove_task_from_cache(self):
        """Test that a task is removed."""
        task_key = "task_key"
        _cache = WorkerCache()
        _cache.add_task_to_cache(task_key)
        self.assertEqual(_cache.worker_cache, [task_key])

        _cache.remove_task_from_cache(task_key)
        self.assertEqual(_cache.worker_cache, [])
Example #19
    def test_remove_task_from_cache_value_not_in_cache(self):
        """Test that a task is removed."""
        task_list = [1, 2, 3, 4]
        _cache = WorkerCache()
        _cache.set_host_specific_task_list(task_list)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)

        _cache.remove_task_from_cache(5)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)
Example #20
def refresh_materialized_views(schema_name,
                               provider_type,
                               manifest_id=None,
                               provider_uuid=None,
                               synchronous=False):
    """Refresh the database's materialized views for reporting."""
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)

        worker_cache.lock_single_task(task_name, cache_args)
    materialized_views = ()
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        materialized_views = (AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_OCP,):
        materialized_views = (OCP_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_AZURE,
                           Provider.PROVIDER_AZURE_LOCAL):
        materialized_views = (AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)

    with schema_context(schema_name):
        for view in materialized_views:
            table_name = view._meta.db_table
            with connection.cursor() as cursor:
                cursor.execute(
                    f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                LOG.info(f"Refreshed {table_name}.")

    invalidate_view_cache_for_tenant_and_source_type(schema_name,
                                                     provider_type)

    if provider_uuid:
        ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
    if manifest_id:
        # Processing for this manifest should be complete after this step
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            manifest_accessor.mark_manifest_as_completed(manifest)

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
Example #21
    def test_add_task_to_cache(self):
        """Test that a single task is added."""
        task_list = [1, 2, 3]
        expected = [1, 2, 3, 4]
        _cache = WorkerCache()
        _cache.set_host_specific_task_list(task_list)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)

        _cache.add_task_to_cache(4)
        self.assertEqual(_cache.host_specific_worker_cache, expected)
Example #22
    def test_remove_task_from_cache(self):
        """Test that a task is removed."""
        task_list = [1, 2, 3, 4]
        expected = [1, 2, 3]
        _cache = WorkerCache()
        _cache.set_host_specific_task_list(task_list)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)

        _cache.remove_task_from_cache(4)
        self.assertEqual(_cache.host_specific_worker_cache, expected)
Example #23
    def test_remove_task_from_cache_value_not_in_cache(self):
        """Test that a task is removed."""
        task_list = [1, 2, 3, 4]
        _cache = WorkerCache()
        for task in task_list:
            _cache.add_task_to_cache(task)
        self.assertEqual(_cache.worker_cache, task_list)

        _cache.remove_task_from_cache(5)
        self.assertEqual(_cache.worker_cache, task_list)
Example #24
    def test_invalidate_host(self):
        """Test that a host's cache is invalidated."""
        task_list = [1, 2, 3]
        _cache = WorkerCache()

        _cache.set_host_specific_task_list(task_list)
        self.assertEqual(_cache.host_specific_worker_cache, task_list)

        _cache.invalidate_host()

        self.assertEqual(_cache.host_specific_worker_cache, [])
Example #25
    def test_invalidate_host(self):
        """Test that a host's cache is invalidated."""
        task_list = [1, 2, 3]
        _cache = WorkerCache()

        for task in task_list:
            _cache.add_task_to_cache(task)
        self.assertEqual(_cache.worker_cache, task_list)

        _cache.invalidate_host()

        self.assertEqual(_cache.worker_cache, [])
Example #26
def update_cost_model_costs(schema_name,
                            provider_uuid,
                            start_date=None,
                            end_date=None,
                            provider_type=None,
                            synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.

    Returns:
        None

    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)
        worker_cache.lock_single_task(task_name, cache_args, timeout=300)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (f"update_cost_model_costs called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
    LOG.info(stmt)

    updater = CostModelCostUpdater(schema_name, provider_uuid)
    if updater:
        updater.update_cost_model_costs(start_date, end_date)

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
Example #27
class ReportDownloaderBase:
    """
    Download cost reports from a provider.

    Base object class for downloading cost reports from a cloud provider.
    """

    def __init__(self, task, download_path=None, **kwargs):
        """
        Create a downloader.

        Args:
            task          (Object) bound celery object
            download_path (String) filesystem path to store downloaded files

        Kwargs:
            customer_name     (String) customer name
            access_credential (Dict) provider access credentials
            report_source     (String) cost report source
            provider_type     (String) cloud provider type
            provider_uuid     (String) cloud provider uuid
            report_name       (String) cost report name

        """
        self._task = task

        if download_path:
            self.download_path = download_path
        else:
            self.download_path = mkdtemp(prefix="masu")
        self.worker_cache = WorkerCache()
        self._cache_key = kwargs.get("cache_key")
        self._provider_uuid = kwargs.get("provider_uuid")
        self.request_id = kwargs.get("request_id")
        self.account = kwargs.get("account")
        self.context = {"request_id": self.request_id, "provider_uuid": self._provider_uuid, "account": self.account}

    def _get_existing_manifest_db_id(self, assembly_id):
        """Return a manifest DB object if it exists."""
        manifest_id = None
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if manifest:
                manifest_id = manifest.id
        return manifest_id

    def check_if_manifest_should_be_downloaded(self, assembly_id):
        """Check if we should download this manifest.

        We first check if we have a database record of this manifest.
        That would indicate that we have already downloaded and at least
        begun processing. We then check the last completed time for
        a file in this manifest. This second check is to cover the case
        when we did not complete processing and need to re-download and
        process the manifest.

        Returns True if the manifest should be downloaded and processed.
        """
        if self._cache_key and self.worker_cache.task_is_running(self._cache_key):
            msg = f"{self._cache_key} is currently running."
            LOG.info(log_json(self.request_id, msg, self.context))
            return False
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)

            if manifest:
                manifest_id = manifest.id
                # check if `last_completed_datetime` is null for any report in the manifest.
                # if nulls exist, report processing is not complete and reports should be downloaded.
                need_to_download = manifest_accessor.is_last_completed_datetime_null(manifest_id)
                if need_to_download:
                    self.worker_cache.add_task_to_cache(self._cache_key)
                return need_to_download

        # The manifest does not exist, this is the first time we are
        # downloading and processing it.
        self.worker_cache.add_task_to_cache(self._cache_key)
        return True

    def _process_manifest_db_record(self, assembly_id, billing_start, num_of_files):
        """Insert or update the manifest DB record."""
        LOG.info("Inserting/updating manifest in database for assembly_id: %s", assembly_id)

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest_entry = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)

            if not manifest_entry:
                msg = f"No manifest entry found in database. Adding for bill period start: {billing_start}"
                LOG.info(log_json(self.request_id, msg, self.context))
                manifest_dict = {
                    "assembly_id": assembly_id,
                    "billing_period_start_datetime": billing_start,
                    "num_total_files": num_of_files,
                    "provider_uuid": self._provider_uuid,
                    "task": self._task.request.id,
                }
                manifest_entry = manifest_accessor.add(**manifest_dict)

            manifest_accessor.mark_manifest_as_updated(manifest_entry)
            manifest_id = manifest_entry.id

        return manifest_id
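
From the caller's side the contract is: a True return means cache_key has just been added to the WorkerCache, so whoever processes the manifest is responsible for removing it again, as get_report_files does in Example #30. A hypothetical driver:

# Hypothetical driver; SomeProviderDownloader and the variables are stand-ins.
downloader = SomeProviderDownloader(task, cache_key=cache_key,
                                    provider_uuid=provider_uuid)
if downloader.check_if_manifest_should_be_downloaded(assembly_id):
    download_and_process(downloader)  # hypothetical processing step
    WorkerCache().remove_task_from_cache(cache_key)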
Example #28
class Orchestrator:
    """
    Orchestrator for report processing.

    Top level object which is responsible for:
    * Maintaining a current list of accounts
    * Ensuring that reports are downloaded and processed for all accounts.

    """
    def __init__(self, billing_source=None, provider_uuid=None):
        """
        Orchestrator for processing.

        Args:
            billing_source (String): Individual account to retrieve.

        """
        self._accounts, self._polling_accounts = self.get_accounts(
            billing_source, provider_uuid)
        self.worker_cache = WorkerCache()

    @staticmethod
    def get_accounts(billing_source=None, provider_uuid=None):
        """
        Prepare a list of accounts for the orchestrator to get CUR from.

        If billing_source is not provided all accounts will be returned, otherwise
        only the account for the provided billing_source will be returned.

        Still a work in progress, but works for now.

        Args:
            billing_source (String): Individual account to retrieve.

        Returns:
            [CostUsageReportAccount] (all), [CostUsageReportAccount] (polling only)

        """
        all_accounts = []
        polling_accounts = []
        try:
            all_accounts = AccountsAccessor().get_accounts(provider_uuid)
        except AccountsAccessorError as error:
            LOG.error("Unable to get accounts. Error: %s", str(error))

        if billing_source:
            for account in all_accounts:
                if billing_source == account.get("billing_source"):
                    all_accounts = [account]

        for account in all_accounts:
            if AccountsAccessor().is_polling_account(account):
                polling_accounts.append(account)

        return all_accounts, polling_accounts

    @staticmethod
    def get_reports(provider_uuid):
        """
        Get months for provider to process.

        Args:
            (String) provider uuid to determine if initial setup is complete.

        Returns:
            (List) List of datetime objects.

        """
        with ProviderDBAccessor(
                provider_uuid=provider_uuid) as provider_accessor:
            reports_processed = provider_accessor.get_setup_complete()

        if Config.INGEST_OVERRIDE or not reports_processed:
            number_of_months = Config.INITIAL_INGEST_NUM_MONTHS
        else:
            number_of_months = 2

        return DateAccessor().get_billing_months(number_of_months)

    def start_manifest_processing(self, customer_name, credentials,
                                  data_source, provider_type, schema_name,
                                  provider_uuid, report_month):
        """
        Start processing an account's manifest for the specified report_month.

        Args:
            (String) customer_name - customer name
            (String) credentials - credentials object
            (String) data_source - report storage location
            (String) schema_name - db tenant
            (String) provider_uuid - provider unique identifier
            (Date)   report_month - month to get latest manifest

        Returns:
            ({}) Dictionary containing the following keys:
                manifest_id - (String): Manifest ID for ReportManifestDBAccessor
                assembly_id - (String): UUID identifying report file
                compression - (String): Report compression format
                files       - ([{"key": full_file_path "local_file": "local file name"}]): List of report files.
        """
        downloader = ReportDownloader(
            customer_name=customer_name,
            credentials=credentials,
            data_source=data_source,
            provider_type=provider_type,
            provider_uuid=provider_uuid,
            report_name=None,
        )
        manifest = downloader.download_manifest(report_month)

        if manifest:
            LOG.info("Saving all manifest file names.")
            record_all_manifest_files(manifest["manifest_id"], [
                report.get("local_file")
                for report in manifest.get("files", [])
            ])

        LOG.info(f"Found Manifests: {str(manifest)}")
        report_files = manifest.get("files", [])
        report_tasks = []
        for report_file_dict in report_files:
            local_file = report_file_dict.get("local_file")
            report_file = report_file_dict.get("key")

            # Check if report file is complete or in progress.
            if record_report_status(manifest["manifest_id"], local_file,
                                    "no_request"):
                LOG.info(f"{local_file} was already processed")
                continue

            cache_key = f"{provider_uuid}:{report_file}"
            if self.worker_cache.task_is_running(cache_key):
                LOG.info(f"{local_file} process is in progress")
                continue

            report_context = manifest.copy()
            report_context["current_file"] = report_file
            report_context["local_file"] = local_file
            report_context["key"] = report_file

            report_tasks.append(
                get_report_files.s(
                    customer_name,
                    credentials,
                    data_source,
                    provider_type,
                    schema_name,
                    provider_uuid,
                    report_month,
                    report_context,
                ))
            LOG.info("Download queued - schema_name: %s.", schema_name)

        if report_tasks:
            async_id = chord(report_tasks, summarize_reports.s())()
            LOG.info(f"Manifest Processing Async ID: {async_id}")
        return manifest

    def prepare(self):
        """
        Prepare a processing request for each account.

        Scans the database for providers that have reports that need to be processed.
        Any report it finds is queued to the appropriate celery task to download
        and process those reports.

        Args:
            None

        Returns:
            (celery.result.AsyncResult) Async result for download request.

        """
        async_result = None
        for account in self._polling_accounts:
            provider_uuid = account.get("provider_uuid")
            report_months = self.get_reports(provider_uuid)
            for month in report_months:
                LOG.info(
                    "Getting %s report files for account (provider uuid): %s",
                    month.strftime("%B %Y"), provider_uuid)
                account["report_month"] = month
                try:
                    self.start_manifest_processing(**account)
                except ReportDownloaderError as err:
                    LOG.warning(
                        f"Unable to download manifest for provider: {provider_uuid}. Error: {str(err)}."
                    )
                    continue
                except Exception as err:
                    # Broad exception catching is important here because any errors thrown can
                    # block all subsequent account processing.
                    LOG.error(
                        f"Unexpected manifest processing error for provider: {provider_uuid}. Error: {str(err)}."
                    )
                    continue

                # update labels
                labeler = AccountLabel(
                    auth=account.get("credentials"),
                    schema=account.get("schema_name"),
                    provider_type=account.get("provider_type"),
                )
                account_number, label = labeler.get_label_details()
                if account_number:
                    LOG.info("Account: %s Label: %s updated.", account_number,
                             label)

        return async_result

    def remove_expired_report_data(self,
                                   simulate=False,
                                   line_items_only=False):
        """
        Remove expired report data for each account.

        Args:
            simulate (Boolean) Simulate report data removal

        Returns:
            (celery.result.AsyncResult) Async result for deletion request.

        """
        async_results = []
        for account in self._accounts:
            LOG.info("Calling remove_expired_data with account: %s", account)
            async_result = remove_expired_data.delay(
                schema_name=account.get("schema_name"),
                provider=account.get("provider_type"),
                simulate=simulate,
                line_items_only=line_items_only,
            )
            LOG.info(
                "Expired data removal queued - schema_name: %s, Task ID: %s",
                account.get("schema_name"),
                str(async_result),
            )
            async_results.append({
                "customer": account.get("customer_name"),
                "async_id": str(async_result)
            })
        return async_results
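
Judging from the methods above, typical entry points look like this (all scheduling context omitted; values hypothetical):

# Poll every account and queue download + summarization chords.
orchestrator = Orchestrator()
orchestrator.prepare()

# Restrict a run to a single billing source.
Orchestrator(billing_source="my-bucket").prepare()

# Queue expired-data removal, simulated first.
orchestrator.remove_expired_report_data(simulate=True)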
Example #29
def update_openshift_on_cloud(
    self,
    schema_name,
    openshift_provider_uuid,
    infrastructure_provider_uuid,
    infrastructure_provider_type,
    start_date,
    end_date,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Update OpenShift on Cloud for a specific OpenShift and cloud source."""
    task_name = "masu.processor.tasks.update_openshift_on_cloud"
    cache_args = [schema_name, infrastructure_provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infrastructure_provider_uuid,
                infrastructure_provider_type,
                start_date,
                end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)
    stmt = (f"update_openshift_on_cloud called with args: "
            f" schema_name: {schema_name}, "
            f" openshift_provider_uuid: {openshift_provider_uuid}, "
            f" infrastructure_provider_uuid: {infrastructure_provider_uuid}, "
            f" infrastructure_provider_type: {infrastructure_provider_type}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" queue_name: {queue_name}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name,
                                       infrastructure_provider_uuid,
                                       manifest_id, tracing_id)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            openshift_provider_uuid,
            infrastructure_provider_uuid,
            infrastructure_provider_type,
            tracing_id,
        )
    except ReportSummaryUpdaterCloudError as ex:
        LOG.info(
            log_json(
                tracing_id,
                (
                    # Without trailing commas these f-strings concatenate into
                    # one message instead of logging a tuple.
                    f"update_openshift_on_cloud failed for: {infrastructure_provider_type} "
                    f"provider: {infrastructure_provider_uuid}, "
                    f"OpenShift provider {openshift_provider_uuid}. \nError: {ex}\n"
                    f"Retry {self.request.retries} of {settings.MAX_UPDATE_RETRIES}"
                ),
            ))
        raise ReportSummaryUpdaterCloudError
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
Example #30
def get_report_files(self, customer_name, authentication, billing_source,
                     provider_type, schema_name, provider_uuid, report_month):
    """
    Task to download a Report and process the report.

    FIXME: A 2-hour timeout is arbitrarily set for in-progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production
    this value can be adjusted or made configurable.

    Args:
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema

    Returns:
        None

    """
    worker_stats.GET_REPORT_ATTEMPTS_COUNTER.labels(
        provider_type=provider_type).inc()
    month = report_month
    if isinstance(report_month, str):
        month = parser.parse(report_month)

    cache_key = f"{provider_uuid}:{month}"
    reports = _get_report_files(self, customer_name, authentication,
                                billing_source, provider_type, provider_uuid,
                                month, cache_key)

    stmt = (f"Reports to be processed:\n"
            f" schema_name: {customer_name}\n"
            f" provider: {provider_type}\n"
            f" provider_uuid: {provider_uuid}\n")
    for report in reports:
        stmt += " file: " + str(report["file"]) + "\n"
    LOG.info(stmt[:-1])
    reports_to_summarize = []
    for report_dict in reports:
        with transaction.atomic():
            try:
                manifest_id = report_dict.get("manifest_id")
                file_name = os.path.basename(report_dict.get("file"))
                with ReportStatsDBAccessor(file_name, manifest_id) as stats:
                    started_date = stats.get_last_started_datetime()
                    completed_date = stats.get_last_completed_datetime()

                # Skip processing if already in progress.
                if started_date and not completed_date:
                    expired_start_date = started_date + datetime.timedelta(
                        hours=Config.REPORT_PROCESSING_TIMEOUT_HOURS)
                    if DateAccessor().today_with_timezone(
                            "UTC") < expired_start_date:
                        LOG.info(
                            "Skipping processing task for %s since it was started at: %s.",
                            file_name,
                            str(started_date),
                        )
                        continue

                stmt = (f"Processing starting:\n"
                        f" schema_name: {customer_name}\n"
                        f" provider: {provider_type}\n"
                        f" provider_uuid: {provider_uuid}\n"
                        f' file: {report_dict.get("file")}')
                LOG.info(stmt)
                worker_stats.PROCESS_REPORT_ATTEMPTS_COUNTER.labels(
                    provider_type=provider_type).inc()
                _process_report_file(schema_name, provider_type, provider_uuid,
                                     report_dict)
                known_manifest_ids = [
                    report.get("manifest_id")
                    for report in reports_to_summarize
                ]
                if report_dict.get("manifest_id") not in known_manifest_ids:
                    report_meta = {
                        "schema_name": schema_name,
                        "provider_type": provider_type,
                        "provider_uuid": provider_uuid,
                        "manifest_id": report_dict.get("manifest_id"),
                    }
                    reports_to_summarize.append(report_meta)
            except ReportProcessorError as processing_error:
                worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
                    provider_type=provider_type).inc()
                LOG.error(str(processing_error))
                WorkerCache().remove_task_from_cache(cache_key)
                raise processing_error

    WorkerCache().remove_task_from_cache(cache_key)

    return reports_to_summarize