Пример #1
0
    def test_disk_status_logging(self, fake_downloader):
        """Verify a disk-space INFO message is logged when the temp directory exists.

        Args:
            fake_downloader: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
        """
        # Lift any global logging.disable() so assertLogs can capture INFO records.
        logging.disable(logging.NOTSET)
        os.makedirs(Config.TMP_DIR, exist_ok=True)

        account = fake_arn(service="iam", generate_account_id=True)
        expected = "INFO:masu.processor._tasks.download:Available disk space"
        try:
            with self.assertLogs("masu.processor._tasks.download",
                                 level="INFO") as logger:
                _get_report_files(
                    Mock(),
                    customer_name=self.fake.word(),
                    authentication=account,
                    provider_type=Provider.PROVIDER_AWS,
                    report_month=DateHelper().today,
                    provider_uuid=self.aws_provider_uuid,
                    billing_source=self.fake.word(),
                    cache_key=self.fake.word(),
                )
                # Substring match against every captured log line.
                self.assertTrue(any(expected in log for log in logger.output))
        finally:
            # Clean up even when the call or the assertion fails, so the
            # directory does not leak into other tests.
            shutil.rmtree(Config.TMP_DIR, ignore_errors=True)
Пример #2
0
    def test_get_report_exception(self, fake_downloader):
        """Expect _get_report_files to raise for this request.

        fake_downloader is not referenced in the body; presumably it is a
        mock injected by a patch decorator outside this view.
        """
        arn = fake_arn(service='iam', generate_account_id=True)

        with self.assertRaises(Exception):
            # Arguments are built inside the context manager, as the original
            # call evaluated them there.
            request = {
                'customer_name': self.fake.word(),
                'authentication': arn,
                'provider_type': 'AWS',
                'report_name': self.fake.word(),
                'billing_source': self.fake.word(),
            }
            _get_report_files(**request)
Пример #3
0
    def test_get_report_update_status(self, fake_downloader, fake_status):
        """Check that fake_status is called with READY once the call returns.

        Neither fake_downloader nor fake_status is created here; presumably
        both are mocks injected by patch decorators outside this view.
        """
        arn = fake_arn(service='iam', generate_account_id=True)
        request = {
            'customer_name': self.fake.word(),
            'authentication': arn,
            'provider_type': 'AWS',
            'report_name': self.fake.word(),
            'provider_uuid': self.aws_test_provider_uuid,
            'billing_source': self.fake.word(),
        }

        _get_report_files(**request)

        fake_status.assert_called_with(ProviderStatusCode.READY)
Пример #4
0
    def test_get_report_exception(self, fake_downloader):
        """Expect _get_report_files to raise for this request.

        fake_downloader is not referenced in the body; presumably it is a
        mock injected by a patch decorator outside this view.
        """
        arn = fake_arn(service="iam", generate_account_id=True)

        with self.assertRaises(Exception):
            # A Mock stands in for the first positional argument of the helper.
            task = Mock()
            request = {
                "customer_name": self.fake.word(),
                "authentication": arn,
                "provider_type": Provider.PROVIDER_AWS,
                "report_month": DateAccessor().today(),
                "provider_uuid": self.aws_provider_uuid,
                "billing_source": self.fake.word(),
            }
            _get_report_files(task, **request)
Пример #5
0
    def test_get_report_update_status(self, fake_downloader, fake_status):
        """Check that fake_status is called with READY once the call returns.

        Neither fake_downloader nor fake_status is created here; presumably
        both are mocks injected by patch decorators outside this view.
        """
        arn = fake_arn(service="iam", generate_account_id=True)

        # A Mock stands in for the first positional argument of the helper.
        task = Mock()
        request = {
            "customer_name": self.fake.word(),
            "authentication": arn,
            "provider_type": Provider.PROVIDER_AWS,
            "report_month": DateAccessor().today(),
            "provider_uuid": self.aws_provider_uuid,
            "billing_source": self.fake.word(),
        }
        _get_report_files(task, **request)

        fake_status.assert_called_with(ProviderStatusCode.READY)
Пример #6
0
    def test_disk_status_logging_no_dir(self, fake_downloader):
        """Check the exact INFO line logged when the temp directory is missing.

        fake_downloader is not referenced in the body; presumably it is a
        mock injected by a patch decorator outside this view.
        """
        # Lift any global logging.disable() so assertLogs can capture INFO records.
        logging.disable(logging.NOTSET)

        # Make sure the directory is absent before the call.
        shutil.rmtree(Config.TMP_DIR, ignore_errors=True)

        arn = fake_arn(service='iam', generate_account_id=True)
        # The spelling "avaiable" is kept verbatim; presumably it mirrors the
        # message emitted by the code under test.
        expected = 'INFO:masu.processor._tasks.download:Unable to find avaiable disk space. {} does not exist'.format(Config.TMP_DIR)
        with self.assertLogs('masu.processor._tasks.download', level='INFO') as captured:
            request = {
                'customer_name': self.fake.word(),
                'authentication': arn,
                'provider_type': 'AWS',
                'report_name': self.fake.word(),
                'billing_source': self.fake.word(),
            }
            _get_report_files(**request)
            # Exact match: the whole formatted line must be in the output list.
            self.assertIn(expected, captured.output)
Пример #7
0
    def test_get_report_exception_update_status(self,
                                                fake_downloader,
                                                fake_status):
        """Check that fake_status is called even if ReportDownloaderError is raised.

        Neither fake_downloader nor fake_status is created here; presumably
        both are mocks injected by patch decorators outside this view.
        """
        arn = fake_arn(service='iam', generate_account_id=True)

        try:
            request = {
                'customer_name': self.fake.word(),
                'authentication': arn,
                'provider_type': 'AWS',
                'report_name': self.fake.word(),
                'provider_uuid': self.aws_test_provider_uuid,
                'billing_source': self.fake.word(),
            }
            _get_report_files(**request)
        except ReportDownloaderError:
            # The download error is tolerated; only the status call matters here.
            pass

        fake_status.assert_called()
Пример #8
0
    def test_get_report_without_override(self, fake_accessor, fake_report_files):
        """Check that ReportDownloader.get_reports is last called with 2.

        fake_accessor and fake_report_files are not referenced in the body;
        presumably they are mocks injected by patch decorators outside this view.
        """
        expected_months = 2

        arn = fake_arn(service='iam', generate_account_id=True)
        with patch.object(ReportDownloader, 'get_reports') as get_reports_mock:
            request = {
                'customer_name': self.fake.word(),
                'authentication': arn,
                'provider_type': 'AWS',
                'report_name': self.fake.word(),
                'provider_uuid': self.aws_test_provider_uuid,
                'billing_source': self.fake.word(),
            }
            _get_report_files(**request)

            get_reports_mock.assert_called_with(expected_months)
Пример #9
0
    def test_disk_status_logging_no_dir(self, fake_downloader):
        """Check the exact INFO line logged when the temp directory does not exist.

        Args:
            fake_downloader: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
        """
        # Lift any global logging.disable() so assertLogs can capture INFO records.
        logging.disable(logging.NOTSET)

        # Config is shared state: remember the real value and restore it in
        # the finally block so the bogus path does not leak into other tests.
        original_tmp_dir = Config.TMP_DIR
        Config.TMP_DIR = '/this/path/does/not/exist'
        try:
            account = fake_arn(service='iam', generate_account_id=True)
            expected = 'INFO:masu.processor._tasks.download:Unable to find' + \
                f' available disk space. {Config.TMP_DIR} does not exist'
            with self.assertLogs('masu.processor._tasks.download', level='INFO') as logger:
                _get_report_files(customer_name=self.fake.word(),
                                  authentication=account,
                                  provider_type='AWS',
                                  report_name=self.fake.word(),
                                  provider_uuid=self.aws_test_provider_uuid,
                                  billing_source=self.fake.word())
                # Exact match: the whole formatted line must be in the output list.
                self.assertIn(expected, logger.output)
        finally:
            Config.TMP_DIR = original_tmp_dir
Пример #10
0
    def test_get_report_exception_update_status(self, fake_downloader, fake_status):
        """Check that fake_status is called even if ReportDownloaderError is raised.

        Neither fake_downloader nor fake_status is created here; presumably
        both are mocks injected by patch decorators outside this view.
        """
        arn = fake_arn(service="iam", generate_account_id=True)

        try:
            # A Mock stands in for the first positional argument of the helper.
            task = Mock()
            request = {
                "customer_name": self.fake.word(),
                "authentication": arn,
                "provider_type": Provider.PROVIDER_AWS,
                "report_month": DateAccessor().today(),
                "provider_uuid": self.aws_provider_uuid,
                "billing_source": self.fake.word(),
            }
            _get_report_files(task, **request)
        except ReportDownloaderError:
            # The download error is tolerated; only the status call matters here.
            pass

        fake_status.assert_called()
Пример #11
0
    def test_get_report_with_override(self, fake_accessor, fake_report_files):
        """Test _get_report_files on non-initial load with override set.

        Sets Config.INGEST_OVERRIDE to True and Config.INITIAL_INGEST_NUM_MONTHS
        to 5, then checks that ReportDownloader.get_reports is last called with
        that month count.

        Args:
            fake_accessor: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
            fake_report_files: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
        """
        # Config is shared state: remember the real values and restore them in
        # the finally block, instead of writing back hard-coded defaults only
        # when the test passes.
        saved_override = Config.INGEST_OVERRIDE
        saved_num_months = Config.INITIAL_INGEST_NUM_MONTHS
        Config.INGEST_OVERRIDE = True
        Config.INITIAL_INGEST_NUM_MONTHS = 5
        try:
            initial_month_qty = Config.INITIAL_INGEST_NUM_MONTHS

            account = fake_arn(service='iam', generate_account_id=True)
            with patch.object(ReportDownloader, 'get_reports') as download_call:
                _get_report_files(customer_name=self.fake.word(),
                                  authentication=account,
                                  provider_type='AWS',
                                  report_name=self.fake.word(),
                                  provider_uuid=self.aws_test_provider_uuid,
                                  billing_source=self.fake.word())

                download_call.assert_called_with(initial_month_qty)
        finally:
            Config.INGEST_OVERRIDE = saved_override
            Config.INITIAL_INGEST_NUM_MONTHS = saved_num_months
Пример #12
0
    def test_get_report(self, fake_downloader):
        """Check that _get_report_files returns a non-empty list.

        fake_downloader is not referenced in the body; presumably it is a
        mock injected by a patch decorator outside this view.
        """
        arn = fake_arn(service='iam', generate_account_id=True)
        request = {
            'customer_name': self.fake.word(),
            'authentication': arn,
            'provider_type': 'AWS',
            'report_name': self.fake.word(),
            'billing_source': self.fake.word(),
        }
        files = _get_report_files(**request)

        self.assertIsInstance(files, list)
        self.assertGreater(len(files), 0)
Пример #13
0
    def test_disk_status_logging(self, fake_downloader):
        """Verify a disk-space INFO message is logged when the temp directory exists.

        Args:
            fake_downloader: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
        """
        # Lift any global logging.disable() so assertLogs can capture INFO records.
        logging.disable(logging.NOTSET)

        os.makedirs(Config.TMP_DIR, exist_ok=True)

        account = fake_arn(service='iam', generate_account_id=True)
        # The spelling "Avaiable" is kept verbatim; presumably it mirrors the
        # message emitted by the code under test.
        expected = 'INFO:masu.processor._tasks.download:Avaiable disk space'
        try:
            with self.assertLogs('masu.processor._tasks.download', level='INFO') as logger:
                _get_report_files(customer_name=self.fake.word(),
                                  authentication=account,
                                  provider_type='AWS',
                                  report_name=self.fake.word(),
                                  billing_source=self.fake.word())
                # Substring match against every captured log line.
                self.assertTrue(any(expected in log for log in logger.output))
        finally:
            # Clean up even when the call or the assertion fails, so the
            # directory does not leak into other tests.
            shutil.rmtree(Config.TMP_DIR, ignore_errors=True)
Пример #14
0
    def test_disk_status_logging_no_dir(self, fake_downloader):
        """Check the exact INFO line logged when the PVC directory does not exist.

        Args:
            fake_downloader: Not referenced in the body; presumably a mock
                injected by a patch decorator outside this view.
        """
        # Lift any global logging.disable() so assertLogs can capture INFO records.
        logging.disable(logging.NOTSET)

        # Config is shared state: remember the real value and restore it in
        # the finally block so the bogus path does not leak into other tests.
        original_pvc_dir = Config.PVC_DIR
        Config.PVC_DIR = "/this/path/does/not/exist"
        try:
            account = fake_arn(service="iam", generate_account_id=True)
            expected = (
                "INFO:masu.processor._tasks.download:Unable to find"
                + f" available disk space. {Config.PVC_DIR} does not exist"
            )
            with self.assertLogs("masu.processor._tasks.download", level="INFO") as logger:
                _get_report_files(
                    Mock(),
                    customer_name=self.fake.word(),
                    authentication=account,
                    provider_type=Provider.PROVIDER_AWS,
                    report_month=DateAccessor().today(),
                    provider_uuid=self.aws_provider_uuid,
                    billing_source=self.fake.word(),
                )
                # Exact match: the whole formatted line must be in the output list.
                self.assertIn(expected, logger.output)
        finally:
            Config.PVC_DIR = original_pvc_dir
Пример #15
0
    def test_get_report(self, fake_downloader):
        """Check that _get_report_files returns a non-empty list.

        fake_downloader is not referenced in the body; presumably it is a
        mock injected by a patch decorator outside this view.
        """
        arn = fake_arn(service="iam", generate_account_id=True)

        # A Mock stands in for the first positional argument of the helper.
        task = Mock()
        request = {
            "customer_name": self.fake.word(),
            "authentication": arn,
            "provider_type": Provider.PROVIDER_AWS,
            "report_month": DateAccessor().today(),
            "provider_uuid": self.aws_provider_uuid,
            "billing_source": self.fake.word(),
        }
        files = _get_report_files(task, **request)

        self.assertIsInstance(files, list)
        self.assertGreater(len(files), 0)
Пример #16
0
def get_report_files(self, customer_name, authentication, billing_source,
                     provider_type, schema_name, provider_uuid, report_month):
    """
    Task to download a Report and process the report.

    Each downloaded report is skipped when its stats show it has already
    completed, or when it was started less than two hours ago and has not
    completed yet. Every other report is handed to _process_report_file.

    FIXME: A 2 hour timeout is arbitrarily set for in progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production
    this value can be adjusted or made configurable.

    Args:
        self                      : Forwarded as the first argument of _get_report_files
                                    (presumably the bound task instance - confirm).
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        provider_uuid             : Provider identifier forwarded to the download and
                                    processing helpers.
        report_month      (String): Month to download; parsed with parser.parse.

    Returns:
        (List) One dict per distinct manifest that was processed, with the keys
               schema_name, provider_type, provider_uuid and manifest_id.

    Raises:
        ReportProcessorError: Re-raised after being counted and logged.

    """
    worker_stats.GET_REPORT_ATTEMPTS_COUNTER.labels(
        provider_type=provider_type).inc()
    month = parser.parse(report_month)
    reports = _get_report_files(self, customer_name, authentication,
                                billing_source, provider_type, provider_uuid,
                                month)

    reports_to_summarize = []
    # Manifest ids already queued for summary; kept in a set so the
    # de-duplication check does not rescan the result list on every report.
    seen_manifest_ids = set()

    try:
        summary_lines = [
            "Reports to be processed:",
            f" schema_name: {customer_name}",
            f" provider: {provider_type}",
            f" provider_uuid: {provider_uuid}",
        ]
        summary_lines.extend(
            " file: " + str(report["file"]) for report in reports)
        LOG.info("\n".join(summary_lines))

        for report_dict in reports:
            manifest_id = report_dict.get("manifest_id")
            file_name = os.path.basename(report_dict.get("file"))
            with ReportStatsDBAccessor(file_name, manifest_id) as stats:
                started_date = stats.get_last_started_datetime()
                completed_date = stats.get_last_completed_datetime()

            # Skip processing if already in progress.
            if started_date and not completed_date:
                expired_start_date = started_date + datetime.timedelta(hours=2)
                if DateAccessor().today_with_timezone(
                        "UTC") < expired_start_date:
                    LOG.info(
                        "Skipping processing task for %s since it was started at: %s.",
                        file_name, str(started_date))
                    continue

            # Skip processing if complete.
            if started_date and completed_date:
                LOG.info(
                    "Skipping processing task for %s. Started on: %s and completed on: %s.",
                    file_name,
                    str(started_date),
                    str(completed_date),
                )
                continue

            stmt = (f"Processing starting:\n"
                    f" schema_name: {customer_name}\n"
                    f" provider: {provider_type}\n"
                    f" provider_uuid: {provider_uuid}\n"
                    f' file: {report_dict.get("file")}')
            LOG.info(stmt)
            worker_stats.PROCESS_REPORT_ATTEMPTS_COUNTER.labels(
                provider_type=provider_type).inc()
            _process_report_file(schema_name, provider_type, provider_uuid,
                                 report_dict)

            # Re-read the id after processing, as the original code did.
            processed_manifest_id = report_dict.get("manifest_id")
            if processed_manifest_id not in seen_manifest_ids:
                seen_manifest_ids.add(processed_manifest_id)
                reports_to_summarize.append({
                    "schema_name": schema_name,
                    "provider_type": provider_type,
                    "provider_uuid": provider_uuid,
                    "manifest_id": processed_manifest_id,
                })
    except ReportProcessorError as processing_error:
        worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
            provider_type=provider_type).inc()
        LOG.error(str(processing_error))
        raise

    return reports_to_summarize
Пример #17
0
def get_report_files(self, customer_name, authentication, billing_source,
                     provider_type, schema_name, provider_uuid, report_month):
    """
    Task to download the reports for a provider and process each of them.

    A report whose stats show it was started but not completed is skipped
    while it is younger than Config.REPORT_PROCESSING_TIMEOUT_HOURS. Each
    report is handled inside its own transaction.atomic() block. When at
    least one processed report carried a start_date, convert_to_parquet is
    queued afterwards.

    Args:
        self                      : Forwarded to _get_report_files; self.request.id is
                                    passed to convert_to_parquet.
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        provider_uuid             : Provider identifier; also the first part of the cache key.
        report_month              : Month to download; parsed with parser.parse when it is a str.

    Returns:
        (List) One dict per distinct manifest that was processed, with the keys
               schema_name, provider_type, provider_uuid and manifest_id.

    Raises:
        ReportProcessorError, ReportProcessorDBError: Re-raised after being counted
            and logged and after the cache key has been removed.

    """
    worker_stats.GET_REPORT_ATTEMPTS_COUNTER.labels(
        provider_type=provider_type).inc()
    month = parser.parse(report_month) if isinstance(report_month, str) else report_month

    cache_key = f"{provider_uuid}:{month}"
    reports = _get_report_files(self, customer_name, authentication,
                                billing_source, provider_type, provider_uuid,
                                month, cache_key)

    header = (f"Reports to be processed:\n"
              f" schema_name: {customer_name}\n"
              f" provider: {provider_type}\n"
              f" provider_uuid: {provider_uuid}\n")
    file_lines = "".join(
        " file: " + str(report["file"]) + "\n" for report in reports)
    # Drop the trailing newline before logging.
    LOG.info((header + file_lines)[:-1])

    reports_to_summarize = []
    start_date = None

    for report_dict in reports:
        with transaction.atomic():
            try:
                manifest_id = report_dict.get("manifest_id")
                file_name = os.path.basename(report_dict.get("file"))
                with ReportStatsDBAccessor(file_name, manifest_id) as stats:
                    started_at = stats.get_last_started_datetime()
                    completed_at = stats.get_last_completed_datetime()

                # An in-progress report is left alone until its timeout expires.
                if started_at and not completed_at:
                    deadline = started_at + datetime.timedelta(
                        hours=Config.REPORT_PROCESSING_TIMEOUT_HOURS)
                    if DateAccessor().today_with_timezone("UTC") < deadline:
                        LOG.info(
                            "Skipping processing task for %s since it was started at: %s.",
                            file_name,
                            str(started_at),
                        )
                        continue

                stmt = (f"Processing starting:\n"
                        f" schema_name: {customer_name}\n"
                        f" provider: {provider_type}\n"
                        f" provider_uuid: {provider_uuid}\n"
                        f' file: {report_dict.get("file")}')
                LOG.info(stmt)

                # Keep the first truthy start_date seen across processed reports.
                start_date = start_date or report_dict.get("start_date")

                worker_stats.PROCESS_REPORT_ATTEMPTS_COUNTER.labels(
                    provider_type=provider_type).inc()
                _process_report_file(schema_name, provider_type, provider_uuid,
                                     report_dict)

                already_listed = [
                    entry.get("manifest_id") for entry in reports_to_summarize
                ]
                if report_dict.get("manifest_id") not in already_listed:
                    reports_to_summarize.append({
                        "schema_name": schema_name,
                        "provider_type": provider_type,
                        "provider_uuid": provider_uuid,
                        "manifest_id": report_dict.get("manifest_id"),
                    })
            except (ReportProcessorError,
                    ReportProcessorDBError) as processing_error:
                worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
                    provider_type=provider_type).inc()
                LOG.error(str(processing_error))
                WorkerCache().remove_task_from_cache(cache_key)
                raise processing_error

    WorkerCache().remove_task_from_cache(cache_key)
    if start_date:
        # NOTE(review): manifest_id here is whatever the last loop iteration
        # assigned, which may not be the report that supplied start_date -
        # confirm this is intended.
        start_date_str = start_date.strftime("%Y-%m-%d")
        convert_to_parquet.delay(self.request.id, schema_name[4:],
                                 provider_uuid, provider_type, start_date_str,
                                 manifest_id)

    return reports_to_summarize
Пример #18
0
def get_report_files(customer_name,
                     authentication,
                     billing_source,
                     provider_type,
                     schema_name,
                     report_name=None):
    """
    Task to download a Report and queue processing for each downloaded file.

    Note that report_name will be not optional once Koku can specify
    what report we should download.

    A file is skipped when its stats show it has already completed, or when
    it was started less than two hours ago and has not completed yet. Every
    other file is queued with process_report_file.delay.

    FIXME: A 2 hour timeout is arbitrarily set for in progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production
    this value can be adjusted or made configurable.

    Args:
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        report_name       (String): Name of the cost usage report to download.

    Returns:
        None

    """
    reports = _get_report_files(customer_name,
                                authentication,
                                billing_source,
                                provider_type,
                                report_name)

    # initiate chained async task
    LOG.info('Reports to be processed: %s', str(reports))
    for report_dict in reports:
        file_name = os.path.basename(report_dict.get('file'))
        stats = ReportStatsDBAccessor(file_name)
        try:
            started_date = stats.get_last_started_datetime()
            completed_date = stats.get_last_completed_datetime()
        finally:
            # Always release the session, even when a stats lookup raises.
            stats.close_session()

        # Skip processing if already in progress.
        if started_date and not completed_date:
            expired_start_date = (started_date + datetime.timedelta(hours=2))\
                .replace(tzinfo=pytz.UTC)
            if DateAccessor().today().replace(tzinfo=pytz.UTC) < expired_start_date:
                LOG.info('Skipping processing task for %s since it was started at: %s.',
                         file_name, str(started_date))
                continue

        # Skip processing if complete.
        if started_date and completed_date:
            LOG.info('Skipping processing task for %s. Started on: %s and completed on: %s.',
                     file_name, str(started_date), str(completed_date))
            continue

        request = {'schema_name': schema_name,
                   'report_path': report_dict.get('file'),
                   'compression': report_dict.get('compression')}
        result = process_report_file.delay(**request)
        LOG.info('Processing task queued - File: %s, Task ID: %s',
                 report_dict.get('file'),
                 str(result))
Пример #19
0
def get_report_files(  # noqa: C901
    self,
    customer_name,
    authentication,
    billing_source,
    provider_type,
    schema_name,
    provider_uuid,
    report_month,
    report_context,
    tracing_id=None,
):
    """
    Task to download a Report and process the report.

    The task registers a cache key built from provider_uuid and
    report_context["key"], downloads the report, processes it, and removes
    the cache key again on every exit path.

    Args:
        self                      : Not referenced in the body (presumably the bound
                                    task instance - confirm).
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        provider_uuid             : Provider identifier; also the first part of the cache key.
        report_month              : Month to download; parsed with parser.parse when it is a str.
        report_context    (Dict)  : Read for the "key" and "assembly_id" entries and
                                    forwarded to _get_report_files.
        tracing_id                : Used for logging only until report_context has been
                                    read; after that it is replaced by
                                    report_context["assembly_id"] (or "no-tracing-id").

    Returns:
        (Dict) schema_name, provider_type, provider_uuid, manifest_id and tracing_id
               of the processed report, or None when nothing was downloaded or an
               error was handled.

    """
    # Bound before the try block so the outer exception handlers can always
    # reference them, even when the failure happens during the setup steps.
    cache_key = None
    context = {"provider_uuid": provider_uuid}
    try:
        worker_stats.GET_REPORT_ATTEMPTS_COUNTER.labels(
            provider_type=provider_type).inc()
        month = report_month
        if isinstance(report_month, str):
            month = parser.parse(report_month)
        report_file = report_context.get("key")
        cache_key = f"{provider_uuid}:{report_file}"
        # NOTE(review): this overwrites the tracing_id argument unconditionally -
        # confirm that callers are not expected to be able to supply their own.
        tracing_id = report_context.get("assembly_id", "no-tracing-id")
        WorkerCache().add_task_to_cache(cache_key)

        context = {
            "account": customer_name[4:],
            "provider_uuid": provider_uuid
        }

        try:
            report_dict = _get_report_files(
                tracing_id,
                customer_name,
                authentication,
                billing_source,
                provider_type,
                provider_uuid,
                month,
                report_context,
            )
        except (MasuProcessingError, MasuProviderError,
                ReportDownloaderError) as err:
            worker_stats.REPORT_FILE_DOWNLOAD_ERROR_COUNTER.labels(
                provider_type=provider_type).inc()
            WorkerCache().remove_task_from_cache(cache_key)
            LOG.warning(log_json(tracing_id, str(err), context))
            return

        stmt = (f"Reports to be processed: "
                f" schema_name: {customer_name} "
                f" provider: {provider_type} "
                f" provider_uuid: {provider_uuid}")
        if report_dict:
            stmt += f" file: {report_dict['file']}"
            LOG.info(log_json(tracing_id, stmt, context))
        else:
            # Nothing to process: release the cache entry and stop.
            WorkerCache().remove_task_from_cache(cache_key)
            return None

        report_meta = {
            "schema_name": schema_name,
            "provider_type": provider_type,
            "provider_uuid": provider_uuid,
            "manifest_id": report_dict.get("manifest_id"),
            "tracing_id": tracing_id,
        }

        try:
            stmt = (f"Processing starting: "
                    f" schema_name: {customer_name} "
                    f" provider: {provider_type} "
                    f" provider_uuid: {provider_uuid} "
                    f' file: {report_dict.get("file")}')
            LOG.info(log_json(tracing_id, stmt))
            worker_stats.PROCESS_REPORT_ATTEMPTS_COUNTER.labels(
                provider_type=provider_type).inc()

            report_dict["tracing_id"] = tracing_id
            report_dict["provider_type"] = provider_type

            _process_report_file(schema_name, provider_type, report_dict)

        except (ReportProcessorError,
                ReportProcessorDBError) as processing_error:
            worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
                provider_type=provider_type).inc()
            LOG.error(log_json(tracing_id, str(processing_error), context))
            WorkerCache().remove_task_from_cache(cache_key)
            # NOTE(review): this re-raise happens inside the outer try, so the
            # generic handler at the bottom presumably catches it again (second
            # log line and counter increment, task returns None) - confirm.
            raise processing_error
        except NotImplementedError as err:
            # The cache entry is released by the common call just below.
            LOG.info(log_json(tracing_id, str(err), context))

        WorkerCache().remove_task_from_cache(cache_key)

        return report_meta
    except ReportDownloaderWarning as err:
        LOG.warning(log_json(tracing_id, str(err), context))
        if cache_key is not None:
            WorkerCache().remove_task_from_cache(cache_key)
    except Exception as err:
        worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
            provider_type=provider_type).inc()
        LOG.error(log_json(tracing_id, str(err), context))
        # cache_key is still None when the failure happened before it was built.
        if cache_key is not None:
            WorkerCache().remove_task_from_cache(cache_key)
Пример #20
0
def get_report_files(
    self,
    customer_name,
    authentication,
    billing_source,
    provider_type,
    schema_name,
    provider_uuid,
    report_month,
    report_context,
):
    """
    Task to download a Report and process the report.

    The task registers a cache key built from provider_uuid and the report
    month's date, downloads the report, processes it and, when the report
    carries a start_date, queues convert_to_parquet.

    Args:
        self                      : Forwarded to _get_report_files; self.request.id is
                                    passed to convert_to_parquet.
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        provider_uuid             : Provider identifier; also the first part of the cache key.
        report_month              : Month to download; parsed with parser.parse when it is a str.
        report_context    (Dict)  : Forwarded to _get_report_files; its "local_file"
                                    entry is passed to convert_to_parquet.

    Returns:
        (Dict) schema_name, provider_type, provider_uuid and manifest_id of the
               processed report, or None when nothing was downloaded.

    Raises:
        ReportProcessorError, ReportProcessorDBError: Re-raised after being counted
            and logged and after the cache key has been removed.

    """
    worker_stats.GET_REPORT_ATTEMPTS_COUNTER.labels(
        provider_type=provider_type).inc()
    month = report_month
    if isinstance(report_month, str):
        month = parser.parse(report_month)

    cache_key = f"{provider_uuid}:{month.date()}"
    WorkerCache().add_task_to_cache(cache_key)

    report_dict = _get_report_files(
        self,
        customer_name,
        authentication,
        billing_source,
        provider_type,
        provider_uuid,
        month,
        cache_key,
        report_context,
    )

    if not report_dict:
        # Nothing was downloaded: release the cache entry registered above so
        # it does not linger after the task has finished.
        WorkerCache().remove_task_from_cache(cache_key)
        return None

    stmt = (f"Reports to be processed:\n"
            f" schema_name: {customer_name}\n"
            f" provider: {provider_type}\n"
            f" provider_uuid: {provider_uuid}\n"
            f" file: {report_dict['file']}")
    LOG.info(stmt)

    try:
        stmt = (f"Processing starting:\n"
                f" schema_name: {customer_name}\n"
                f" provider: {provider_type}\n"
                f" provider_uuid: {provider_uuid}\n"
                f' file: {report_dict.get("file")}')
        LOG.info(stmt)
        worker_stats.PROCESS_REPORT_ATTEMPTS_COUNTER.labels(
            provider_type=provider_type).inc()

        _process_report_file(schema_name, provider_type, report_dict)

        report_meta = {
            "schema_name": schema_name,
            "provider_type": provider_type,
            "provider_uuid": provider_uuid,
            "manifest_id": report_dict.get("manifest_id"),
        }

    except (ReportProcessorError, ReportProcessorDBError) as processing_error:
        worker_stats.PROCESS_REPORT_ERROR_COUNTER.labels(
            provider_type=provider_type).inc()
        LOG.error(str(processing_error))
        WorkerCache().remove_task_from_cache(cache_key)
        raise processing_error

    WorkerCache().remove_task_from_cache(cache_key)
    start_date = report_dict.get("start_date")
    manifest_id = report_dict.get("manifest_id")
    if start_date:
        start_date_str = start_date.strftime("%Y-%m-%d")
        convert_to_parquet.delay(
            self.request.id,
            schema_name[4:],
            provider_uuid,
            provider_type,
            start_date_str,
            manifest_id,
            [report_context.get("local_file")],
        )
    return report_meta