def remove_temp_cur_files(path):
    """
    Remove processed cost usage report files that belong to older assemblies.

    The directory is scanned once. A file whose name ends in
    ``Manifest.json`` supplies the current assembly ID. Every other file is
    looked up through ``ReportStatsDBAccessor``; files that have a last
    completed datetime become removal candidates. Candidates whose assembly
    ID differs from the current one are deleted.

    Args:
        path (String): directory containing the downloaded report files

    Returns:
        ([String]) paths of the files that were removed

    """
    current_assembly_id = None
    candidates = []
    for file_name in os.listdir(path):
        file_path = '{}/{}'.format(path, file_name)
        if file_name.endswith('Manifest.json'):
            with open(file_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('assemblyId')
            continue

        stats = ReportStatsDBAccessor(file_name)
        try:
            completed_date = stats.get_last_completed_datetime()
        finally:
            # Release the accessor's session even if the lookup fails;
            # previously one session was left open per scanned file.
            stats.close_session()

        if completed_date:
            assembly_id = utils.extract_uuids_from_string(file_name).pop()
            candidates.append({'file': file_path,
                               'completed_date': completed_date,
                               'assemblyId': assembly_id})

    # Deletion waits until the whole directory has been scanned because the
    # manifest may be listed after the report files it describes.
    removed_files = []
    for candidate in candidates:
        if candidate['assemblyId'] != current_assembly_id:
            LOG.info('Removing %s, completed processing on date %s',
                     candidate['file'], candidate['completed_date'])
            os.remove(candidate['file'])
            removed_files.append(candidate['file'])
    return removed_files
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Process one downloaded report file and record the outcome.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) report data from the previous task; the keys read
                      here are 'start_date', 'file', 'compression',
                      'manifest_id' and 'provider_id'

    Returns:
        None

    """
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    manifest_id = report_dict.get('manifest_id')

    LOG.info(
        'Processing Report: schema_name: {}, report_path: {}, compression: {},'
        ' provider: {}, start_date: {}'.format(
            schema_name, report_path, compression, provider,
            report_dict.get('start_date')))

    memory = psutil.virtual_memory()
    LOG.info('Avaiable memory: {} bytes ({}%)'.format(memory.free, memory.percent))

    # The stats row is keyed by the bare file name, not the full path.
    file_name = report_path.rpartition('/')[2]

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()

    processor = ReportProcessor(
        schema_name=schema_name,
        report_path=report_path,
        compression=compression,
        provider=provider,
        provider_id=report_dict.get('provider_id'),
        manifest_id=manifest_id,
    )
    processor.process()

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        if not manifest:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)
        else:
            manifest.num_processed_files += 1
            manifest.save()
            manifest_accessor.mark_manifest_as_updated(manifest)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        provider_accessor.setup_complete()

    removed = processor.remove_processed_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(removed))
def test_initializer_preexisting_report(self):
    """Test that a new accessor for a preexisting report sees the stored stats."""
    saver = ReportStatsDBAccessor('myreport', self.manifest_id)
    saver.update(
        cursor_position=33,
        last_completed_datetime='2011-1-1 11:11:11',
        last_started_datetime='2022-2-2 22:22:22',
        etag='myetag',
    )
    saver.commit()
    self.assertIsNotNone(saver._obj)

    # Get another accessor for the same report and verify we get back the
    # right information.
    saver2 = ReportStatsDBAccessor('myreport', self.manifest_id)
    last_completed = saver2.get_last_completed_datetime()
    self.assertEqual(last_completed.year, 2011)
    self.assertEqual(last_completed.month, 1)
    self.assertEqual(last_completed.day, 1)
    self.assertEqual(last_completed.hour, 11)
    self.assertEqual(last_completed.minute, 11)
    self.assertEqual(last_completed.second, 11)

    self.assertEqual(saver.get_etag(), 'myetag')
    # The etag was previously only checked on the accessor that wrote it;
    # also check the second accessor, which is the one under test here.
    self.assertEqual(saver2.get_etag(), 'myetag')
def test_process_report_files_with_transaction_atomic_error(
        self, mock_files, mock_processor):
    """Test that an exception rolls back the atomic transaction.

    ``mock_processor`` is made to raise, and the test then checks that the
    report stats, the manifest counters and the provider setup flag are
    unchanged.
    """
    path = "{}/{}".format("test", "file1.csv")
    mock_files.return_value = [{"file": path, "compression": "GZIP"}]
    schema_name = self.schema
    provider = Provider.PROVIDER_AWS
    provider_uuid = self.aws_provider_uuid
    report_month = DateHelper().today
    manifest_dict = {
        "assembly_id": "12345",
        "billing_period_start_datetime": report_month,
        "num_total_files": 1,
        "provider_uuid": self.aws_provider_uuid,
        "task": "170653c0-3e66-4b7e-a764-336496d7ca5a",
    }
    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.add(**manifest_dict)
        manifest.save()
        manifest_id = manifest.id
        initial_update_time = manifest.manifest_updated_datetime

    with ReportStatsDBAccessor("file1.csv", manifest_id) as stats_accessor:
        # This was a bare attribute reference (missing parentheses) and so
        # did nothing; call the method as intended.
        stats_accessor.get_last_completed_datetime()

    with ReportStatsDBAccessor(path, manifest_id) as report_file_accessor:
        report_file_accessor.get_last_started_datetime()

    mock_processor.side_effect = Exception

    customer_name = "Fake Customer"
    authentication = "auth"
    billing_source = "bill"
    provider_type = provider
    # Only the call expected to raise lives inside assertRaises.
    with self.assertRaises(Exception):
        get_report_files(
            customer_name=customer_name,
            authentication=authentication,
            billing_source=billing_source,
            provider_type=provider_type,
            schema_name=schema_name,
            provider_uuid=provider_uuid,
            report_month=report_month,
        )

    with ReportStatsDBAccessor(path, manifest_id) as report_file_accessor:
        self.assertIsNone(
            report_file_accessor.get_last_completed_datetime())

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        self.assertEqual(manifest.num_processed_files, 0)
        self.assertEqual(manifest.manifest_updated_datetime,
                         initial_update_time)

    with ProviderDBAccessor(
            provider_uuid=provider_uuid) as provider_accessor:
        self.assertFalse(provider_accessor.get_setup_complete())
def download_report(self, date_time):
    """
    Download CUR for a given date.

    Each key listed under the manifest's 'reportKeys' is downloaded with the
    etag stored for its local file name, and the etag returned by the
    download is written back to the stats record.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries containing file path and compression.

    """
    LOG.info('Current date is %s. Attempting to get manifest...', str(date_time))
    manifest = self._get_manifest(date_time)
    # A manifest without 'reportKeys' yields no reports instead of failing
    # with a TypeError when iterating None.
    reports = manifest.get('reportKeys') or []

    cur_reports = []
    for report in reports:
        local_s3_filename = utils.get_local_file_name(report)
        stats_recorder = ReportStatsDBAccessor(local_s3_filename)
        try:
            stored_etag = stats_recorder.get_etag()
            file_name, etag = self.download_file(report, stored_etag)
            stats_recorder.update(etag=etag)
            stats_recorder.commit()
        finally:
            # Close the session even when the download raises so a failed
            # report does not leak it.
            stats_recorder.close_session()

        cur_reports.append({
            'file': file_name,
            'compression': self.report.get('Compression'),
        })
    return cur_reports
def test_process_report_files_with_transaction_atomic_error(
        self, mock_processor, mock_setup_complete):
    """Check that a failure in provider setup leaves no partial updates behind."""
    report_path = '{}/{}'.format('test', 'file1.csv')
    provider_uuid = self.aws_provider_uuid

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.add(
            **{
                'assembly_id': '12345',
                'billing_period_start_datetime': DateAccessor().today_with_timezone('UTC'),
                'num_total_files': 2,
                'provider_uuid': self.aws_provider_uuid,
                'task': '170653c0-3e66-4b7e-a764-336496d7ca5a',
            }
        )
        manifest.save()
        manifest_id = manifest.id
        updated_before = manifest.manifest_updated_datetime

    with ReportStatsDBAccessor(report_path, manifest_id) as stats_accessor:
        stats_accessor.get_last_started_datetime()

    report_dict = {
        'file': report_path,
        'compression': 'gzip',
        'start_date': str(DateAccessor().today()),
        'manifest_id': manifest_id,
    }

    # Make the mocked setup-complete call raise.
    mock_setup_complete.side_effect = Exception

    with self.assertRaises(Exception):
        _process_report_file(self.schema, Provider.PROVIDER_AWS,
                             provider_uuid, report_dict)

    # Nothing recorded before the failure should have been kept.
    with ReportStatsDBAccessor(report_path, manifest_id) as stats_accessor:
        self.assertIsNone(stats_accessor.get_last_completed_datetime())

    with ReportManifestDBAccessor() as manifest_accessor:
        reloaded = manifest_accessor.get_manifest_by_id(manifest_id)
        self.assertEqual(reloaded.num_processed_files, 0)
        self.assertEqual(reloaded.manifest_updated_datetime, updated_before)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        self.assertFalse(provider_accessor.get_setup_complete())
def test_get_last_report_completed_datetime(self):
    """Check that the most recent report completion time is the one returned."""
    manifest = self.manifest_accessor.add(**self.manifest_dict)
    earlier_time = DateAccessor().today_with_timezone('UTC')
    later_time = earlier_time + datetime.timedelta(hours=1)

    # Two reports on the same manifest, completed one hour apart.
    earlier_stats = ReportStatsDBAccessor('earlier_report', manifest.id)
    earlier_stats.update(last_completed_datetime=earlier_time)
    later_stats = ReportStatsDBAccessor('later_report', manifest.id)
    later_stats.update(last_completed_datetime=later_time)

    self.assertEqual(
        self.manifest_accessor.get_last_report_completed_datetime(manifest.id),
        later_time,
    )
def test_remove_temp_cur_files(self):
    """Test to remove temporary cost usage files.

    Files that have a processed date and do not start with the manifest's
    assembly ID are expected to be removed.
    """
    cur_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an
    # assertion below fails.
    self.addCleanup(shutil.rmtree, cur_dir)

    manifest_data = {"assemblyId": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
    manifest = '{}/{}'.format(cur_dir, 'koku-Manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    file_list = [
        {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-1.csv.gz',
         'processed_date': datetime.datetime(year=2018, month=5, day=3)},
        {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-2.csv.gz',
         'processed_date': datetime.datetime(year=2018, month=5, day=3)},
        {'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-koku-1.csv.gz',
         'processed_date': datetime.datetime(year=2018, month=5, day=2)},
        {'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-koku-1.csv.gz',
         'processed_date': datetime.datetime(year=2018, month=5, day=1)},
        {'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-koku-1.csv.gz',
         'processed_date': None},
    ]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(cur_dir, item['file'])
        # Create an empty file; the context manager closes the handle even
        # if a later statement raises.
        with open(path, 'w'):
            pass
        obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                  self.provider_id)
        stats = ReportStatsDBAccessor(item['file'], obj.id)
        try:
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
        finally:
            stats.close_session()
        if (not item['file'].startswith(manifest_data.get('assemblyId'))
                and item['processed_date']):
            expected_delete_list.append(path)

    removed_files = self.processor.remove_temp_cur_files(cur_dir)
    self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
def test_clear_last_started_date(self):
    """Check that clearing the last started datetime resets it to None."""
    stats = ReportStatsDBAccessor("myreport", self.manifest_id)

    # Record a start time and confirm it is readable.
    stats.log_last_started_datetime()
    started = stats.get_last_started_datetime()
    self.assertIsNotNone(started)

    # After clearing, the getter reports no start time.
    stats.clear_last_started_datetime()
    cleared = stats.get_last_started_datetime()
    self.assertIsNone(cleared)
def test_initializer(self):
    """Check that a freshly built accessor holds a session, then clean up."""
    stats = ReportStatsDBAccessor('myreport')
    session = stats._session
    self.assertIsNotNone(session)

    # Remove the entry again, then commit and close the session.
    stats.remove()
    stats.commit()
    stats.close_session()
def record_report_status(manifest_id, file_name):
    """
    Open the report status accessor for a report file and report prior completion.

    A report that was already downloaded from the ingress service may also
    have been processed already. The last completed datetime is returned so
    the caller can decide whether processing should continue.

    Args:
        manifest_id (Integer): Manifest Identifier.
        file_name (String): Report file name

    Returns:
        DateTime - Last completed date time for a given report file.

    """
    last_completed = False
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_accessor:
        last_completed = stats_accessor.get_last_completed_datetime()
        if not last_completed:
            LOG.info(f"Recording stats entry for {file_name}")
        else:
            LOG.info(f"Report {file_name} has already been processed.")
    return last_completed
def record_report_status(manifest_id, file_name, request_id, context=None):
    """
    Creates initial report status database entry for new report files.

    If a report has already been downloaded from the ingress service
    there is a chance that processing has already been complete.  The
    function returns the last completed date time to determine if the
    report processing should continue in extract_payload.

    Args:
        manifest_id (Integer): Manifest Identifier.
        file_name (String): Report file name
        request_id (String): Identifier associated with the payload
        context (Dict): Context for logging (account, etc). Defaults to an
            empty dict when omitted.

    Returns:
        DateTime - Last completed date time for a given report file.

    """
    # A literal `{}` default would be one dict shared by every call; build a
    # fresh one per call instead.
    if context is None:
        context = {}

    already_processed = False
    with ReportStatsDBAccessor(file_name, manifest_id) as db_accessor:
        already_processed = db_accessor.get_last_completed_datetime()
        if already_processed:
            msg = f"Report {file_name} has already been processed."
        else:
            msg = f"Recording stats entry for {file_name}"
        LOG.info(log_json(request_id, msg, context))
    return already_processed
def download_report(self, date_time):
    """
    Download every report file listed in the report context for a date.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries, one per downloaded file, holding the
        file path, compression, start date, assembly id, manifest id and
        provider uuid.

    """
    LOG.info('Attempting to get %s manifest for %s...',
             self.provider_type, str(date_time))
    report_context = self._downloader.get_report_context_for_date(date_time)
    manifest_id = report_context.get('manifest_id')

    downloaded = []
    for report in report_context.get('files', []):
        local_file_name = self._downloader.get_local_file_for_report(report)

        # Download using the etag stored for this file, then store the etag
        # returned by the download.
        with ReportStatsDBAccessor(local_file_name, manifest_id) as stats_recorder:
            stored_etag = stats_recorder.get_etag()
            file_name, etag = self._downloader.download_file(report, stored_etag)
            stats_recorder.update(etag=etag)

        downloaded.append({
            'file': file_name,
            'compression': report_context.get('compression'),
            'start_date': date_time,
            'assembly_id': report_context.get('assembly_id'),
            'manifest_id': manifest_id,
            'provider_uuid': self.provider_uuid,
        })
    return downloaded
def test_check_if_manifest_should_be_downloaded_error_processing_manifest(
        self):
    """Check that a manifest with an unfinished report is downloaded again."""
    reports = CostUsageReportStatus.objects.filter(
        manifest_id=self.manifest_id)
    finished_report = reports[0]
    unfinished_report = reports[1]

    # First report: started and completed.
    with ReportStatsDBAccessor(finished_report.report_name,
                               finished_report.manifest_id) as file_accessor:
        file_accessor.log_last_started_datetime()
        file_accessor.log_last_completed_datetime()

    # Second report: started, completion time explicitly cleared.
    with ReportStatsDBAccessor(unfinished_report.report_name,
                               unfinished_report.manifest_id) as file_accessor:
        file_accessor.log_last_started_datetime()
        file_accessor.update(last_completed_datetime=None)

    should_download = self.downloader.check_if_manifest_should_be_downloaded(
        self.assembly_id)
    self.assertTrue(should_download)
def download_report(self, date_time):
    """
    Download CUR for a given date.

    Each key listed under the manifest's 'reportKeys' is resolved against
    ``self.bucket_path`` and downloaded with the etag stored for its local
    file name; the etag returned by the download is then committed to the
    stats record.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries containing file path and compression.

    """
    manifest = self._get_manifest(date_time)
    # A manifest without 'reportKeys' yields no reports instead of failing
    # with a TypeError when iterating None.
    reports = manifest.get('reportKeys') or []

    cur_reports = []
    for report in reports:
        local_s3_filename = utils.get_local_file_name(report)
        stats_recorder = ReportStatsDBAccessor(local_s3_filename)
        try:
            stored_etag = stats_recorder.get_etag()

            report_path = self.bucket_path + '/' + report
            # NOTE(review): this logs self.credential - confirm it never
            # holds a secret value.
            LOG.info('Downloading %s with credential %s', report_path, self.credential)
            file_name, etag = self.download_file(report_path, stored_etag)
            stats_recorder.update(etag=etag)
            stats_recorder.commit()
        finally:
            # The session was never released before; close it on both the
            # success and failure paths.
            stats_recorder.close_session()

        cur_reports.append({'file': file_name, 'compression': 'GZIP'})
    return cur_reports
def test_log_last_completed_datetime(self):
    """Check that logging a completion time stores it, then delete the entry."""
    stats = ReportStatsDBAccessor('myreport', self.manifest_id)
    stats.log_last_completed_datetime()
    completed = stats.get_last_completed_datetime()
    self.assertIsNotNone(completed)

    # After deleting the entry, no status rows should remain.
    stats.delete()
    remaining = CostUsageReportStatus.objects.count()
    self.assertEqual(remaining, 0)
def generate_test_report_files(self):
    """
    Create a report stats entry for each expected file of the manifest.

    One entry named ``file_<index>`` is created per index in
    ``range(self._num_total_files)`` and the name is appended to
    ``self._report_files``.

    Returns:
        (String) name of the last file generated, or None when
        ``self._num_total_files`` is 0.

    """
    # Pre-bind so a manifest with zero files returns None instead of
    # raising UnboundLocalError at the return statement.
    file_name = None
    for file_cnt in range(self._num_total_files):
        file_name = f"file_{file_cnt}"
        with ReportStatsDBAccessor(file_name, self._manifest_id):
            print(
                f"Generating file entry ({file_name}) for manifest {self._manifest_id}"
            )
            self._report_files.append(file_name)
    return file_name
def test_process_report_files_with_transaction_atomic_error(self, mock_processor, mock_setup_complete):
    """Check that an exception during processing rolls everything back."""
    file_path = "{}/{}".format("test", "file1.csv")
    aws_uuid = self.aws_provider_uuid

    new_manifest = {
        "assembly_id": "12345",
        "billing_period_start_datetime": DateAccessor().today_with_timezone("UTC"),
        "num_total_files": 2,
        "provider_uuid": self.aws_provider_uuid,
        "task": "170653c0-3e66-4b7e-a764-336496d7ca5a",
    }
    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.add(**new_manifest)
        manifest.save()
        manifest_id = manifest.id
        original_update_time = manifest.manifest_updated_datetime

    with ReportStatsDBAccessor(file_path, manifest_id) as accessor:
        accessor.get_last_started_datetime()

    # Make the mocked setup-complete call raise.
    mock_setup_complete.side_effect = Exception

    with self.assertRaises(Exception):
        _process_report_file(
            self.schema,
            Provider.PROVIDER_AWS,
            aws_uuid,
            {
                "file": file_path,
                "compression": "gzip",
                "start_date": str(DateAccessor().today()),
                "manifest_id": manifest_id,
            },
        )

    # No completion time, no processed-file count, no manifest update and
    # no provider setup flag may survive the failure.
    with ReportStatsDBAccessor(file_path, manifest_id) as accessor:
        self.assertIsNone(accessor.get_last_completed_datetime())

    with ReportManifestDBAccessor() as manifest_accessor:
        stored = manifest_accessor.get_manifest_by_id(manifest_id)
        self.assertEqual(stored.num_processed_files, 0)
        self.assertEqual(stored.manifest_updated_datetime, original_update_time)

    with ProviderDBAccessor(provider_uuid=aws_uuid) as provider_accessor:
        self.assertFalse(provider_accessor.get_setup_complete())
def test_record_report_status(self):
    """Check that recording a report status keys the entry by manifest and file."""
    manifest_id, file_name = 1, "testreportfile.csv"
    msg_handler.record_report_status(manifest_id, file_name)

    # An accessor for the same pair should carry the same identifiers.
    with ReportStatsDBAccessor(file_name, manifest_id) as stats:
        self.assertEqual(stats._manifest_id, manifest_id)
        self.assertEqual(stats._report_name, file_name)
def generate_one_test_file(self):
    """
    Create the report stats entry for the next test file.

    The file is named after the number of files generated so far and is
    appended to ``self._report_files``.

    Returns:
        (String) the generated file name; None only if the ``with`` block
        exits without reaching its return statement.

    """
    next_index = len(self._report_files)
    file_name = f"file_{next_index}"
    with ReportStatsDBAccessor(file_name, self._manifest_id):
        print(
            f"Generating file entry ({file_name}) for manifest {self._manifest_id}"
        )
        self._report_files.append(file_name)
        return file_name
    # Fallback kept from the original for the case where the block above
    # does not return.
    return None
def download_report(self, report_context):
    """
    Download the current report file described by a report context.

    Args:
        report_context (dict): context for the file to download; the keys
            read here are "date", "manifest_id", "current_file",
            "compression", "assembly_id" and "create_table"

    Returns:
        (dict) details of the downloaded file, or an empty dict when the
        file was already processed or could not be downloaded.

    """
    date_time = report_context.get("date")
    msg = f"Attempting to get {self.provider_type} manifest for {str(date_time)}."
    LOG.info(log_json(self.tracing_id, msg, self.context))

    manifest_id = report_context.get("manifest_id")
    report = report_context.get("current_file")
    local_file_name = self._downloader.get_local_file_for_report(report)

    # Guard: nothing to do for a file that has been processed before.
    if self.is_report_processed(local_file_name, manifest_id):
        LOG.info(
            f"File has already been processed: {local_file_name}. Skipping..."
        )
        return {}

    with ReportStatsDBAccessor(local_file_name, manifest_id) as stats_recorder:
        stored_etag = stats_recorder.get_etag()
        try:
            download_result = self._downloader.download_file(
                report, stored_etag, manifest_id=manifest_id, start_date=date_time)
            file_name, etag, _, split_files = download_result
            stats_recorder.update(etag=etag)
        except (AWSReportDownloaderNoFileError, AzureReportDownloaderError) as error:
            LOG.warning(
                f"Unable to download report file: {report}. Reason: {str(error)}"
            )
            return {}

    # The create_table flag is used by the ParquetReportProcessor
    # to create a Hive/Trino table.
    result = {
        "file": file_name,
        "split_files": split_files,
        "compression": report_context.get("compression"),
        "start_date": date_time,
        "assembly_id": report_context.get("assembly_id"),
        "manifest_id": manifest_id,
        "provider_uuid": self.provider_uuid,
        "create_table": report_context.get("create_table", False),
    }
    return result
def tearDown(self):
    """Delete every report status and manifest object after each test case."""
    super().tearDown()

    # Report status entries go first, then the manifests.
    with ReportStatsDBAccessor(self.report_name, self.manifest_id) as file_accessor:
        for status_entry in file_accessor._get_db_obj_query().all():
            file_accessor.delete(status_entry)

    with ReportManifestDBAccessor() as manifest_accessor:
        for manifest_entry in manifest_accessor._get_db_obj_query().all():
            manifest_accessor.delete(manifest_entry)
def test_check_if_manifest_should_be_downloaded_error_no_complete_date(self, _):
    """Check that a manifest whose report never completed is downloaded again."""
    # Mark the manifest as half processed: one of two files done.
    with ReportManifestDBAccessor() as manifest_accessor:
        half_done = manifest_accessor.get_manifest_by_id(self.manifest_id)
        half_done.num_processed_files = 1
        half_done.num_total_files = 2
        half_done.save()

    # The report was started but has no completion time recorded.
    with ReportStatsDBAccessor(self.report_name, self.manifest_id) as stats:
        stats.log_last_started_datetime()

    self.assertTrue(
        self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
    )
def test_add_remove(self):
    """Check that creating an accessor adds an entry and delete() removes it."""
    stats = ReportStatsDBAccessor("myreport", self.manifest_id)
    self.assertTrue(stats.does_db_entry_exist())

    # The stored entry carries the report name it was created with.
    stored = stats._get_db_obj_query().first()
    self.assertEqual(stored.report_name, "myreport")

    # Once deleted, the same query finds nothing.
    stats.delete()
    after_delete = stats._get_db_obj_query().first()
    self.assertIsNone(after_delete)
def record_all_manifest_files(manifest_id, report_files):
    """
    Record every report file name against a manifest ID.

    Args:
        manifest_id: identifier of the manifest the files belong to
        report_files: iterable of report file names

    Returns:
        None

    """
    for report_name in report_files:
        try:
            with ReportStatsDBAccessor(report_name, manifest_id):
                LOG.debug(f"Logging {report_name} for manifest ID: {manifest_id}")
        except IntegrityError:
            # OCP records the entire file list for a new manifest when the
            # listener receives a payload. With multiple listeners it is
            # possible for two of them to receive a report file for the same
            # manifest at roughly the same time. In that case the report file
            # may already exist and an IntegrityError would be thrown.
            LOG.debug(f"Report {report_name} has already been recorded.")
def test_remove_temp_cur_files(self):
    """Test to remove temporary usage report files.

    Files that have a processed date and do not start with the manifest's
    uuid are expected to be removed.
    """
    insights_local_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an
    # assertion below fails.
    self.addCleanup(shutil.rmtree, insights_local_dir)

    manifest_data = {"uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
    manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    file_list = [
        {
            'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=2),
        },
        {
            'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=1),
        },
        {
            'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
            'processed_date': None,
        },
    ]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(insights_local_dir, item['file'])
        # Create an empty file; the context manager closes the handle even
        # if a later statement raises.
        with open(path, 'w'):
            pass
        stats = ReportStatsDBAccessor(item['file'], None)
        try:
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
        finally:
            stats.close_session()
        if (not item['file'].startswith(manifest_data.get('uuid'))
                and item['processed_date']):
            expected_delete_list.append(path)

    removed_files = self.ocp_processor.remove_temp_cur_files(
        insights_local_dir, manifest_id=None)
    self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
def test_check_if_manifest_should_be_downloaded_currently_processing_manifest(self, _):
    """Check that a manifest still being processed is not downloaded again."""
    # One of the manifest's two files is counted as processed.
    with ReportManifestDBAccessor() as manifest_accessor:
        in_progress = manifest_accessor.get_manifest_by_id(self.manifest_id)
        in_progress.num_processed_files = 1
        in_progress.num_total_files = 2
        in_progress.save()

    # The report has both a start and a completion time recorded.
    with ReportStatsDBAccessor(self.report_name, self.manifest_id) as stats:
        stats.log_last_started_datetime()
        stats.log_last_completed_datetime()

    self.assertFalse(
        self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
    )
def test_check_if_manifest_should_be_downloaded_done_processing_manifest(
        self):
    """Check that a fully processed manifest is not downloaded again."""
    status_rows = CostUsageReportStatus.objects.filter(
        manifest_id=self.manifest_id)

    # Give every report of the manifest a start and a completion time.
    for row in status_rows:
        with ReportStatsDBAccessor(row.report_name,
                                   row.manifest_id) as stats:
            stats.log_last_started_datetime()
            stats.log_last_completed_datetime()

    should_download = self.downloader.check_if_manifest_should_be_downloaded(
        self.assembly_id)
    self.assertFalse(should_download)
def download_report(self, date_time):
    """
    Download CUR for a given date.

    Files already marked as processed are skipped. Each remaining file is
    downloaded with the etag stored for it, and the etag returned by the
    download is written back to its stats record.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries containing file path and compression.

    """
    # The placeholder previously read `{self.provider_type,}`; the trailing
    # comma built a one-element tuple, so the message showed e.g. "('AWS',)".
    msg = f"Attempting to get {self.provider_type} manifest for {str(date_time)}..."
    LOG.info(log_json(self.request_id, msg, self.context))
    report_context = self._downloader.get_report_context_for_date(
        date_time)
    manifest_id = report_context.get("manifest_id")
    reports = report_context.get("files", [])
    cur_reports = []
    for report in reports:
        report_dictionary = {}
        local_file_name = self._downloader.get_local_file_for_report(
            report)

        if self.is_report_processed(local_file_name, manifest_id):
            msg = f"File has already been processed: {local_file_name}. Skipping..."
            LOG.info(log_json(self.request_id, msg, self.context))
            continue

        with ReportStatsDBAccessor(local_file_name,
                                   manifest_id) as stats_recorder:
            stored_etag = stats_recorder.get_etag()
            file_name, etag = self._downloader.download_file(
                report,
                stored_etag,
                manifest_id=manifest_id,
                start_date=date_time)
            stats_recorder.update(etag=etag)

        report_dictionary["file"] = file_name
        report_dictionary["compression"] = report_context.get(
            "compression")
        report_dictionary["start_date"] = date_time
        report_dictionary["assembly_id"] = report_context.get(
            "assembly_id")
        report_dictionary["manifest_id"] = manifest_id
        report_dictionary["provider_uuid"] = self.provider_uuid
        cur_reports.append(report_dictionary)
    return cur_reports
def test_azure_remove_temp_cur_files(self):
    """Test to remove temporary cost usage files.

    Files that have a processed date and whose name does not contain the
    manifest's assembly ID are expected to be removed.
    """
    cur_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an
    # assertion below fails.
    self.addCleanup(shutil.rmtree, cur_dir)

    manifest_data = {"assemblyId": "31727a10-f4b4-43a2-80e5-bef1aaeabfc1"}
    manifest = '{}/{}'.format(cur_dir, 'Manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    file_list = [
        {
            'file': 'costreport_31727a10-f4b4-43a2-80e5-bef1aaeabfc1.csv',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': 'costreport_31727a10-f4b4-43a2-80e5-bef1aaeabfc1.csv',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': 'costreport_2aeb9169-2526-441c-9eca-d7ed015d52bd.csv',
            'processed_date': datetime.datetime(year=2018, month=5, day=2),
        },
        {
            'file': 'costreport_6c8487e8-c590-4e6a-b2c2-91a2375c0bad.csv',
            'processed_date': datetime.datetime(year=2018, month=5, day=1),
        },
        {
            'file': 'costreport_6c8487e8-c590-4e6a-b2c2-91a2375d0bed.csv',
            'processed_date': None,
        },
    ]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(cur_dir, item['file'])
        # Create an empty file; the context manager closes the handle even
        # if a later statement raises.
        with open(path, 'w'):
            pass
        obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                  self.azure_provider.id)
        with ReportStatsDBAccessor(item['file'], obj.id) as stats:
            stats.update(last_completed_datetime=item['processed_date'])
        if (manifest_data.get('assemblyId') not in item['file']
                and item['processed_date']):
            expected_delete_list.append(path)

    removed_files = self.processor.remove_temp_cur_files(cur_dir)
    self.assertEqual(sorted(removed_files), sorted(expected_delete_list))