def test_update(self):
    """Update an existing row, read the values back, then delete the row."""
    clock_fields = ("year", "month", "day", "hour", "minute", "second")

    accessor = ReportStatsDBAccessor("myreport", self.manifest_id)
    query = accessor._get_db_obj_query()
    self.assertEqual(query.first().report_name, "myreport")

    completed_at = parser.parse("2011-1-1 11:11:11")
    started_at = parser.parse("2022-2-2 22:22:22")
    accessor.update(
        cursor_position=33,
        last_completed_datetime=completed_at,
        last_started_datetime=started_at,
        etag="myetag",
    )

    # Compare each datetime component individually, in the same order as
    # the values passed to update() above.
    last_completed = accessor.get_last_completed_datetime()
    for field, expected in zip(clock_fields, (2011, 1, 1, 11, 11, 11)):
        self.assertEqual(getattr(last_completed, field), expected)

    last_started = accessor.get_last_started_datetime()
    for field, expected in zip(clock_fields, (2022, 2, 2, 22, 22, 22)):
        self.assertEqual(getattr(last_started, field), expected)

    self.assertEqual(accessor.get_etag(), "myetag")

    # After delete() the query for this report must come back empty.
    accessor.delete()
    self.assertIsNone(accessor._get_db_obj_query().first())
def test_initializer_preexisting_report(self):
    """Test getting a new accessor stats on a preexisting report.

    A first accessor writes and commits values for 'myreport'. A second
    accessor created for the same report name must then read those values
    back. Cleanup runs in a ``finally`` block so a failing assertion does
    not leave the row or the sessions behind.
    """
    saver = ReportStatsDBAccessor('myreport')
    saver2 = None
    try:
        saver.update(cursor_position=33,
                     last_completed_datetime='1/1/2011 11:11:11',
                     last_started_datetime='2/2/22 22:22:22',
                     etag='myetag')
        saver.commit()
        self.assertIsNotNone(saver._session)

        # Get another accessor for the same report and verify we get back the right information.
        saver2 = ReportStatsDBAccessor('myreport')
        last_completed = saver2.get_last_completed_datetime()

        self.assertEqual(last_completed.year, 2011)
        self.assertEqual(last_completed.month, 1)
        self.assertEqual(last_completed.day, 1)
        self.assertEqual(last_completed.hour, 11)
        self.assertEqual(last_completed.minute, 11)
        self.assertEqual(last_completed.second, 11)

        # The etag must be visible through both the writing accessor and
        # the freshly created one; the latter is the point of this test.
        self.assertEqual(saver.get_etag(), 'myetag')
        self.assertEqual(saver2.get_etag(), 'myetag')
    finally:
        saver.remove()
        saver.commit()
        saver.close_session()
        if saver2 is not None:
            saver2.close_session()
def test_log_last_completed_datetime(self):
    """Test the convenience function that logs the last completed processing time."""
    accessor = ReportStatsDBAccessor('myreport', self.manifest_id)
    accessor.log_last_completed_datetime()

    completed = accessor.get_last_completed_datetime()
    self.assertIsNotNone(completed)

    # Deleting the only row must leave the status table empty.
    accessor.delete()
    remaining_rows = CostUsageReportStatus.objects.count()
    self.assertEqual(remaining_rows, 0)
def remove_temp_cur_files(path):
    """Remove temporary cost usage report files.

    Every entry in ``path`` is inspected. A file whose name ends with
    'Manifest.json' is parsed as JSON and its 'assemblyId' becomes the
    current assembly ID (if several manifests are present, the last one
    listed wins). Any other file is a removal candidate when its report
    stats record has a last-completed datetime. Candidates whose assembly
    ID differs from the current one are deleted.

    Args:
        path (String): Directory containing the report files.

    Returns:
        (List) Full paths of the files that were removed.

    """
    victim_list = []
    current_assembly_id = None
    for file_name in os.listdir(path):
        file_path = os.path.join(path, file_name)

        if file_name.endswith('Manifest.json'):
            with open(file_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('assemblyId')
            continue

        stats = ReportStatsDBAccessor(file_name)
        try:
            completed_date = stats.get_last_completed_datetime()
        finally:
            # Release the accessor's session even if the lookup fails.
            stats.close_session()
        if not completed_date:
            continue

        # NOTE(review): assumes the helper returns a (possibly empty)
        # collection supporting pop(); confirm against utils.
        uuids = utils.extract_uuids_from_string(file_name)
        if not uuids:
            # Without an assembly ID we cannot tell whether the file belongs
            # to the current report, so leave it alone instead of failing.
            LOG.warning('Skipping %s: no assembly ID found in file name.', file_path)
            continue

        victim_list.append({'file': file_path,
                            'completed_date': completed_date,
                            'assemblyId': uuids.pop()})

    removed_files = []
    for victim in victim_list:
        if victim['assemblyId'] != current_assembly_id:
            LOG.info('Removing %s, completed processing on date %s',
                     victim['file'], victim['completed_date'])
            os.remove(victim['file'])
            removed_files.append(victim['file'])
    return removed_files
def test_update(self):
    """Exercise update(), set_cursor_position() and delete() on an existing row."""
    clock_fields = ('year', 'month', 'day', 'hour', 'minute', 'second')

    accessor = ReportStatsDBAccessor('myreport', self.manifest_id)
    accessor.commit()
    query = accessor._get_db_obj_query()
    self.assertEqual(query.first().report_name, 'myreport')

    new_values = {
        'cursor_position': 33,
        'last_completed_datetime': '1/1/2011 11:11:11',
        'last_started_datetime': '2/2/22 22:22:22',
        'etag': 'myetag',
    }
    accessor.update(**new_values)
    accessor.commit()

    self.assertEqual(accessor.get_cursor_position(), 33)

    # Check the stored datetimes one component at a time.
    last_completed = accessor.get_last_completed_datetime()
    for field, expected in zip(clock_fields, (2011, 1, 1, 11, 11, 11)):
        self.assertEqual(getattr(last_completed, field), expected)

    last_started = accessor.get_last_started_datetime()
    for field, expected in zip(clock_fields, (2022, 2, 2, 22, 22, 22)):
        self.assertEqual(getattr(last_started, field), expected)

    # The dedicated setter changes the cursor and leaves the etag as it was.
    accessor.set_cursor_position(42)
    accessor.commit()
    self.assertEqual(accessor.get_cursor_position(), 42)
    self.assertEqual(accessor.get_etag(), 'myetag')

    # A partial update with only the cursor position.
    accessor.update(cursor_position=100)
    accessor.commit()
    self.assertEqual(accessor.get_cursor_position(), 100)

    # Once deleted and committed, the query must return no row.
    accessor.delete()
    accessor.commit()
    self.assertIsNone(accessor._get_db_obj_query().first())

    accessor.close_session()
def test_initializer_preexisting_report(self):
    """Test getting a new accessor stats on a preexisting report.

    A first accessor updates the values for "myreport". A second accessor
    created with the same report name and manifest ID must then read those
    values back.
    """
    saver = ReportStatsDBAccessor("myreport", self.manifest_id)
    saver.update(
        cursor_position=33,
        last_completed_datetime="2011-1-1 11:11:11",
        last_started_datetime="2022-2-2 22:22:22",
        etag="myetag",
    )

    self.assertIsNotNone(saver._obj)

    # Get another accessor for the same report and verify we get back the right information.
    saver2 = ReportStatsDBAccessor("myreport", self.manifest_id)
    last_completed = saver2.get_last_completed_datetime()

    self.assertEqual(last_completed.year, 2011)
    self.assertEqual(last_completed.month, 1)
    self.assertEqual(last_completed.day, 1)
    self.assertEqual(last_completed.hour, 11)
    self.assertEqual(last_completed.minute, 11)
    self.assertEqual(last_completed.second, 11)

    # The etag must be visible through both the writing accessor and the
    # freshly created one; the latter is the point of this test.
    self.assertEqual(saver.get_etag(), "myetag")
    self.assertEqual(saver2.get_etag(), "myetag")
def get_report_files(customer_name, authentication, billing_source, provider_type, schema_name, report_name=None):
    """
    Task to download a Report and queue each downloaded file for processing.

    Note that report_name will be not optional once Koku can specify
    what report we should download.

    A file is skipped when its report stats record shows that processing
    started less than two hours ago and has not completed, or that it has
    both started and completed. Every other file is handed to
    process_report_file as an asynchronous task.

    FIXME: A 2 hour timeout is arbitrarily set for in progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production
    this value can be adjusted or made configurable.

    Args:
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        report_name       (String): Name of the cost usage report to download.

    Returns:
        None. The code shown here has no return statement; its effect is the
        queued processing tasks and the log output.

    """
    reports = _get_report_files(customer_name,
                                authentication,
                                billing_source,
                                provider_type,
                                report_name)

    # Grace period after which an unfinished processing run is retried.
    in_progress_timeout = datetime.timedelta(hours=2)

    # initiate chained async task
    LOG.info('Reports to be processed: %s', reports)
    for report_dict in reports:
        report_path = report_dict.get('file')
        file_name = os.path.basename(report_path)

        stats = ReportStatsDBAccessor(file_name)
        try:
            started_date = stats.get_last_started_datetime()
            completed_date = stats.get_last_completed_datetime()
        finally:
            # Release the session even if one of the lookups raises.
            stats.close_session()

        # Skip processing if already in progress.
        if started_date and not completed_date:
            expired_start_date = (started_date + in_progress_timeout).replace(tzinfo=pytz.UTC)
            if DateAccessor().today().replace(tzinfo=pytz.UTC) < expired_start_date:
                LOG.info('Skipping processing task for %s since it was started at: %s.',
                         file_name, started_date)
                continue

        # Skip processing if complete.
        if started_date and completed_date:
            LOG.info('Skipping processing task for %s. Started on: %s and completed on: %s.',
                     file_name, started_date, completed_date)
            continue

        request = {'schema_name': schema_name,
                   'report_path': report_path,
                   'compression': report_dict.get('compression')}
        result = process_report_file.delay(**request)
        LOG.info('Processing task queued - File: %s, Task ID: %s',
                 report_path, result)