def test_clear_last_started_date(self):
    """Test convenience function for clearing the last started date."""
    accessor = ReportStatsDBAccessor("myreport", self.manifest_id)
    # Record a start time, confirm it is stored, then clear it.
    accessor.log_last_started_datetime()
    self.assertIsNotNone(accessor.get_last_started_datetime())
    accessor.clear_last_started_datetime()
    self.assertIsNone(accessor.get_last_started_datetime())
def test_update(self):
    """Test updating an existing row."""
    accessor = ReportStatsDBAccessor("myreport", self.manifest_id)
    self.assertEqual(accessor._get_db_obj_query().first().report_name, "myreport")

    accessor.update(
        cursor_position=33,
        last_completed_datetime=parser.parse("2011-1-1 11:11:11"),
        last_started_datetime=parser.parse("2022-2-2 22:22:22"),
        etag="myetag",
    )

    # Verify every datetime component round-tripped through the DB.
    completed = accessor.get_last_completed_datetime()
    completed_parts = (completed.year, completed.month, completed.day,
                       completed.hour, completed.minute, completed.second)
    for actual, expected in zip(completed_parts, (2011, 1, 1, 11, 11, 11)):
        self.assertEqual(actual, expected)

    started = accessor.get_last_started_datetime()
    started_parts = (started.year, started.month, started.day,
                     started.hour, started.minute, started.second)
    for actual, expected in zip(started_parts, (2022, 2, 2, 22, 22, 22)):
        self.assertEqual(actual, expected)

    self.assertEqual(accessor.get_etag(), "myetag")

    # Deleting the row leaves nothing behind for this report.
    accessor.delete()
    self.assertIsNone(accessor._get_db_obj_query().first())
def test_log_last_started_datetime(self):
    """Test convenience function for the last started processing time."""
    accessor = ReportStatsDBAccessor('myreport', self.manifest_id)
    accessor.log_last_started_datetime()
    self.assertIsNotNone(accessor.get_last_started_datetime())
    # Removing the row should empty the stats table entirely.
    accessor.delete()
    self.assertEqual(CostUsageReportStatus.objects.count(), 0)
def test_update(self):
    """Test updating an existing row.

    NOTE(review): this method has the same name as the earlier
    ``test_update`` in this file; if both live in the same class, this
    later definition shadows the earlier one and only one is ever
    collected/run — confirm and rename one of them.
    NOTE(review): this version relies on an older accessor API
    (``commit()``, ``get_cursor_position()``, ``set_cursor_position()``,
    ``close_session()``) that the other tests here no longer use —
    verify these methods still exist before relying on this test.
    """
    saver = ReportStatsDBAccessor('myreport', self.manifest_id)
    saver.commit()
    returned_obj = saver._get_db_obj_query()
    self.assertEqual(returned_obj.first().report_name, 'myreport')
    # Bulk-update several columns at once; datetimes are given as
    # strings (presumably parsed by the accessor — TODO confirm).
    saver.update(
        cursor_position=33,
        last_completed_datetime='1/1/2011 11:11:11',
        last_started_datetime='2/2/22 22:22:22',
        etag='myetag',
    )
    saver.commit()
    self.assertEqual(saver.get_cursor_position(), 33)
    # Each component of the completed timestamp should round-trip.
    last_completed = saver.get_last_completed_datetime()
    self.assertEqual(last_completed.year, 2011)
    self.assertEqual(last_completed.month, 1)
    self.assertEqual(last_completed.day, 1)
    self.assertEqual(last_completed.hour, 11)
    self.assertEqual(last_completed.minute, 11)
    self.assertEqual(last_completed.second, 11)
    # Two-digit year '22' is expected to resolve to 2022.
    last_started = saver.get_last_started_datetime()
    self.assertEqual(last_started.year, 2022)
    self.assertEqual(last_started.month, 2)
    self.assertEqual(last_started.day, 2)
    self.assertEqual(last_started.hour, 22)
    self.assertEqual(last_started.minute, 22)
    self.assertEqual(last_started.second, 22)
    # Cursor position can also be set directly and re-updated.
    saver.set_cursor_position(42)
    saver.commit()
    self.assertEqual(saver.get_cursor_position(), 42)
    self.assertEqual(saver.get_etag(), 'myetag')
    saver.update(cursor_position=100)
    saver.commit()
    self.assertEqual(saver.get_cursor_position(), 100)
    # Deleting the row leaves no object behind.
    saver.delete()
    saver.commit()
    returned_obj = saver._get_db_obj_query()
    self.assertIsNone(returned_obj.first())
    saver.close_session()
def get_report_files(customer_name, authentication, billing_source, provider_type, schema_name, report_name=None):
    """
    Task to download a Report.

    Note that report_name will be not optional once Koku can specify
    what report we should download.

    FIXME: A 2 hour timeout is arbitrarily set for in progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production
    this value can be adjusted or made configurable.

    Args:
        customer_name  (String): Name of the customer owning the cost usage report.
        authentication (String): Credential needed to access cost usage report
                                 in the backend provider.
        billing_source (String): Location of the cost usage report in the backend provider.
        provider_type  (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name    (String): Name of the DB schema
        report_name    (String): Name of the cost usage report to download.

    Returns:
        files (List) List of filenames with full local path.
               Example: ['/var/tmp/masu/my-report-name/aws/my-report-file.csv',
                         '/var/tmp/masu/other-report-name/aws/other-report-file.csv']

    """
    reports = _get_report_files(customer_name, authentication, billing_source, provider_type, report_name)

    # initiate chained async task
    LOG.info('Reports to be processed: %s', str(reports))
    for report_dict in reports:
        file_name = os.path.basename(report_dict.get('file'))

        # Pull prior processing timestamps for this file, then release the session.
        stats = ReportStatsDBAccessor(file_name)
        started_date = stats.get_last_started_datetime()
        completed_date = stats.get_last_completed_datetime()
        stats.close_session()

        # Skip processing if already in progress (unless the 2 hour window expired).
        if started_date and not completed_date:
            expired_start_date = (started_date + datetime.timedelta(hours=2)).replace(tzinfo=pytz.UTC)
            now_utc = DateAccessor().today().replace(tzinfo=pytz.UTC)
            if now_utc < expired_start_date:
                LOG.info('Skipping processing task for %s since it was started at: %s.',
                         file_name, str(started_date))
                continue

        # Skip processing if complete.
        if started_date and completed_date:
            LOG.info('Skipping processing task for %s. Started on: %s and completed on: %s.',
                     file_name, str(started_date), str(completed_date))
            continue

        # Queue the async processing task for this report file.
        request = {
            'schema_name': schema_name,
            'report_path': report_dict.get('file'),
            'compression': report_dict.get('compression'),
        }
        result = process_report_file.delay(**request)
        LOG.info('Processing task queued - File: %s, Task ID: %s',
                 report_dict.get('file'), str(result))