Example #1
    def test_initializer_preexisting_report(self):
        """Test getting a new accessor stats on a preexisting report."""
        saver = ReportStatsDBAccessor('myreport')
        saver.update(cursor_position=33,
                     last_completed_datetime='1/1/2011 11:11:11',
                     last_started_datetime='2/2/22 22:22:22',
                     etag='myetag')
        saver.commit()

        self.assertIsNotNone(saver._session)

        # Get another accessor for the same report and verify we get back the right information.
        saver2 = ReportStatsDBAccessor('myreport')
        last_completed = saver2.get_last_completed_datetime()

        self.assertEqual(last_completed.year, 2011)
        self.assertEqual(last_completed.month, 1)
        self.assertEqual(last_completed.day, 1)
        self.assertEqual(last_completed.hour, 11)
        self.assertEqual(last_completed.minute, 11)
        self.assertEqual(last_completed.second, 11)

        self.assertEqual(saver2.get_etag(), 'myetag')

        saver.remove()
        saver.commit()
        saver.close_session()
        saver2.close_session()
Example #2
    def test_remove_temp_cur_files(self):
        """Test to remove temporary cost usage files."""
        cur_dir = tempfile.mkdtemp()

        manifest_data = {"assemblyId": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
        manifest = '{}/{}'.format(cur_dir, 'koku-Manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [{'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                     {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-2.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                     {'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=2)},
                     {'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=1)},
                     {'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-koku-1.csv.gz',
                      'processed_date': None}]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(cur_dir, item['file'])
            f = open(path, 'w')
            obj = self.manifest_accessor.get_manifest(self.assembly_id, self.provider_id)
            stats = ReportStatsDBAccessor(item['file'], obj.id)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            f.close()
            if not item['file'].startswith(manifest_data.get('assemblyId')) and item['processed_date']:
                expected_delete_list.append(path)

        removed_files = self.processor.remove_temp_cur_files(cur_dir)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
        shutil.rmtree(cur_dir)
Example #3
    def download_report(self, date_time):
        """
        Download CUR for a given date.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            ([{}]) List of dictionaries containing file path and compression.

        """
        LOG.info('Current date is %s.  Attempting to get manifest...',
                 str(date_time))
        manifest = self._get_manifest(date_time)
        reports = manifest.get('reportKeys')

        cur_reports = []
        for report in reports:
            report_dictionary = {}
            local_s3_filename = utils.get_local_file_name(report)
            stats_recorder = ReportStatsDBAccessor(local_s3_filename)
            stored_etag = stats_recorder.get_etag()
            file_name, etag = self.download_file(report, stored_etag)
            stats_recorder.update(etag=etag)
            stats_recorder.commit()
            stats_recorder.close_session()

            report_dictionary['file'] = file_name
            report_dictionary['compression'] = self.report.get('Compression')

            cur_reports.append(report_dictionary)
        return cur_reports
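
A minimal usage sketch for the method above. The downloader class name and its
constructor arguments here are assumptions for illustration only; they are not
taken from these examples:

    import datetime

    # Hypothetical setup: the real downloader is built with provider-specific
    # credentials and a billing source.
    downloader = AWSReportDownloader(customer_name='acme',
                                     auth_credential='my-role-arn',
                                     bucket='my-cur-bucket')

    # Each returned entry carries the local file path and its compression type.
    for report in downloader.download_report(datetime.datetime.utcnow()):
        print(report['file'], report['compression'])
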
Example #4
    def test_initializer(self):
        """Test Initializer"""
        saver = ReportStatsDBAccessor('myreport')
        self.assertIsNotNone(saver._session)

        saver.remove()
        saver.commit()
        saver.close_session()
Example #5
    def test_log_last_completed_datetime(self):
        """Test convience function for last completed processing time."""
        saver = ReportStatsDBAccessor('myreport')
        saver.commit()
        saver.log_last_completed_datetime()
        saver.commit()

        saver.remove()
        saver.commit()
        saver.close_session()
Example #6
    def test_log_last_completed_datetime(self):
        """Test convience function for last completed processing time."""
        saver = ReportStatsDBAccessor('myreport', self.manifest_id)
        saver.log_last_completed_datetime()
        saver.commit()

        # The original example was missing asserts; this minimal check is an
        # assumed addition to verify the timestamp was recorded.
        self.assertIsNotNone(saver.get_last_completed_datetime())
        saver.delete()
        saver.commit()
        saver.close_session()
Example #7
    def test_add_remove(self):
        """Test basic add/remove logic."""
        saver = ReportStatsDBAccessor('myreport')
        saver.commit()

        self.assertTrue(saver.does_db_entry_exist())
        returned_obj = saver._get_db_obj_query()
        self.assertEqual(returned_obj.first().report_name, 'myreport')

        saver.remove()
        saver.commit()
        returned_obj = saver._get_db_obj_query()
        self.assertIsNone(returned_obj.first())
        saver.close_session()
Example #8
    def test_remove_temp_cur_files(self):
        """Test to remove temporary usage report files."""
        insights_local_dir = tempfile.mkdtemp()

        manifest_data = {"uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=2),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=1),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
                'processed_date': None,
            },
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(insights_local_dir, item['file'])
            f = open(path, 'w')
            stats = ReportStatsDBAccessor(item['file'], None)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            f.close()
            if (not item['file'].startswith(manifest_data.get('uuid'))
                    and item['processed_date']):
                expected_delete_list.append(path)

        removed_files = self.ocp_processor.remove_temp_cur_files(
            insights_local_dir, manifest_id=None)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
        shutil.rmtree(insights_local_dir)
Example #9
    def test_update(self):
        """Test updating an existing row."""
        saver = ReportStatsDBAccessor('myreport', self.manifest_id)
        saver.commit()

        returned_obj = saver._get_db_obj_query()
        self.assertEqual(returned_obj.first().report_name, 'myreport')

        saver.update(
            cursor_position=33,
            last_completed_datetime='1/1/2011 11:11:11',
            last_started_datetime='2/2/22 22:22:22',
            etag='myetag',
        )
        saver.commit()

        self.assertEqual(saver.get_cursor_position(), 33)
        last_completed = saver.get_last_completed_datetime()
        self.assertEqual(last_completed.year, 2011)
        self.assertEqual(last_completed.month, 1)
        self.assertEqual(last_completed.day, 1)
        self.assertEqual(last_completed.hour, 11)
        self.assertEqual(last_completed.minute, 11)
        self.assertEqual(last_completed.second, 11)

        last_started = saver.get_last_started_datetime()
        self.assertEqual(last_started.year, 2022)
        self.assertEqual(last_started.month, 2)
        self.assertEqual(last_started.day, 2)
        self.assertEqual(last_started.hour, 22)
        self.assertEqual(last_started.minute, 22)
        self.assertEqual(last_started.second, 22)

        saver.set_cursor_position(42)
        saver.commit()

        self.assertEqual(saver.get_cursor_position(), 42)
        self.assertEqual(saver.get_etag(), 'myetag')

        saver.update(cursor_position=100)
        saver.commit()
        self.assertEqual(saver.get_cursor_position(), 100)

        saver.delete()
        saver.commit()
        returned_obj = saver._get_db_obj_query()
        self.assertIsNone(returned_obj.first())
        saver.close_session()
Example #10
def _process_report_file(schema_name, report_path, compression):
    """
    Task to process a Report.

    Args:
        schema_name (String) db schema name
        report_path (String) path to downloaded reports
        compression (String) 'PLAIN' or 'GZIP'

    Returns:
        None

    """
    stmt = ('Processing Report:'
            ' schema_name: {},'
            ' report_path: {},'
            ' compression: {}')
    log_statement = stmt.format(schema_name, report_path, compression)
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = 'Available memory: {} bytes ({}%)'.format(mem.free, mem.percent)
    LOG.info(mem_msg)

    file_name = report_path.split('/')[-1]

    stats_recorder = ReportStatsDBAccessor(file_name)
    stats_recorder.log_last_started_datetime()
    stats_recorder.commit()

    processor = ReportProcessor(schema_name=schema_name,
                                report_path=report_path,
                                compression=compression)

    processor.process()
    stats_recorder.log_last_completed_datetime()
    stats_recorder.commit()
    stats_recorder.close_session()

    files = remove_files.remove_temp_cur_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
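
For reference, a direct call to the helper above might look like this. The
schema name and report path are illustrative values; per the docstring,
compression must be 'PLAIN' or 'GZIP':

    # Illustrative invocation; processes one downloaded report synchronously.
    _process_report_file(schema_name='acme',
                         report_path='/var/tmp/masu/my-report-name/aws/my-report-file.csv',
                         compression='GZIP')
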
Example #11
    def test_remove_temp_cur_files(self):
        """Test to remove temporary usage report files."""
        insights_local_dir = tempfile.mkdtemp()
        cluster_id = 'my-ocp-cluster'
        manifest_date = "2018-05-01"
        manifest_data = {
            "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
            "cluster_id": cluster_id,
            "date": manifest_date
        }
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=2),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=1),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
                'processed_date': None,
            },
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(insights_local_dir, item['file'])
            f = open(path, 'w')
            obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                      self.provider_id)
            stats = ReportStatsDBAccessor(item['file'], obj.id)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            f.close()
            if (not item['file'].startswith(manifest_data.get('uuid'))
                    and item['processed_date']):
                expected_delete_list.append(path)
        fake_dir = tempfile.mkdtemp()
        with patch.object(Config, 'INSIGHTS_LOCAL_REPORT_DIR', fake_dir):
            destination_dir = '{}/{}/{}'.format(
                fake_dir, cluster_id,
                month_date_range(parser.parse(manifest_date)))
            os.makedirs(destination_dir, exist_ok=True)
            removed_files = self.ocp_processor.remove_temp_cur_files(
                insights_local_dir)
            self.assertEqual(sorted(removed_files),
                             sorted(expected_delete_list))
            shutil.rmtree(insights_local_dir)
            shutil.rmtree(fake_dir)
Example #12
    def test_download_current_report(self):
        """Test downloading the current month's cost usage report."""
        fake_report_date = datetime.today().replace(day=1)
        fake_report_end_date = fake_report_date + relativedelta(months=+1)
        report_range = '{}-{}'.format(fake_report_date.strftime('%Y%m%d'),
                                      fake_report_end_date.strftime('%Y%m%d'))

        # Moto setup
        conn = boto3.resource('s3', region_name=self.selected_region)
        conn.create_bucket(Bucket=self.fake_bucket_name)

        # push mocked csvs into Moto env
        fake_csv_files = {}
        for x in range(0, random.randint(2, 10)):
            csv_filename = '{}.csv'.format('-'.join(
                self.fake.words(random.randint(2, 5))))

            # mocked report file definition
            fake_report_file = '{}/{}/{}/{}/{}'.format(self.fake_bucket_prefix,
                                                       self.fake_report_name,
                                                       report_range,
                                                       uuid.uuid4(),
                                                       csv_filename)
            fake_csv_files[csv_filename] = fake_report_file

            fake_csv_body = ','.join(self.fake.words(random.randint(5, 10)))
            conn.Object(self.fake_bucket_name,
                        fake_report_file).put(Body=fake_csv_body)
            key = conn.Object(self.fake_bucket_name, fake_report_file).get()
            self.assertEqual(fake_csv_body, str(key['Body'].read(), 'utf-8'))

        # mocked Manifest definition
        selected_csv = random.choice(list(fake_csv_files.keys()))
        fake_object = '{}/{}/{}/{}-Manifest.json'.format(
            self.fake_bucket_prefix, self.fake_report_name, report_range,
            self.fake_report_name)
        fake_object_body = {'reportKeys': [fake_csv_files[selected_csv]]}

        # push mocked manifest into Moto env
        conn.Object(self.fake_bucket_name,
                    fake_object).put(Body=json.dumps(fake_object_body))
        key = conn.Object(self.fake_bucket_name, fake_object).get()
        self.assertEqual(fake_object_body, json.load(key['Body']))

        # actual test
        out = self.report_downloader.download_current_report()
        files_list = []
        for cur_dict in out:
            files_list.append(cur_dict['file'])
            self.assertIsNotNone(cur_dict['compression'])

        report_key = fake_object_body.get('reportKeys').pop()
        expected_assembly_id = utils.get_assembly_id_from_cur_key(report_key)
        expected_csv = '{}/{}/aws/{}/{}-{}'.format(DATA_DIR,
                                                   self.fake_customer_name,
                                                   self.fake_bucket_name,
                                                   expected_assembly_id,
                                                   selected_csv)
        self.assertEqual(files_list, [expected_csv])

        # Verify etag is stored
        for cur_dict in out:
            cur_file = cur_dict['file']
            file_name = cur_file.split('/')[-1]
            stats_recorder = ReportStatsDBAccessor(file_name)
            self.assertIsNotNone(stats_recorder.get_etag())

            # Cleanup
            stats_recorder.remove()
            stats_recorder.commit()

            stats_recorder2 = ReportStatsDBAccessor(file_name)
            self.assertIsNone(stats_recorder2.get_etag())
            stats_recorder.close_session()
            stats_recorder2.close_session()
Example #13
def get_report_files(customer_name,
                     authentication,
                     billing_source,
                     provider_type,
                     schema_name,
                     report_name=None):
    """
    Task to download a Report.

    Note that report_name will not be optional once Koku can specify
    which report we should download.

    FIXME: A 2-hour timeout is arbitrarily set for in-progress processing requests.
    Once we know a realistic processing time for the largest CUR file in production,
    this value can be adjusted or made configurable.

    Args:
        customer_name     (String): Name of the customer owning the cost usage report.
        authentication    (String): Credential needed to access cost usage report
                                    in the backend provider.
        billing_source    (String): Location of the cost usage report in the backend provider.
        provider_type     (String): Koku defined provider type string.  Example: Amazon = 'AWS'
        schema_name       (String): Name of the DB schema
        report_name       (String): Name of the cost usage report to download.

    Returns:
        files (List) List of filenames with full local path.
               Example: ['/var/tmp/masu/my-report-name/aws/my-report-file.csv',
                         '/var/tmp/masu/other-report-name/aws/other-report-file.csv']

    """
    reports = _get_report_files(customer_name,
                                authentication,
                                billing_source,
                                provider_type,
                                report_name)

    # initiate chained async task
    LOG.info('Reports to be processed: %s', str(reports))
    for report_dict in reports:
        file_name = os.path.basename(report_dict.get('file'))
        stats = ReportStatsDBAccessor(file_name)
        started_date = stats.get_last_started_datetime()
        completed_date = stats.get_last_completed_datetime()
        stats.close_session()

        # Skip processing if already in progress.
        if started_date and not completed_date:
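            # The in-progress window expires at the recorded start time plus
            # the arbitrary 2-hour timeout noted in the docstring; if "now" is
            # still inside that window, assume another worker owns the file.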
            expired_start_date = (started_date + datetime.timedelta(hours=2))\
                .replace(tzinfo=pytz.UTC)
            if DateAccessor().today().replace(tzinfo=pytz.UTC) < expired_start_date:
                LOG.info('Skipping processing task for %s since it was started at: %s.',
                         file_name, str(started_date))
                continue

        # Skip processing if complete.
        if started_date and completed_date:
            LOG.info('Skipping processing task for %s. Started on: %s and completed on: %s.',
                     file_name, str(started_date), str(completed_date))
            continue

        request = {'schema_name': schema_name,
                   'report_path': report_dict.get('file'),
                   'compression': report_dict.get('compression')}
        result = process_report_file.delay(**request)
        LOG.info('Processing task queued - File: %s, Task ID: %s',
                 report_dict.get('file'),
                 str(result))
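
Since processing is queued with process_report_file.delay(**request), this
function is presumably a Celery task as well. A hedged sketch of kicking off a
download-and-process cycle (all argument values are illustrative only):

    # Queue the download task asynchronously; report_name is left at its
    # default since Koku cannot yet specify which report to download.
    get_report_files.delay(customer_name='acme',
                           authentication='my-role-arn',
                           billing_source='my-cur-bucket',
                           provider_type='AWS',
                           schema_name='acme')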