def test_create_daily_archives(self, mock_divide, mock_s3_copy, mock_os):
    """Test that this method returns a file list."""
    start_date = DateHelper().this_month_start
    file_name = "file"
    file_path = "path"
    # divide_csv_daily is mocked to yield two daily files with file paths.
    mock_divide.return_value = [
        {"filename": "file_one", "filepath": "path/to/file_one"},
        {"filename": "file_two", "filepath": "path/to/file_two"},
    ]

    # Without a context, the daily file paths are returned.
    actual = create_daily_archives(1, "10001", self.ocp_provider_uuid, file_name, file_path, 1, start_date)
    self.assertEqual(actual, ["path/to/file_one", "path/to/file_two"])

    # With a version context, the original file path is returned unsplit.
    context = {"version": "1"}
    actual = create_daily_archives(1, "10001", self.ocp_provider_uuid, "file", "path", 1, start_date, context=context)
    self.assertEqual(actual, [file_path])
def test_create_daily_archives(self, mock_divide, mock_s3_copy, mock_os):
    """Test that this method returns a file list."""
    start_date = DateHelper().this_month_start
    # divide_csv_daily is mocked to yield two daily files.
    mock_divide.return_value = [{"filename": "file_one"}, {"filename": "file_two"}]

    actual = create_daily_archives(1, "10001", self.ocp_provider_uuid, "file", "path", 1, start_date)
    self.assertEqual(actual, ["file_one", "file_two"])
def construct_parquet_reports(request_id, context, report_meta, payload_destination_path, report_file):
    """Build, upload and convert parquet reports."""
    # Delegate directly; create_daily_archives returns the list of daily
    # parquet files it produced.
    return create_daily_archives(
        request_id,
        report_meta["account"],
        report_meta["provider_uuid"],
        report_file,
        payload_destination_path,
        report_meta["manifest_id"],
        report_meta["date"],
        context,
    )
def extract_payload(url, request_id, context=None):  # noqa: C901
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            files - names of .csv usage reports for the manifest
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.  Format is:
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Once the files are extracted:
    1. Provider account is retrieved for the cluster id.  If no account is found we return.
    2. Manifest database record is created which will establish the assembly_id and number of files
    3. Report stats database record is created and is used as a filter to determine if the file
       has already been processed.
    4. All report files that have not been processed will have the local path to that report file
       added to the report_meta context dictionary for that file.
    5. Report file context dictionaries that require processing is added to a list which will be
       passed to the report processor.  All context from report_meta is used by the processor.

    Args:
        url (String): URL path to payload in the Insights upload service.
        request_id (String): Identifier associated with the payload
        context (Dict): Context for logging (account, etc)

    Returns:
        [dict]: List of report_meta dicts for files needing processing, with keys:
                files: [String],
                date: DateTime,
                cluster_id: String,
                manifest_path: String,
                provider_uuid: String,
                provider_type: String,
                schema_name: String,
                manifest_id: Integer,
                current_file: String
        None when no provider account matches the payload's cluster id.

    """
    # BUGFIX: the previous default `context={}` was a shared mutable default that
    # this function mutates below (context["account"], context["provider_type"]),
    # leaking state across calls that omit `context`.  Use None and build a fresh
    # dict per call instead.
    context = {} if context is None else context

    temp_dir, temp_file_path, temp_file = download_payload(request_id, url, context)
    manifest_path = extract_payload_contents(request_id, temp_dir, temp_file_path, temp_file, context)

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = f"{temp_dir}/{manifest_path[0]}"
    report_meta = utils.get_report_details(os.path.dirname(full_manifest_path))

    # Filter and get account from payload's cluster-id
    cluster_id = report_meta.get("cluster_id")
    if context:
        context["cluster_id"] = cluster_id
    account = get_account_from_cluster_id(cluster_id, request_id, context)
    if not account:
        # Unknown cluster: log, clean up the temp extraction dir, and bail out.
        msg = f"Received unexpected OCP report from {cluster_id}"
        LOG.error(log_json(request_id, msg, context))
        shutil.rmtree(temp_dir)
        return None

    schema_name = account.get("schema_name")
    provider_type = account.get("provider_type")
    # schema_name is prefixed (e.g. "acct"); strip the first 4 chars for the account id.
    context["account"] = schema_name[4:]
    context["provider_type"] = provider_type
    report_meta["provider_uuid"] = account.get("provider_uuid")
    report_meta["provider_type"] = provider_type
    report_meta["schema_name"] = schema_name
    report_meta["account"] = schema_name[4:]
    report_meta["request_id"] = request_id

    # Create directory tree for report.
    usage_month = utils.month_date_range(report_meta.get("date"))
    destination_dir = f"{Config.INSIGHTS_LOCAL_REPORT_DIR}/{report_meta.get('cluster_id')}/{usage_month}"
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = f"{destination_dir}/{os.path.basename(report_meta.get('manifest_path'))}"
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Save Manifest
    report_meta["manifest_id"] = create_manifest_entries(report_meta, request_id, context)

    # Copy report payload
    report_metas = []
    for report_file in report_meta.get("files"):
        current_meta = report_meta.copy()
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
            current_meta["current_file"] = payload_destination_path
            # record_report_status returns truthy when the file was already
            # processed; only archive and queue files not seen before.
            if not record_report_status(report_meta["manifest_id"], report_file, request_id, context):
                msg = f"Successfully extracted OCP for {report_meta.get('cluster_id')}/{usage_month}"
                LOG.info(log_json(request_id, msg, context))
                create_daily_archives(
                    request_id,
                    report_meta["account"],
                    report_meta["provider_uuid"],
                    report_file,
                    payload_destination_path,
                    report_meta["manifest_id"],
                    report_meta["date"],
                    context,
                )
                report_metas.append(current_meta)
            else:
                # Report already processed
                pass
        except FileNotFoundError:
            msg = f"File {str(report_file)} has not downloaded yet."
            LOG.debug(log_json(request_id, msg, context))

    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
    return report_metas