def get_report_details(report_directory):
    """
    Get OCP usage report details from the manifest file.

    The date range is aligned on the first day of the current month and ends
    on the first day of the next month from the specified date.

    Args:
        report_directory (String): base directory for the report.

    Returns:
        (Dict): manifest contents plus the parsed payload date and manifest path, e.g.
            files: [String],
            cluster_id: String,
            date: DateTime,
            uuid: String,
            manifest_path: String

    """
    manifest_path = "{}/{}".format(report_directory, "manifest.json")
    payload_dict = {}
    try:
        with open(manifest_path) as file:
            payload_dict = json.load(file)
            payload_dict["date"] = parser.parse(payload_dict["date"])
            payload_dict["manifest_path"] = manifest_path
    except (OSError, IOError, KeyError):
        LOG.error("Unable to extract manifest data")
    return payload_dict
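# A minimal sketch of how get_report_details is expected to be used. The manifest
# layout shown here is inferred from the keys read elsewhere in this module
# (date, uuid, cluster_id, files); the staging directory path is hypothetical.
#
#   /tmp/payload_staging/manifest.json:
#       {"uuid": "...", "cluster_id": "my-ocp-cluster-1",
#        "date": "2018-10-01 00:00:00", "files": ["<uuid>_report_name.csv"]}
#
#   report_meta = get_report_details("/tmp/payload_staging")
#   report_meta["date"]           # parsed into a datetime by dateutil's parser
#   report_meta["manifest_path"]  # "/tmp/payload_staging/manifest.json"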
def upload_to_azure_container(storage_file_name, local_path, storage_file_path):
    """Upload a file to an Azure storage container.

    Args:
        storage_file_name (String): The container to upload the file to
        local_path (String): The full local file system path of the file
        storage_file_path (String): The file path to upload to within the container

    Returns:
        (Boolean): True if the file was uploaded

    """
    try:
        # Retrieve the connection string for use with the application.
        connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)
        blob_client = blob_service_client.get_blob_client(container=storage_file_name, blob=storage_file_path)
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data=data)
        LOG.info(f"uploaded {local_path} to container {storage_file_name} as {storage_file_path}")
    except (CloudError, ClientException, IOError) as error:
        LOG.error(error)
        traceback.print_exc(file=sys.stderr)
        return False
    return True
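# A minimal usage sketch for upload_to_azure_container, assuming the
# AZURE_STORAGE_CONNECTION_STRING environment variable is set and the container
# already exists; the container and file names below are hypothetical.
#
#   success = upload_to_azure_container(
#       "cost-reports",                   # container name
#       "/tmp/azure_report.csv",          # local file to upload
#       "reports/2021/azure_report.csv",  # blob path within the container
#   )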
def _remove_files(file_list):
    """Remove files."""
    for file_path in file_list:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            LOG.error(f"File {file_path} was not found.")
            # Re-raise the original exception rather than raising the bare class.
            raise
def ocp_route_file(insights_upload, local_path):
    """Route file to either the Upload Service or the local filesystem."""
    if os.path.isdir(insights_upload):
        extract_payload(insights_upload, local_path)
    else:
        response = post_payload_to_ingest_service(insights_upload, local_path)
        if response.status_code == 202:
            LOG.info("File uploaded successfully.")
        else:
            LOG.error(f"{response.status_code} File upload failed.")

        LOG.info(response.text)
def _load_static_report_data(options):
    """Validate/load the static report file and set start_date/end_date if one is provided."""
    if not options.get("static_report_file"):
        return

    static_file = options.get("static_report_file")
    if not os.path.exists(static_file):
        LOG.error(f"file does not exist: '{static_file}'")
        sys.exit()

    LOG.info("Loading static data...")
    aws_tags = set()
    start_dates = []
    end_dates = []
    static_report_data = load_yaml(static_file)
    for generator_dict in static_report_data.get("generators"):
        for _, attributes in generator_dict.items():
            start_date = get_start_date(attributes, options)
            generated_start_date = calculate_start_date(start_date)
            start_dates.append(generated_start_date)

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(generated_start_date, attributes.get("end_date"))
            elif options.get("end_date") and options.get("end_date").date() != today().date():
                generated_end_date = calculate_end_date(generated_start_date, options.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += datetime.timedelta(hours=24)
            end_dates.append(generated_end_date)

            attributes["start_date"] = str(generated_start_date)
            attributes["end_date"] = str(generated_end_date)

            if options.get("provider") == "aws":
                aws_tags.update(attributes.get("tags", {}).keys())

    options["start_date"] = min(start_dates)
    latest_date = max(end_dates)
    last_day_of_month = calendar.monthrange(year=latest_date.year, month=latest_date.month)[1]
    options["end_date"] = latest_date.replace(day=last_day_of_month, hour=0, minute=0)
    options["static_report_data"] = static_report_data

    if options.get("provider") == "aws" and aws_tags:
        options["aws_tags"] = aws_tags

    return True
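# A hedged sketch of the static report YAML shape this function expects, inferred
# from the keys accessed above ("generators", per-generator attributes with optional
# start_date/end_date/tags); the generator name and values are hypothetical.
#
#   generators:
#     - EC2Generator:
#         start_date: 2021-06-01
#         end_date: 2021-06-03
#         tags:
#           environment: dev
#
# After loading, options["start_date"] and options["end_date"] bracket all generators
# and options["static_report_data"] holds the parsed YAML.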
def upload_to_s3(bucket_name, bucket_file_path, local_path):
    """Upload data to an S3 bucket.

    Args:
        bucket_name (String): The name of the S3 bucket
        bucket_file_path (String): The path to store the file to
        local_path (String): The local file system path of the file

    Returns:
        (Boolean): True if the file was uploaded

    """
    uploaded = True
    try:
        s3_resource = boto3.resource("s3")
        s3_resource.Bucket(bucket_name).upload_file(local_path, bucket_file_path)
        msg = f"Uploaded {bucket_file_path} to s3 bucket {bucket_name}."
        LOG.info(msg)
    except (ClientError, BotoConnectionError, boto3.exceptions.S3UploadFailedError) as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
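# A minimal usage sketch for upload_to_s3, assuming AWS credentials are available to
# boto3 (environment variables, shared credentials file, or instance role); the
# bucket and path names are hypothetical.
#
#   if upload_to_s3("cost-usage-bucket", "reports/2021/report.csv", "/tmp/report.csv"):
#       LOG.info("report stored in S3")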
def upload_to_gcp_storage(bucket_name, source_file_name, destination_blob_name):
    """
    Upload data to a GCP Storage bucket.

    Args:
        bucket_name (String): The bucket to upload the file to
        source_file_name (String): The full local file system path of the file
        destination_blob_name (String): Destination blob name to store in GCP.

    Returns:
        (Boolean): True if the file was uploaded

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to load a file into "
            "GCP Storage."
        )
        return False
    try:
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)
        blob.upload_from_filename(source_file_name)
        LOG.info(f"File {source_file_name} uploaded to GCP Storage {destination_blob_name}.")
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
def extract_payload(base_path, payload_file):
    """
    Extract an OCP usage report payload into the local directory structure.

    The payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary contains:
            files - list of .csv usage report file names
            date - DateTime that the payload was created
            uuid - uuid for the payload
            cluster_id - OCP cluster ID
    2. *.csv - Actual usage reports for the cluster. Format is:
        <uuid>_report_name.csv

    On successful completion the report and manifest will be in the directory
    structure that the OCPReportDownloader expects.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        base_path (String): base local directory path.
        payload_file (String): path to the payload.tar.gz file containing the report and manifest.

    Returns:
        None

    """
    # Create temporary directory for initial file staging and verification
    temp_dir = tempfile.mkdtemp()

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(payload_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except ReadError as error:
        LOG.error("Unable to untar file. Reason: {}".format(str(error)))
        shutil.rmtree(temp_dir)
        return

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(base_path, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get("files"):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError:
            pass

    LOG.info("Successfully extracted OCP payload for {}/{}".format(report_meta.get("cluster_id"), usage_month))

    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
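# A hedged sketch of calling extract_payload and the directory layout it produces,
# based on the docstring above; the paths are hypothetical.
#
#   extract_payload("/var/tmp/insights_local", "/tmp/payload.tar.gz")
#   # -> /var/tmp/insights_local/<cluster_id>/<YYYYMMDD-YYYYMMDD>/manifest.json
#   # -> /var/tmp/insights_local/<cluster_id>/<YYYYMMDD-YYYYMMDD>/<uuid>_report_name.csv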
def gcp_bucket_to_dataset(gcp_bucket_name, file_name, dataset_name, table_name):
    """
    Create a GCP dataset from a file stored in a bucket.

    Args:
        gcp_bucket_name (String): The bucket the file is stored in
        file_name (String): The name of the file stored in GCP
        dataset_name (String): name for the created dataset in GCP
        table_name (String): name for the created table in GCP

    Returns:
        (Boolean): True if the dataset was created

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to create a dataset."
        )
        return False
    try:
        bigquery_client = bigquery.Client()

        project_name = bigquery_client.project
        dataset_id = f"{project_name}.{dataset_name}"
        dataset = bigquery.Dataset(dataset_id)

        # Delete the dataset if it already exists (no error if it does not) and create a fresh one.
        bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
        dataset = bigquery_client.create_dataset(dataset)

        table_id = f"{project_name}.{dataset_name}.{table_name}"

        # Create the load job config with the billing export schema.
        job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            time_partitioning=bigquery.TimePartitioning(),
            schema=[
                {"name": "billing_account_id", "type": "STRING", "mode": "NULLABLE"},
                {"name": "service", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "sku", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "usage_start_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "usage_end_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "project", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "number", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "labels", "type": "RECORD", "mode": "REPEATED", "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ]},
                    {"name": "ancestry_numbers", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "labels", "type": "RECORD", "mode": "REPEATED", "fields": [
                    {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "system_labels", "type": "RECORD", "mode": "REPEATED", "fields": [
                    {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "location", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "location", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "country", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "region", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "zone", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "export_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "cost", "type": "FLOAT", "mode": "NULLABLE"},
                {"name": "currency", "type": "STRING", "mode": "NULLABLE"},
                {"name": "currency_conversion_rate", "type": "FLOAT", "mode": "NULLABLE"},
                {"name": "usage", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                    {"name": "unit", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "amount_in_pricing_units", "type": "FLOAT", "mode": "NULLABLE"},
                    {"name": "pricing_unit", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "credits", "type": "RECORD", "mode": "REPEATED", "fields": [
                    {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                    {"name": "full_name", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "invoice", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "month", "type": "STRING", "mode": "NULLABLE"},
                ]},
                {"name": "cost_type", "type": "STRING", "mode": "NULLABLE"},
                {"name": "adjustment_info", "type": "RECORD", "mode": "NULLABLE", "fields": [
                    {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "mode", "type": "STRING", "mode": "NULLABLE"},
                    {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                ]},
            ],
        )

        uri = f"gs://{gcp_bucket_name}/{file_name}"
        load_job = bigquery_client.load_table_from_uri(uri, table_id, job_config=job_config)

        # Wait for the job to finish; raises an exception if the load fails.
        load_job.result()

        # After the table is created, delete the file from the storage bucket.
        storage_client = storage.Client()
        bucket = storage_client.bucket(gcp_bucket_name)
        blob = bucket.blob(file_name)
        blob.delete()

        LOG.info(f"Dataset {dataset_name} created in GCP BigQuery under the table name {table_name}.")
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded