def upload_to_azure_container(storage_file_name, local_path, storage_file_path):
    """Upload data to a storage account.

    Args:
        storage_file_name (String): The container to upload file to
        local_path (String): The full local file system path of the file
        storage_file_path (String): The file path to upload to within container
    Returns:
        (Boolean): True if file was uploaded

    """
    try:
        # Retrieve the connection string for use with the application.
        connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)
        blob_client = blob_service_client.get_blob_client(container=storage_file_name, blob=storage_file_path)
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data=data)
        LOG.info(f"uploaded {storage_file_name} to {storage_file_path}")
    except (CloudError, ClientException, IOError) as error:
        LOG.error(error)
        traceback.print_exc(file=sys.stderr)
        return False
    return True
def copy_to_local_dir(local_dir_home, local_path, local_file_path=None):
    """Copy data to a local directory.

    Args:
        local_dir_home (String): Local file path representing the bucket
        local_path (String): The local file system path of the file
        local_file_path (String): The path to store the file to
    Returns:
        (Boolean): True if file was copied

    """
    if not os.path.isdir(local_dir_home):
        LOG.info(f"Path does not exist for the local directory: {local_dir_home}")
        return False
    full_bucket_path = local_dir_home
    outpath = local_path
    if local_file_path:
        full_bucket_path = f"{local_dir_home}/{local_file_path}"
        outpath = local_file_path
    os.makedirs(os.path.dirname(full_bucket_path), exist_ok=True)
    shutil.copyfile(local_path, full_bucket_path)
    msg = f"Copied {outpath} to local directory {local_dir_home}."
    LOG.info(msg)
    return True
def _write_csv(output_file, data, header):
    """Output csv file data."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for row in data:
            writer.writerow(row)
def _write_jsonl(output_file, data):
    """Output JSON Lines file data for bigquery."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        for row in data:
            json.dump(row, file)  # each dictionary "row" is its own line in a JSONL file
            file.write("\n")
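# Example usage of the two writers above (illustrative only; the rows, paths,
# and header are hypothetical and not produced by nise itself):
#
#   rows = [
#       {"service": "compute", "cost": 1.25},
#       {"service": "storage", "cost": 0.10},
#   ]
#   _write_csv("/tmp/example.csv", rows, ["service", "cost"])
#   _write_jsonl("/tmp/example.jsonl", rows)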
def load_static_report_data(options):
    """Load and set start and end dates if static file is provided."""
    if not options.get("static_report_file"):
        options["start_date"] = datetime.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        options["end_date"] = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        return options

    LOG.info("Loading static data...")
    start_dates = {}
    end_dates = {}
    static_report_data = load_yaml(options.get("static_report_file"))
    for generator_dict in static_report_data.get("generators"):
        for genname, attributes in generator_dict.items():
            generated_start_date = calculate_start_date(attributes.get("start_date"))
            start_dates[genname] = generated_start_date

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(generated_start_date, attributes.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += timedelta(hours=24)
            else:
                generated_end_date = generated_end_date.replace(hour=23, minute=59)
            end_dates[genname] = generated_end_date

    options["gen_starts"] = start_dates
    options["gen_ends"] = end_dates

    options["start_date"] = min(start_dates.values())
    latest_date = max(end_dates.values())
    last_day_of_month = calendar.monthrange(year=latest_date.year, month=latest_date.month)[1]
    if latest_date.month == datetime.now().month and latest_date.year == datetime.now().year:
        last_day_of_month = datetime.now().day  # don't generate date into the future.
    options["end_date"] = latest_date.replace(day=last_day_of_month, hour=0, minute=0)

    return options
def run(provider_type, options):
    """Run nise."""
    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
def _load_static_report_data(options):
    """Validate/load and set start_date if static file is provided."""
    if not options.get("static_report_file"):
        return

    static_file = options.get("static_report_file")
    if not os.path.exists(static_file):
        LOG.error(f"file does not exist: '{static_file}'")
        sys.exit()

    LOG.info("Loading static data...")
    aws_tags = set()
    start_dates = []
    end_dates = []
    static_report_data = load_yaml(static_file)
    for generator_dict in static_report_data.get("generators"):
        for _, attributes in generator_dict.items():
            start_date = get_start_date(attributes, options)
            generated_start_date = calculate_start_date(start_date)
            start_dates.append(generated_start_date)

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(generated_start_date, attributes.get("end_date"))
            elif options.get("end_date") and options.get("end_date").date() != today().date():
                generated_end_date = calculate_end_date(generated_start_date, options.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += datetime.timedelta(hours=24)
            end_dates.append(generated_end_date)
            attributes["start_date"] = str(generated_start_date)
            attributes["end_date"] = str(generated_end_date)

            if options.get("provider") == "aws":
                aws_tags.update(attributes.get("tags", {}).keys())

    options["start_date"] = min(start_dates)
    latest_date = max(end_dates)
    last_day_of_month = calendar.monthrange(year=latest_date.year, month=latest_date.month)[1]
    options["end_date"] = latest_date.replace(day=last_day_of_month, hour=0, minute=0)
    options["static_report_data"] = static_report_data

    if options.get("provider") == "aws" and aws_tags:
        options["aws_tags"] = aws_tags

    return True
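# A static report file lists one entry per generator under "generators"; each
# entry is keyed by the generator name and may carry start/end dates and, for
# AWS, tags. Hypothetical sketch of what this loader expects (names and dates
# are illustrative only):
#
#   generators:
#     - EC2Generator:
#         start_date: 2021-07-01
#         end_date: 2021-07-31
#         tags:
#           environment: prod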
def ocp_route_file(insights_upload, local_path):
    """Route file to either Upload Service or local filesystem."""
    if os.path.isdir(insights_upload):
        extract_payload(insights_upload, local_path)
    else:
        response = post_payload_to_ingest_service(insights_upload, local_path)
        if response.status_code == 202:
            LOG.info("File uploaded successfully.")
        else:
            LOG.error(f"{response.status_code} File upload failed.")
            LOG.info(response.text)
def run(provider_type, options):
    """Run nise."""
    static_data_bool = _load_static_report_data(options)
    if not options.get("start_date"):
        raise NiseError("'start_date' is required in static files.")
    if not static_data_bool:
        fix_dates(options, provider_type)

    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
def post_payload_to_ingest_service(insights_upload, local_path):
    """POST the payload to Insights via header or basic auth."""
    insights_account_id = os.environ.get("INSIGHTS_ACCOUNT_ID")
    insights_org_id = os.environ.get("INSIGHTS_ORG_ID")
    insights_user = os.environ.get("INSIGHTS_USER")
    insights_password = os.environ.get("INSIGHTS_PASSWORD")
    if os.path.isfile(local_path):
        file_info = os.stat(local_path)
        filesize = _convert_bytes(file_info.st_size)
    LOG.info(f"Upload File: ({local_path}) filesize is {filesize}.")
    with open(local_path, "rb") as upload_file:
        if insights_account_id and insights_org_id:
            header = {
                "identity": {
                    "account_number": insights_account_id,
                    "internal": {"org_id": insights_org_id},
                }
            }
            headers = {"x-rh-identity": base64.b64encode(json.dumps(header).encode("UTF-8"))}
            return requests.post(
                insights_upload,
                data={},
                files={"file": ("payload.tar.gz", upload_file, "application/vnd.redhat.hccm.tar+tgz")},
                headers=headers,
            )

        return requests.post(
            insights_upload,
            data={},
            files={"file": ("payload.tar.gz", upload_file, "application/vnd.redhat.hccm.tar+tgz")},
            auth=(insights_user, insights_password),
            verify=False,
        )
def upload_to_s3(bucket_name, bucket_file_path, local_path):
    """Upload data to an S3 bucket.

    Args:
        bucket_name (String): The name of the S3 bucket
        bucket_file_path (String): The path to store the file to
        local_path (String): The local file system path of the file
    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True
    try:
        s3_client = boto3.resource("s3")
        s3_client.Bucket(bucket_name).upload_file(local_path, bucket_file_path)
        msg = f"Uploaded {bucket_file_path} to s3 bucket {bucket_name}."
        LOG.info(msg)
    except (ClientError, BotoConnectionError, boto3.exceptions.S3UploadFailedError) as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
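# Example usage (illustrative only; bucket and paths are hypothetical).
# boto3 resolves credentials from the environment or ~/.aws/credentials:
#
#   if upload_to_s3("my-cost-bucket", "reports/2021-07/report.csv.gz", "/tmp/report.csv.gz"):
#       LOG.info("report uploaded")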
def oci_create_report(options):
    """Create cost and usage report files."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    fake = Faker()
    attributes = {}
    attributes["tenant_id"] = f"ocid1.tenancy.oc1..{fake.pystr(min_chars=20, max_chars=50)}"

    generators = [
        {"generator": OCIComputeGenerator, "attributes": attributes},
        {"generator": OCINetworkGenerator, "attributes": attributes},
        {"generator": OCIBlockStorageGenerator, "attributes": attributes},
        {"generator": OCIDatabaseGenerator, "attributes": attributes},
    ]
    months = _create_month_list(start_date, end_date)
    currency = default_currency(options.get("currency"), static_currency=None)
    # write_monthly = options.get("write_monthly", False)
    file_number = 0
    for month in months:
        data = {OCI_COST_REPORT: [], OCI_USAGE_REPORT: []}
        monthly_files = []
        for report_type in OCI_REPORT_TYPE_TO_COLS:
            LOG.info(f"Generating data for OCI for {month.get('name')}")
            for generator in generators:
                generator_cls = generator.get("generator")
                attributes = generator.get("attributes")
                gen_start_date = month.get("start")
                gen_end_date = month.get("end")
                gen = generator_cls(gen_start_date, gen_end_date, currency, report_type, attributes)
                for hour in gen.generate_data(report_type=report_type):
                    data[report_type] += [hour]
            month_output_file = write_oci_file(report_type, file_number, data[report_type], options)
            monthly_files.append(month_output_file)
        file_number += 1
def upload_to_gcp_storage(bucket_name, source_file_name, destination_blob_name):
    """Upload data to a GCP Storage Bucket.

    Args:
        bucket_name (String): The container to upload file to
        source_file_name (String): The full local file system path of the file
        destination_blob_name (String): Destination blob name to store in GCP.
    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True
    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to load file into "
            "GCP Storage."
        )
        return False
    try:
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)
        blob.upload_from_filename(source_file_name)
        LOG.info(f"File {source_file_name} uploaded to GCP Storage {destination_blob_name}.")
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
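# Example usage (illustrative only; bucket and file names are hypothetical).
# GOOGLE_APPLICATION_CREDENTIALS must point at a service-account key with
# write access to the bucket:
#
#   upload_to_gcp_storage("my-gcp-bucket", "/tmp/gcp-report.jsonl", "reports/gcp-report.jsonl")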
def _gcp_bigquery_process(
    start_date, end_date, currency, projects, generators, options, gcp_bucket_name, gcp_dataset_name, gcp_table_name
):
    """Generate JSONL data for each project and optionally load it into a BigQuery dataset."""
    data = []
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for start: {start_date} and end: {end_date}.")
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")
            generator_cls = generator.get("generator")
            gen = generator_cls(start_date, end_date, currency, project, attributes=attributes)
            for hour in gen.generate_data():
                data += [hour]
            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    local_file_path, output_file_name = write_gcp_file_jsonl(start_date, end_date, data, options)
    monthly_files.append(local_file_path)
    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, local_file_path, output_file_name)
        if not gcp_table_name:
            etag = options.get("gcp_etag") if options.get("gcp_etag") else str(uuid4())
            gcp_table_name = f"gcp_billing_export_{etag}"
        gcp_bucket_to_dataset(gcp_bucket_name, output_file_name, gcp_dataset_name, gcp_table_name)
    return monthly_files
def replace_args(args, yaml, provider, ocp_on_cloud):
    """Replace appropriate file paths in args."""
    if not yaml:
        raise KeyError(f"Options YAML error: {provider} is not defined under {ocp_on_cloud}")

    from nise.yaml_gen import STATIC_DIR

    args.provider = provider
    if yaml.get(f"{provider}-output-filename"):
        args.output_file_name = yaml.get(f"{provider}-output-filename")
    else:
        LOG.info(f"Output file not defined for {provider} under {ocp_on_cloud}. Writing to '{ocp_on_cloud}_{provider}.yml'.")
        args.output_file_name = f"{ocp_on_cloud}_{provider}.yml"

    if args.default:
        template_file_name = os.path.join(STATIC_DIR, yaml.get(f"{provider}-template"))
        config_file_name = os.path.join(STATIC_DIR, yaml.get(f"{provider}-gen-config"))
    else:
        template_file_name = yaml.get(f"{provider}-template")
        config_file_name = yaml.get(f"{provider}-gen-config")

    if template_file_name:
        args.template_file_name = template_file_name
    else:
        LOG.info(f"Template not defined for {provider} under {ocp_on_cloud}. Using default template.")
        args.template_file_name = os.path.join(STATIC_DIR, f"{provider}_static_data.yml.j2")

    if config_file_name:
        args.config_file_name = config_file_name
    else:
        LOG.info(f"Configuration not defined for {provider} under {ocp_on_cloud}. Using default configuration.")
        args.config_file_name = None
def azure_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    if not (options.get("start_date") and options.get("end_date")):
        options = load_static_report_data(options)
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    months = _create_month_list(start_date, end_date, options.get("days_per_month"))

    meter_cache = {}
    # The options params are not going to change so we don't
    # have to keep resetting the var inside of the for loop
    azure_container_name = options.get("azure_container_name")
    storage_account_name = options.get("azure_account_name")
    azure_prefix_name = options.get("azure_prefix_name")
    azure_report_name = options.get("azure_report_name")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        monthly_files = []
        num_gens = len(AZURE_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(AZURE_GENERATORS):
            gen_start_date = options.get("gen_starts", {}).get(generator.__name__, month.get("start"))
            gen_end_date = options.get("gen_ends", {}).get(generator.__name__, month.get("end"))

            # Skip if generator usage is outside of current month
            if gen_end_date < month.get("start"):
                continue
            if gen_start_date > month.get("end"):
                continue

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(options, month)

            gen = generator(gen_start_date, gen_end_date, meter_cache, user_config=options.get("static_report_file"))
            data += gen.generate_data()
            meter_cache = gen.get_meter_cache()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        local_path, output_file_name = _generate_azure_filename()
        date_range = _generate_azure_date_range(month)

        _write_csv(local_path, data, AZURE_COLUMNS)
        monthly_files.append(local_path)

        if azure_container_name:
            file_path = ""
            if azure_prefix_name:
                file_path += azure_prefix_name + "/"
            file_path += azure_report_name + "/"
            file_path += date_range + "/"
            file_path += output_file_name

            # azure blob upload
            storage_account_name = options.get("azure_account_name", None)
            if storage_account_name:
                azure_route_file(storage_account_name, azure_container_name, local_path, file_path)
            # local dir upload
            else:
                azure_route_file(azure_container_name, file_path, local_path)
        if not write_monthly:
            _remove_files(monthly_files)
def build_data(self, config, _random=False):  # noqa: C901
    """
    Build a structure to fill out a nise yaml template.

    Structure has the form of:
        {start_date: date,   (config.start_date)
         end_date: date,     (config.end_date)
         nodes: [            (number of nodes controlled by config.max_nodes)
             {node_name: str,        (dynamic)
              cpu_cores: int,        (config.max_node_cpu_cores)
              memory_gig: int,       (config.max_node_memory_gig)
              resource_id: str,      (dynamic)
              namespaces: [          (number of namespaces controlled by config.max_node_namespaces)
                  {namespace: str,   (dynamic)
                   pods: [           (number of pods controlled by config.max_node_namespace_pods)
                       pod_name: str,         (dynamic)
                       cpu_request: int,      (config.max_node_namespace_pod_cpu_request)
                       mem_request_gig: int,  (config.max_node_namespace_pod_mem_request_gig)
                       cpu_limit: int,        (config.max_node_namespace_pod_cpu_limit)
                       mem_limit_gig: int,    (config.max_node_namespace_pod_mem_limit_gig)
                       pod_seconds: int,      (config.max_node_namespace_pod_seconds)
                       labels: str            (dynamic)
                   ],
                   volumes: [
                       volume_name: str,
                       storage_class: str,
                       volume_request_gig: int,
                       labels: str,
                       volume_claims: [
                           volume_claim_name: str,
                           pod_name: str,
                           labels: str,
                           capacity_gig: int
                       ]
                   ]}
              ]}
         ]}

    Parameters:
        config : dicta

    Returns:
        dicta
    """
    LOG.info("Data build starting")

    data = dicta(start_date=str(config.start_date), end_date=str(config.end_date), nodes=[], resourceid_labels=None)
    resourceid_labels = {}

    if _random:
        max_nodes = FAKER.random_int(1, config.max_nodes)
    else:
        max_nodes = config.max_nodes

    for node_ix in range(max_nodes):
        LOG.info(f"Building node {node_ix + 1}/{max_nodes}...")
        if _random:
            cores = FAKER.random_int(1, config.max_node_cpu_cores)
            memory = FAKER.random_int(1, config.max_node_memory_gig)
        else:
            cores = config.max_node_cpu_cores
            memory = config.max_node_memory_gig

        resource_id = generate_resource_id(config)
        node_name = generate_name(config)
        id_label_key = (resource_id, node_name)
        resourceid_labels[id_label_key] = []
        node = dicta(name=node_name, cpu_cores=cores, memory_gig=memory, resource_id=resource_id, namespaces=[])
        data.nodes.append(node)

        if _random:
            max_namespaces = FAKER.random_int(1, config.max_node_namespaces)
        else:
            max_namespaces = config.max_node_namespaces

        for namespace_ix in range(max_namespaces):
            LOG.info(f"Building node {node_ix + 1}/{max_nodes}; namespace {namespace_ix + 1}/{max_namespaces}...")
            namespace = dicta(name=generate_name(config, prefix=node.name), pods=[], volumes=[])
            node.namespaces.append(namespace)

            if _random:
                max_pods = FAKER.random_int(1, config.max_node_namespace_pods)
            else:
                max_pods = config.max_node_namespace_pods

            LOG.info(f"Building {max_pods} pods...")
            for pod_ix in range(max_pods):
                if _random:
                    cpu_req = FAKER.random_int(1, node.cpu_cores)
                    mem_req = FAKER.random_int(1, node.memory_gig)
                    cpu_lim = FAKER.random_int(1, node.cpu_cores)
                    mem_lim = FAKER.random_int(1, node.memory_gig)
                    pod_sec = FAKER.random_int(
                        config.min_node_namespace_pod_seconds,
                        config.max_node_namespace_pod_seconds,
                        step=(config.max_node_namespace_pod_seconds // 10) or 1800,
                    )
                else:
                    cpu_lim = cpu_req = node.cpu_cores
                    mem_lim = mem_req = node.memory_gig
                    pod_sec = config.max_node_namespace_pod_seconds

                pod_labels = generate_labels(config.max_node_namespace_pod_labels)
                resourceid_labels[id_label_key].append(pod_labels)
                pod = dicta(
                    name=generate_name(config, prefix=namespace.name + "-pod", suffix=str(pod_ix), dynamic=False),
                    cpu_request=cpu_req,
                    mem_request_gig=mem_req,
                    cpu_limit=cpu_lim,
                    mem_limit_gig=mem_lim,
                    pod_seconds=pod_sec,
                    labels=pod_labels,
                )
                namespace.pods.append(pod)

            if _random:
                max_volumes = FAKER.random_int(1, config.max_node_namespace_volumes)
            else:
                max_volumes = config.max_node_namespace_volumes

            LOG.info(f"Building {max_volumes} volumes...")
            for volume_ix in range(max_volumes):
                if _random:
                    storage_cls = config.storage_classes[FAKER.random_int(0, len(config.storage_classes) - 1)]
                    vol_req = FAKER.random_int(1, config.max_node_namespace_volume_request_gig)
                else:
                    storage_cls = config.storage_classes[0]
                    vol_req = config.max_node_namespace_volume_request_gig

                volume_labels = generate_labels(config.max_node_namespace_volume_labels)
                resourceid_labels[id_label_key].append(volume_labels)
                volume = dicta(
                    name=generate_name(config, prefix=namespace.name + "-vol", suffix=str(volume_ix), dynamic=False),
                    storage_class=storage_cls,
                    volume_request_gig=vol_req,
                    labels=volume_labels,
                    volume_claims=[],
                )
                namespace.volumes.append(volume)

                if _random:
                    max_volume_claims = FAKER.random_int(1, config.max_node_namespace_volume_volume_claims)
                else:
                    max_volume_claims = config.max_node_namespace_volume_volume_claims

                for volume_claim_ix in range(max_volume_claims):
                    if _random:
                        cap = FAKER.random_int(1, config.max_node_namespace_volume_volume_claim_capacity_gig)
                    else:
                        cap = config.max_node_namespace_volume_volume_claim_capacity_gig

                    pod_name = namespace.pods[-1 if volume_claim_ix >= len(namespace.pods) else volume_claim_ix].name
                    volume_claim_labels = generate_labels(config.max_node_namespace_volume_volume_claim_labels)
                    resourceid_labels[id_label_key].append(volume_claim_labels)
                    volume_claim = dicta(
                        name=generate_name(
                            config, prefix=namespace.name + "-vol-claim", suffix=str(volume_claim_ix), dynamic=False
                        ),
                        pod_name=pod_name,
                        labels=volume_claim_labels,
                        capacity_gig=cap,
                    )
                    volume.volume_claims.append(volume_claim)

    data.resourceid_labels = resourceid_labels

    return data
def build_data(self, config, _random=False):  # noqa: C901
    """Build the data."""
    LOG.info("Data build starting")

    data = dicta(
        payer=config.payer_account,
        bandwidth_gens=[],
        sql_gens=[],
        storage_gens=[],
        vmachine_gens=[],
        vnetwork_gens=[],
    )

    max_bandwidth_gens = FAKER.random_int(0, config.max_bandwidth_gens) if _random else config.max_bandwidth_gens
    max_sql_gens = FAKER.random_int(0, config.max_sql_gens) if _random else config.max_sql_gens
    max_storage_gens = FAKER.random_int(0, config.max_storage_gens) if _random else config.max_storage_gens
    max_vmachine_gens = FAKER.random_int(0, config.max_vmachine_gens) if _random else config.max_vmachine_gens
    max_vnetwork_gens = FAKER.random_int(0, config.max_vnetwork_gens) if _random else config.max_vnetwork_gens

    LOG.info(f"Building {max_bandwidth_gens} Bandwidth generators ...")
    for _ in range(max_bandwidth_gens):
        data.bandwidth_gens.append(generate_azure_dicta(config, "bandwidth"))

    LOG.info(f"Building {max_sql_gens} SQL generators ...")
    for _ in range(max_sql_gens):
        data.sql_gens.append(generate_azure_dicta(config, "sql"))

    LOG.info(f"Building {max_storage_gens} Storage generators ...")
    for _ in range(max_storage_gens):
        data.storage_gens.append(generate_azure_dicta(config, "storage"))

    LOG.info(f"Building {max_vmachine_gens} Virtual Machine generators ...")
    for _ in range(max_vmachine_gens):
        data.vmachine_gens.append(generate_azure_dicta(config, "vmachine"))

    LOG.info(f"Building {max_vnetwork_gens} Virtual Network generators ...")
    for _ in range(max_vnetwork_gens):
        data.vnetwork_gens.append(generate_azure_dicta(config, "vnetwork"))

    return data
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": {}}]

    months = _create_month_list(start_date, end_date)

    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: [], OCP_NAMESPACE_LABEL: []}
        file_numbers = {OCP_POD_USAGE: 0, OCP_STORAGE_USAGE: 0, OCP_NODE_LABEL: 0, OCP_NAMESPACE_LABEL: 0}
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)

            for report_type in gen.ocp_report_generation.keys():
                LOG.info(f"Generating data for {report_type} for {month.get('name')}")
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1
            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file in range(len(monthly_files)):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(monthly_files[num_file], temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            cr_status = {
                "clusterID": "4e009161-4f40-42c8-877c-3e59f6baea3d",
                "clusterVersion": "stable-4.6",
                "api_url": "https://console.redhat.com",
                "authentication": {"type": "token"},
                "packaging": {"max_reports_to_store": 30, "max_size_MB": 100},
                "upload": {
                    "ingress_path": "/api/ingress/v1/upload",
                    "upload": "True",
                    "upload_wait": 27,
                    "upload_cycle": 360,
                },
                "operator_commit": __version__,
                "prometheus": {
                    "prometheus_configured": "True",
                    "prometheus_connected": "True",
                    "last_query_start_time": "2021-07-28T12:22:37Z",
                    "last_query_success_time": "2021-07-28T12:22:37Z",
                    "service_address": "https://thanos-querier.openshift-monitoring.svc:9091",
                },
                "reports": {
                    "report_month": "07",
                    "last_hour_queried": "2021-07-28 11:00:00 - 2021-07-28 11:59:59",
                    "data_collected": "True",
                },
                "source": {
                    "sources_path": "/api/sources/v1.0/",
                    "name": "INSERT-SOURCE-NAME",
                    "create_source": "False",
                    "check_cycle": 1440,
                },
            }
            cr_status = json.dumps(cr_status)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
                "start": gen_start_date,
                "end": gen_end_date,
                "version": __version__,
                "certified": False,
                "cr_status": cr_status,
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest, "manifest.json", "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)

        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    gcp_bucket_name = options.get("gcp_bucket_name")
    gcp_dataset_name = options.get("gcp_dataset_name")
    gcp_table_name = options.get("gcp_table_name")
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")

    if gcp_dataset_name:
        # if the file is supposed to be uploaded to a bigquery table, it needs the JSONL version of everything
        if static_report_data:
            generators = _get_jsonl_generators(static_report_data.get("generators"))
            static_projects = static_report_data.get("projects", {})
            projects = []
            for static_dict in static_projects:
                # this lets the format of the YAML remain the same whether using the upload or local
                project = {}
                project["name"] = static_dict.get("project.name", "")
                project["id"] = static_dict.get("project.id", "")
                # the k:v pairs are split by ; and the keys and values split by :
                static_labels = static_dict.get("project.labels", [])
                labels = []
                if static_labels:
                    for pair in static_labels.split(";"):
                        key = pair.split(":")[0]
                        value = pair.split(":")[1]
                        labels.append({"key": key, "value": value})
                project["labels"] = labels
                location = {}
                location["location"] = static_dict.get("location.location", "")
                location["country"] = static_dict.get("location.country", "")
                location["region"] = static_dict.get("location.region", "")
                location["zone"] = static_dict.get("location.zone", "")
                row = {
                    "billing_account_id": static_dict.get("billing_account_id", ""),
                    "project": project,
                    "location": location,
                }
                projects.append(row)
            currency = default_currency(options.get("currency"), get_gcp_static_currency(generators))
        else:
            generators = [
                {"generator": JSONLCloudStorageGenerator, "attributes": {}},
                {"generator": JSONLComputeEngineGenerator, "attributes": {}},
                {"generator": JSONLGCPNetworkGenerator, "attributes": {}},
                {"generator": JSONLGCPDatabaseGenerator, "attributes": {}},
            ]
            account = fake.word()
            project_generator = JSONLProjectGenerator(account)
            projects = project_generator.generate_projects()
            currency = default_currency(options.get("currency"), None)
    elif static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")
        processed_projects = copy.deepcopy(projects)
        for i, project in enumerate(projects):
            labels = []
            static_labels = project.get("project.labels", [])
            if static_labels:
                for pair in static_labels.split(";"):
                    key = pair.split(":")[0]
                    value = pair.split(":")[1]
                    labels.append({"key": key, "value": value})
            processed_projects[i]["project.labels"] = json.dumps(labels)
        projects = processed_projects
    else:
        generators = [
            {"generator": CloudStorageGenerator, "attributes": {}},
            {"generator": ComputeEngineGenerator, "attributes": {}},
            {"generator": GCPNetworkGenerator, "attributes": {}},
            {"generator": GCPDatabaseGenerator, "attributes": {}},
        ]
        account = fake.word()
        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    if gcp_dataset_name:
        monthly_files = _gcp_bigquery_process(
            start_date,
            end_date,
            currency,
            projects,
            generators,
            options,
            gcp_bucket_name,
            gcp_dataset_name,
            gcp_table_name,
        )
    else:
        months = _create_month_list(start_date, end_date)
        monthly_files = []
        output_files = []
        for month in months:
            data = []
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            for project in projects:
                num_gens = len(generators)
                ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
                LOG.info(f"Producing data for {num_gens} generators for start: {gen_start_date} and end: {gen_end_date}.")
                for count, generator in enumerate(generators):
                    attributes = generator.get("attributes", {})
                    if attributes:
                        start_date = attributes.get("start_date")
                        end_date = attributes.get("end_date")
                        currency = default_currency(options.get("currency"), attributes.get("currency"))
                    else:
                        currency = default_currency(options.get("currency"), None)
                    if gen_end_date > end_date:
                        gen_end_date = end_date
                    generator_cls = generator.get("generator")
                    gen = generator_cls(gen_start_date, gen_end_date, currency, project, attributes=attributes)
                    for hour in gen.generate_data():
                        data += [hour]
                    count += 1
                    if count % ten_percent == 0:
                        LOG.info(f"Done with {count} of {num_gens} generators.")
            local_file_path, output_file_name = write_gcp_file(gen_start_date, gen_end_date, data, options)
            output_files.append(output_file_name)
            monthly_files.append(local_file_path)

        for index, month_file in enumerate(monthly_files):
            if gcp_bucket_name:
                gcp_route_file(gcp_bucket_name, month_file, output_files[index])

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
def aws_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    aws_finalize_report = options.get("aws_finalize_report")
    static_report_data = options.get("static_report_data")
    manifest_gen = True if options.get("manifest_generation") is None else options.get("manifest_generation")

    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": DataTransferGenerator, "attributes": {}},
            {"generator": EBSGenerator, "attributes": {}},
            {"generator": EC2Generator, "attributes": {}},
            {"generator": S3Generator, "attributes": {}},
            {"generator": RDSGenerator, "attributes": {}},
            {"generator": Route53Generator, "attributes": {}},
            {"generator": VPCGenerator, "attributes": {}},
            {"generator": MarketplaceGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)

    payer_account, usage_accounts, currency_code = _generate_accounts(accounts_list)
    currency_code = default_currency(options.get("currency"), currency_code)

    aws_bucket_name = options.get("aws_bucket_name")
    aws_report_name = options.get("aws_report_name")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        file_number = 0
        monthly_files = []
        fake = Faker()
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(
                gen_start_date,
                gen_end_date,
                currency_code,
                payer_account,
                usage_accounts,
                attributes,
                options.get("aws_tags"),
            )
            num_instances = 1 if attributes else randint(2, 60)
            for _ in range(num_instances):
                for hour in gen.generate_data():
                    data += [hour]
                    if len(data) == options.get("row_limit"):
                        file_number += 1
                        month_output_file = write_aws_file(
                            file_number,
                            aws_report_name,
                            month.get("name"),
                            gen_start_date.year,
                            data,
                            aws_finalize_report,
                            static_report_data,
                            gen.AWS_COLUMNS,
                        )
                        monthly_files.append(month_output_file)
                        data.clear()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        if file_number != 0:
            file_number += 1
        month_output_file = write_aws_file(
            file_number,
            aws_report_name,
            month.get("name"),
            gen_start_date.year,
            data,
            aws_finalize_report,
            static_report_data,
            gen.AWS_COLUMNS,
        )
        monthly_files.append(month_output_file)

        if aws_bucket_name:
            manifest_values = {"account": payer_account}
            manifest_values.update(options)
            manifest_values["start_date"] = gen_start_date
            manifest_values["end_date"] = gen_end_date
            manifest_values["file_names"] = monthly_files
            if not manifest_gen:
                s3_cur_path, _ = aws_generate_manifest(fake, manifest_values)
                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)
            else:
                s3_cur_path, manifest_data = aws_generate_manifest(fake, manifest_values)
                s3_month_path = os.path.dirname(s3_cur_path)
                s3_month_manifest_path = s3_month_path + "/" + aws_report_name + "-Manifest.json"
                s3_assembly_manifest_path = s3_cur_path + "/" + aws_report_name + "-Manifest.json"
                temp_manifest = _write_manifest(manifest_data)
                aws_route_file(aws_bucket_name, s3_month_manifest_path, temp_manifest)
                aws_route_file(aws_bucket_name, s3_assembly_manifest_path, temp_manifest)
                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)
                os.remove(temp_manifest)
        if not write_monthly:
            _remove_files(monthly_files)
def azure_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": BandwidthGenerator, "attributes": {}},
            {"generator": SQLGenerator, "attributes": {}},
            {"generator": StorageGenerator, "attributes": {}},
            {"generator": VMGenerator, "attributes": {}},
            {"generator": VNGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)
    account_info = _generate_azure_account_info(accounts_list)
    currency = default_currency(options.get("currency"), account_info["currency_code"])

    meter_cache = {}
    # The options params are not going to change so we don't
    # have to keep resetting the var inside of the for loop
    azure_container_name = options.get("azure_container_name")
    storage_account_name = options.get("azure_account_name")
    azure_prefix_name = options.get("azure_prefix_name")
    azure_report_name = options.get("azure_report_name")
    version_two = options.get("version_two", False)
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        monthly_files = []
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes", {})
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
            else:
                attributes = {"end_date": end_date, "start_date": start_date}
            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            if attributes.get("meter_cache"):
                meter_cache.update(attributes.get("meter_cache"))  # needed so that meter_cache can be defined in yaml
            attributes["meter_cache"] = meter_cache
            attributes["version_two"] = version_two
            gen = generator_cls(gen_start_date, gen_end_date, currency, account_info, attributes)
            azure_columns = gen.azure_columns
            data += gen.generate_data()
            meter_cache = gen.get_meter_cache()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        local_path, output_file_name = _generate_azure_filename()
        date_range = _generate_azure_date_range(month)

        _write_csv(local_path, data, azure_columns)
        monthly_files.append(local_path)

        if azure_container_name:
            file_path = ""
            if azure_prefix_name:
                file_path += azure_prefix_name + "/"
            file_path += azure_report_name + "/"
            file_path += date_range + "/"
            file_path += output_file_name

            # azure blob upload
            storage_account_name = options.get("azure_account_name", None)
            if storage_account_name:
                azure_route_file(storage_account_name, azure_container_name, local_path, file_path)
            # local dir upload
            else:
                azure_route_file(azure_container_name, file_path, local_path)
        if not write_monthly:
            _remove_files(monthly_files)
def build_data(self, config, _random=False):  # noqa: C901
    """Build the data."""
    LOG.info("Data build starting")

    data = dicta(
        payer=config.payer_account,
        data_transfer_gens=[],
        ebs_gens=[],
        ec2_gens=[],
        rds_gens=[],
        route53_gens=[],
        s3_gens=[],
        vpc_gens=[],
        users=[],
    )

    max_data_transfer_gens = (
        FAKER.random_int(0, config.max_data_transfer_gens) if _random else config.max_data_transfer_gens
    )
    max_ebs_gens = FAKER.random_int(0, config.max_ebs_gens) if _random else config.max_ebs_gens
    max_ec2_gens = FAKER.random_int(0, config.max_ec2_gens) if _random else config.max_ec2_gens
    max_rds_gens = FAKER.random_int(0, config.max_rds_gens) if _random else config.max_rds_gens
    max_route53_gens = FAKER.random_int(0, config.max_route53_gens) if _random else config.max_route53_gens
    max_s3_gens = FAKER.random_int(0, config.max_s3_gens) if _random else config.max_s3_gens
    max_vpc_gens = FAKER.random_int(0, config.max_vpc_gens) if _random else config.max_vpc_gens
    max_users = FAKER.random_int(0, config.max_users) if _random else config.max_users

    LOG.info(f"Building {max_data_transfer_gens} data transfer generators ...")
    for _ in range(max_data_transfer_gens):
        _rate, _amount = RATE_AMT.get("DTG")
        data_transfer_gen = initialize_dicta("DTG", config)
        data_transfer_gen.update(amount=round(next(_amount), 5), rate=round(next(_rate), 5))
        data.data_transfer_gens.append(data_transfer_gen)

    LOG.info(f"Building {max_ebs_gens} EBS generators ...")
    for _ in range(max_ebs_gens):
        _rate, _amount = RATE_AMT.get("EBS")
        ebs_gen = initialize_dicta("EBS", config)
        ebs_gen.update(amount=round(next(_amount), 5), rate=round(next(_rate), 5))
        data.ebs_gens.append(ebs_gen)

    LOG.info(f"Building {max_ec2_gens} EC2 generators ...")
    for _ in range(max_ec2_gens):
        instance_type = random.choice(EC2_INSTANCES)
        ec2_gen = initialize_dicta("EC2", config)
        ec2_gen.update(
            processor_arch=instance_type.get("processor_arch"),
            region=random.choice(REGIONS),
            instance_type=instance_type,
        )
        data.ec2_gens.append(ec2_gen)

    LOG.info(f"Building {max_rds_gens} RDS generators ...")
    for _ in range(max_rds_gens):
        instance_type = random.choice(RDS_INSTANCES)
        rds_gen = initialize_dicta("RDS", config)
        rds_gen.update(
            processor_arch=instance_type.get("processor_arch"),
            region=random.choice(REGIONS),
            instance_type=instance_type,
        )
        data.rds_gens.append(rds_gen)

    LOG.info(f"Building {max_route53_gens} Route 53 generators ...")
    for _ in range(max_route53_gens):
        route53_gen = initialize_dicta("R53", config)
        route53_gen.update(product_family=random.choices(("DNS Zone", "DNS Query"), weights=[1, 10])[0])
        data.route53_gens.append(route53_gen)

    LOG.info(f"Building {max_s3_gens} S3 generators ...")
    for _ in range(max_s3_gens):
        _rate, _amount = RATE_AMT.get("S3")
        s3_gen = initialize_dicta("S3", config)
        s3_gen.update(amount=round(next(_amount), 5), rate=round(next(_rate), 5))
        data.s3_gens.append(s3_gen)

    LOG.info(f"Building {max_vpc_gens} VPC generators ...")
    for _ in range(max_vpc_gens):
        vpc_gen = initialize_dicta("VPC", config)
        data.vpc_gens.append(vpc_gen)

    LOG.info(f"Adding {max_users} users.")
    for _ in range(max_users):
        data.users.append(generate_account_id(config))

    return data
def aws_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    if not (options.get("start_date") and options.get("end_date")):
        options = load_static_report_data(options)
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    aws_finalize_report = options.get("aws_finalize_report")

    months = _create_month_list(start_date, end_date, options.get("days_per_month"))

    aws_bucket_name = options.get("aws_bucket_name")
    aws_report_name = options.get("aws_report_name")
    write_monthly = options.get("write_monthly", False)
    payer_account = None
    for month in months:
        data = []
        file_number = 0
        monthly_files = []
        num_gens = len(AWS_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(AWS_GENERATORS):
            gen_start_date = options.get("gen_starts", {}).get(generator.__name__, month.get("start"))
            gen_end_date = options.get("gen_ends", {}).get(generator.__name__, month.get("end"))

            # Skip if generator usage is outside of current month
            if gen_end_date < month.get("start"):
                continue
            if gen_start_date > month.get("end"):
                continue

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(options, month)

            gen = generator(gen_start_date, gen_end_date, user_config=options.get("static_report_file"))
            payer_account = gen.config[0].get("accounts", {}).get("payer")
            for _ in range(gen.num_instances):
                for hour in gen.generate_data():
                    data += [hour]
                    if len(data) == options.get("row_limit"):
                        file_number += 1
                        month_output_file = write_aws_file(
                            file_number,
                            aws_report_name,
                            month.get("name"),
                            gen_start_date.year,
                            data,
                            aws_finalize_report,
                            gen.AWS_COLUMNS,
                        )
                        monthly_files.append(month_output_file)
                        data.clear()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        if file_number != 0:
            file_number += 1
        month_output_file = write_aws_file(
            file_number,
            aws_report_name,
            month.get("name"),
            gen_start_date.year,
            data,
            aws_finalize_report,
            gen.AWS_COLUMNS,
        )
        monthly_files.append(month_output_file)

        if aws_bucket_name:
            manifest_values = {"account": payer_account}
            manifest_values.update(options)
            manifest_values["start_date"] = gen_start_date
            manifest_values["end_date"] = gen_end_date
            manifest_values["file_names"] = monthly_files
            s3_cur_path, manifest_data = aws_generate_manifest(manifest_values)
            s3_month_path = os.path.dirname(s3_cur_path)
            s3_month_manifest_path = s3_month_path + "/" + aws_report_name + "-Manifest.json"
            s3_assembly_manifest_path = s3_cur_path + "/" + aws_report_name + "-Manifest.json"
            temp_manifest = _write_manifest(manifest_data)
            aws_route_file(aws_bucket_name, s3_month_manifest_path, temp_manifest)
            aws_route_file(aws_bucket_name, s3_assembly_manifest_path, temp_manifest)
            for monthly_file in monthly_files:
                temp_cur_zip = _gzip_report(monthly_file)
                destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                os.remove(temp_cur_zip)
            os.remove(temp_manifest)
        if not write_monthly:
            _remove_files(monthly_files)
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": None}]

    months = _create_month_list(start_date, end_date)

    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: []}
        file_numbers = {OCP_POD_USAGE: 0, OCP_STORAGE_USAGE: 0, OCP_NODE_LABEL: 0}
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)

            for report_type in gen.ocp_report_generation.keys():
                LOG.info(f"Generating data for {report_type} for {month.get('name')}")
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1
            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file in range(len(monthly_files)):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(monthly_files[num_file], temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest, "manifest.json", "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)

        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    projects = []
    if options.get("static_report_file"):
        config = load_yaml(options.get("static_report_file"))
        project_gens = list(filter(lambda x: "ProjectGenerator" in x, config.get("generators")))
        projects = []
        for gen in project_gens:
            project_generator = ProjectGenerator(gen.get("ProjectGenerator", {}).get("Account ID"))
            projects = projects + [prj for prj in project_generator.generate_projects()]
    else:
        account = "{}-{}".format(fake.word(), fake.word())
        project_generator = ProjectGenerator(account)
        projects = projects + [prj for prj in project_generator.generate_projects()]

    data = {}
    for project in projects:
        num_gens = len(GCP_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for GCP Project '{project}'.")
        for count, generator in enumerate(GCP_GENERATORS):
            gen = generator(start_date, end_date, project, user_config=options.get("static_report_file"))
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item
            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix, day.strftime("%Y-%m-%d"))
        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)
        if gcp_bucket_name:
            gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")
    else:
        generators = [
            {"generator": CloudStorageGenerator, "attributes": None},
            {"generator": ComputeEngineGenerator, "attributes": None},
        ]
        account = "{}-{}".format(fake.word(), fake.word())
        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    data = {}
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {'INSERT SOMETHING FOR GCP'}.")
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")
            generator_cls = generator.get("generator")
            gen = generator_cls(start_date, end_date, project, attributes=attributes)
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item
            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix, day.strftime("%Y-%m-%d"))
        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)
        if gcp_bucket_name:
            gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
def gcp_bucket_to_dataset(gcp_bucket_name, file_name, dataset_name, table_name):
    """Create a gcp dataset from a file stored in a bucket.

    Args:
        gcp_bucket_name (String): The container to upload file to
        file_name (String): The name of the file stored in GCP
        dataset_name (String): name for the created dataset in GCP
        table_name (String): name for the created table in GCP
    Returns:
        (Boolean): True if the dataset was created

    """
    uploaded = True
    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to create a dataset."
        )
        return False
    try:
        bigquery_client = bigquery.Client()

        project_name = bigquery_client.project
        dataset_id = f"{project_name}.{dataset_name}"

        dataset = bigquery.Dataset(dataset_id)

        # delete dataset (does not error if it doesn't exist) and create fresh one
        bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
        dataset = bigquery_client.create_dataset(dataset)

        table_id = f"{project_name}.{dataset_name}.{table_name}"

        # creates the job config with specifics
        job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            time_partitioning=bigquery.TimePartitioning(),
            schema=[
                {"name": "billing_account_id", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "service",
                    "type": "RECORD",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
                {
                    "name": "sku",
                    "type": "RECORD",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
                {"name": "usage_start_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "usage_end_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {
                    "name": "project",
                    "type": "RECORD",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "number", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {
                            "name": "labels",
                            "type": "RECORD",
                            "fields": [
                                {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                                {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                            ],
                            "mode": "REPEATED",
                        },
                        {"name": "ancestry_numbers", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
                {
                    "name": "labels",
                    "type": "RECORD",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "REPEATED",
                },
                {
                    "name": "system_labels",
                    "type": "RECORD",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "REPEATED",
                },
                {
                    "name": "location",
                    "type": "RECORD",
                    "fields": [
                        {"name": "location", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "country", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "region", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "zone", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
                {"name": "export_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "cost", "type": "FLOAT", "mode": "NULLABLE"},
                {"name": "currency", "type": "STRING", "mode": "NULLABLE"},
                {"name": "currency_conversion_rate", "type": "FLOAT", "mode": "NULLABLE"},
                {
                    "name": "usage",
                    "type": "RECORD",
                    "fields": [
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "unit", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount_in_pricing_units", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "pricing_unit", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
                {
                    "name": "credits",
                    "type": "RECORD",
                    "fields": [
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "full_name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "REPEATED",
                },
                {
                    "name": "invoice",
                    "type": "RECORD",
                    "fields": [{"name": "month", "type": "STRING", "mode": "NULLABLE"}],
                    "mode": "NULLABLE",
                },
                {"name": "cost_type", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "adjustment_info",
                    "type": "RECORD",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "mode", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                    "mode": "NULLABLE",
                },
            ],
        )
        uri = f"gs://{gcp_bucket_name}/{file_name}"
        load_job = bigquery_client.load_table_from_uri(uri, table_id, job_config=job_config)

        # waits for the job to finish, will raise an exception if it doesn't work
        load_job.result()

        # after the table is created, delete the file from the storage bucket
        storage_client = storage.Client()
        bucket = storage_client.bucket(gcp_bucket_name)
        blob = bucket.blob(file_name)
        blob.delete()

        LOG.info(f"Dataset {dataset_name} created in GCP bigquery under the table name {table_name}.")
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
def extract_payload(base_path, payload_file):
    """Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            file - .csv usage report file name
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.  Format is:
        <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.
    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        base_path (String): base local directory path.
        payload_file (String): path to payload.tar.gz file containing report and manifest.

    Returns:
        None

    """
    # Create temporary directory for initial file staging and verification
    temp_dir = tempfile.mkdtemp()

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(payload_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except ReadError as error:
        LOG.error("Unable to untar file. Reason: {}".format(str(error)))
        shutil.rmtree(temp_dir)
        return

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(base_path, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get("files"):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError:
            pass

    LOG.info("Successfully extracted OCP for {}/{}".format(report_meta.get("cluster_id"), usage_month))
    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
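# Example usage (illustrative only; both paths are hypothetical). The tarball is
# expected to contain manifest.json plus the <uuid>_report_name.csv files, and the
# result lands under <base_path>/<cluster_id>/<usage_month>/:
#
#   extract_payload("/var/tmp/insights_local", "/tmp/payload.tar.gz")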