Example #1
def upload_to_azure_container(storage_file_name, local_path,
                              storage_file_path):
    """Upload data to a storage account.

    Args:
        storage_file_name (String): The container to upload file to
        local_path  (String): The full local file system path of the file
        storage_file_path (String): The file path to upload to within container

    Returns:
        (Boolean): True if file was uploaded

    """
    try:
        # Retrieve the connection string for use with the application.
        connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        blob_service_client = BlobServiceClient.from_connection_string(
            connect_str)
        blob_client = blob_service_client.get_blob_client(
            container=storage_file_name, blob=storage_file_path)
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data=data)
        LOG.info(f"uploaded {storage_file_name} to {storage_file_path}")
    except (CloudError, ClientException, IOError) as error:
        LOG.error(error)
        traceback.print_exc(file=sys.stderr)
        return False
    return True
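A minimal usage sketch for the helper above. It assumes the AZURE_STORAGE_CONNECTION_STRING environment variable is already exported and that the target container exists; the container, file, and blob names below are hypothetical.

# Hypothetical call; requires AZURE_STORAGE_CONNECTION_STRING to be set.
uploaded = upload_to_azure_container(
    storage_file_name="cost-reports",        # container name
    local_path="/tmp/report.csv",            # local file to upload
    storage_file_path="2024/01/report.csv",  # blob path inside the container
)
print("upload succeeded" if uploaded else "upload failed")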
Example #2
def copy_to_local_dir(local_dir_home, local_path, local_file_path=None):
    """Upload data to an local directory.

    Args:
        local_dir_home (String): Local file path representing the bucket
        local_path  (String): The local file system path of the file
        local_file_path (String): The path to store the file to
    Returns:
        (Boolean): True if file was uploaded

    """
    if not os.path.isdir(local_dir_home):
        LOG.info(
            f"Path does not exist for the local directory: {local_dir_home}")
        return False
    full_bucket_path = local_dir_home
    outpath = local_path
    if local_file_path:
        full_bucket_path = f"{local_dir_home}/{local_file_path}"
        outpath = local_file_path
    os.makedirs(os.path.dirname(full_bucket_path), exist_ok=True)
    shutil.copyfile(local_path, full_bucket_path)
    msg = f"Copied {outpath} to local directory {local_dir_home}."
    LOG.info(msg)
    return True
Example #3
def _write_csv(output_file, data, header):
    """Output csv file data."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for row in data:
            writer.writerow(row)
Example #4
def _write_jsonl(output_file, data):
    """Output JSON Lines file data for bigquery."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        for row in data:
            json.dump(row, file)
            # each dictionary "row" is its own line in a JSONL file
            file.write("\n")
Example #5
def load_static_report_data(options):
    """Load and set start and end dates if static file is provided."""
    if not options.get("static_report_file"):
        options["start_date"] = datetime.now().replace(day=1,
                                                       hour=0,
                                                       minute=0,
                                                       second=0,
                                                       microsecond=0)
        options["end_date"] = datetime.now().replace(hour=0,
                                                     minute=0,
                                                     second=0,
                                                     microsecond=0)
        return options

    LOG.info("Loading static data...")
    start_dates = {}
    end_dates = {}
    static_report_data = load_yaml(options.get("static_report_file"))
    for generator_dict in static_report_data.get("generators"):
        for genname, attributes in generator_dict.items():

            generated_start_date = calculate_start_date(
                attributes.get("start_date"))
            start_dates[genname] = generated_start_date

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(
                    generated_start_date, attributes.get("end_date"))
            else:
                generated_end_date = today()

            if options.get("provider") == "azure":
                generated_end_date += timedelta(hours=24)
            else:
                generated_end_date = generated_end_date.replace(hour=23,
                                                                minute=59)
            end_dates[genname] = generated_end_date

    options["gen_starts"] = start_dates
    options["gen_ends"] = end_dates

    options["start_date"] = min(start_dates.values())
    latest_date = max(end_dates.values())
    last_day_of_month = calendar.monthrange(year=latest_date.year,
                                            month=latest_date.month)[1]
    now = datetime.now()
    if latest_date.month == now.month and latest_date.year == now.year:
        # don't generate dates into the future
        last_day_of_month = now.day
    options["end_date"] = latest_date.replace(day=last_day_of_month,
                                              hour=0,
                                              minute=0)

    return options
Example #6
def run(provider_type, options):
    """Run nise."""
    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
Example #7
def _load_static_report_data(options):
    """Validate/load and set start_date if static file is provided."""
    if not options.get("static_report_file"):
        return

    static_file = options.get("static_report_file")
    if not os.path.exists(static_file):
        LOG.error(f"file does not exist: '{static_file}'")
        sys.exit()

    LOG.info("Loading static data...")
    aws_tags = set()
    start_dates = []
    end_dates = []
    static_report_data = load_yaml(static_file)
    for generator_dict in static_report_data.get("generators"):
        for _, attributes in generator_dict.items():
            start_date = get_start_date(attributes, options)
            generated_start_date = calculate_start_date(start_date)
            start_dates.append(generated_start_date)

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(
                    generated_start_date, attributes.get("end_date"))
            elif options.get("end_date") and options.get(
                    "end_date").date() != today().date():
                generated_end_date = calculate_end_date(
                    generated_start_date, options.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += datetime.timedelta(hours=24)
            end_dates.append(generated_end_date)

            attributes["start_date"] = str(generated_start_date)
            attributes["end_date"] = str(generated_end_date)

            if options.get("provider") == "aws":
                aws_tags.update(attributes.get("tags", {}).keys())

    options["start_date"] = min(start_dates)
    latest_date = max(end_dates)
    last_day_of_month = calendar.monthrange(year=latest_date.year,
                                            month=latest_date.month)[1]
    options["end_date"] = latest_date.replace(day=last_day_of_month,
                                              hour=0,
                                              minute=0)
    options["static_report_data"] = static_report_data

    if options.get("provider") == "aws" and aws_tags:
        options["aws_tags"] = aws_tags

    return True
Example #8
def ocp_route_file(insights_upload, local_path):
    """Route file to either Upload Service or local filesystem."""
    if os.path.isdir(insights_upload):
        extract_payload(insights_upload, local_path)
    else:
        response = post_payload_to_ingest_service(insights_upload, local_path)
        if response.status_code == 202:
            LOG.info("File uploaded successfully.")
        else:
            LOG.error(f"{response.status_code} File upload failed.")

        LOG.info(response.text)
Example #9
def run(provider_type, options):
    """Run nise."""
    static_data_bool = _load_static_report_data(options)
    if not options.get("start_date"):
        raise NiseError("'start_date' is required in static files.")
    if not static_data_bool:
        fix_dates(options, provider_type)

    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
Example #10
def post_payload_to_ingest_service(insights_upload, local_path):
    """POST the payload to Insights via header or basic auth."""
    insights_account_id = os.environ.get("INSIGHTS_ACCOUNT_ID")
    insights_org_id = os.environ.get("INSIGHTS_ORG_ID")
    insights_user = os.environ.get("INSIGHTS_USER")
    insights_password = os.environ.get("INSIGHTS_PASSWORD")
    if os.path.isfile(local_path):
        file_info = os.stat(local_path)
        filesize = _convert_bytes(file_info.st_size)
        # Log the size only when the file exists, so filesize is always defined here.
        LOG.info(f"Upload File: ({local_path}) filesize is {filesize}.")
    with open(local_path, "rb") as upload_file:
        if insights_account_id and insights_org_id:
            header = {
                "identity": {
                    "account_number": insights_account_id,
                    "internal": {
                        "org_id": insights_org_id
                    }
                }
            }
            headers = {
                "x-rh-identity":
                base64.b64encode(json.dumps(header).encode("UTF-8"))
            }
            return requests.post(
                insights_upload,
                data={},
                files={
                    "file": ("payload.tar.gz", upload_file,
                             "application/vnd.redhat.hccm.tar+tgz")
                },
                headers=headers,
            )

        return requests.post(
            insights_upload,
            data={},
            files={
                "file": ("payload.tar.gz", upload_file,
                         "application/vnd.redhat.hccm.tar+tgz")
            },
            auth=(insights_user, insights_password),
            verify=False,
        )
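For reference, a standalone sketch of how the x-rh-identity header used above is assembled; the account and org values are hypothetical placeholders.

import base64
import json

identity = {"identity": {"account_number": "12345", "internal": {"org_id": "54321"}}}
# Base64-encode the JSON identity document, exactly as the function above does.
headers = {"x-rh-identity": base64.b64encode(json.dumps(identity).encode("UTF-8"))}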
Example #11
def upload_to_s3(bucket_name, bucket_file_path, local_path):
    """Upload data to an S3 bucket.

    Args:
        bucket_name (String): The name of the S3 bucket
        bucket_file_path (String): The path to store the file to
        local_path  (String): The local file system path of the file
    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True
    try:
        s3_client = boto3.resource("s3")
        s3_client.Bucket(bucket_name).upload_file(local_path, bucket_file_path)
        msg = f"Uploaded {bucket_file_path} to s3 bucket {bucket_name}."
        LOG.info(msg)
    except (ClientError, BotoConnectionError,
            boto3.exceptions.S3UploadFailedError) as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
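A minimal usage sketch for the S3 helper above, assuming AWS credentials are already configured for boto3; the bucket and paths are hypothetical.

# Hypothetical call; boto3 picks up credentials from the environment or ~/.aws.
if upload_to_s3("my-cost-bucket", "reports/2024/report.csv.gz", "/tmp/report.csv.gz"):
    print("upload succeeded")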
Example #12
def oci_create_report(options):
    """Create cost and usage report files."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    fake = Faker()
    attributes = {}
    attributes["tenant_id"] = f"ocid1.tenancy.oc1..{fake.pystr(min_chars=20, max_chars=50)}"
    generators = [
        {"generator": OCIComputeGenerator, "attributes": attributes},
        {"generator": OCINetworkGenerator, "attributes": attributes},
        {"generator": OCIBlockStorageGenerator, "attributes": attributes},
        {"generator": OCIDatabaseGenerator, "attributes": attributes},
    ]
    months = _create_month_list(start_date, end_date)
    currency = default_currency(options.get("currency"), static_currency=None)
    # write_monthly = options.get("write_monthly", False)
    file_number = 0

    for month in months:
        data = {OCI_COST_REPORT: [], OCI_USAGE_REPORT: []}
        monthly_files = []

        for report_type in OCI_REPORT_TYPE_TO_COLS:
            LOG.info(f"Generating data for OCI for {month.get('name')}")

            for generator in generators:
                generator_cls = generator.get("generator")
                attributes = generator.get("attributes")
                gen_start_date = month.get("start")
                gen_end_date = month.get("end")
                gen = generator_cls(gen_start_date, gen_end_date, currency, report_type, attributes)

                for hour in gen.generate_data(report_type=report_type):
                    data[report_type] += [hour]

            month_output_file = write_oci_file(report_type, file_number, data[report_type], options)
            monthly_files.append(month_output_file)
        file_number += 1
Example #13
def upload_to_gcp_storage(bucket_name, source_file_name,
                          destination_blob_name):
    """
    Upload data to a GCP Storage Bucket.

    Args:
        bucket_name (String): The container to upload file to
        source_file_name  (String): The full local file system path of the file
        destination_blob_name (String): Destination blob name to store in GCP.

    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning("Please set your GOOGLE_APPLICATION_CREDENTIALS "
                    "environment variable before attempting to load file into"
                    "GCP Storage.")
        return False
    try:
        storage_client = storage.Client()

        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(source_file_name)

        LOG.info(
            f"File {source_file_name} uploaded to GCP Storage {destination_blob_name}."
        )
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
Example #14
def _gcp_bigquery_process(
    start_date, end_date, currency, projects, generators, options, gcp_bucket_name, gcp_dataset_name, gcp_table_name
):

    data = []
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for start: {start_date} and end: {end_date}.")
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")

            generator_cls = generator.get("generator")
            gen = generator_cls(start_date, end_date, currency, project, attributes=attributes)
            for hour in gen.generate_data():
                data += [hour]
            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    local_file_path, output_file_name = write_gcp_file_jsonl(start_date, end_date, data, options)
    monthly_files.append(local_file_path)

    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, local_file_path, output_file_name)

    if not gcp_table_name:
        etag = options.get("gcp_etag") if options.get("gcp_etag") else str(uuid4())
        gcp_table_name = f"gcp_billing_export_{etag}"
    gcp_bucket_to_dataset(gcp_bucket_name, output_file_name, gcp_dataset_name, gcp_table_name)

    return monthly_files
Example #15
def replace_args(args, yaml, provider, ocp_on_cloud):
    """Replace appropriate file paths in args."""
    if not yaml:
        raise KeyError(
            f"Options YAML error: {provider} is not defined under {ocp_on_cloud}"
        )
    from nise.yaml_gen import STATIC_DIR

    args.provider = provider

    if yaml.get(f"{provider}-output-filename"):
        args.output_file_name = yaml.get(f"{provider}-output-filename")
    else:
        LOG.info(
            f"Output file not defined for {provider} under {ocp_on_cloud}. Writing to '{ocp_on_cloud}_{provider}.yml'."
        )
        args.output_file_name = f"{ocp_on_cloud}_{provider}.yml"

    if args.default:
        template_file_name = os.path.join(STATIC_DIR,
                                          yaml.get(f"{provider}-template"))
        config_file_name = os.path.join(STATIC_DIR,
                                        yaml.get(f"{provider}-gen-config"))
    else:
        template_file_name = yaml.get(f"{provider}-template")
        config_file_name = yaml.get(f"{provider}-gen-config")

    if template_file_name:
        args.template_file_name = template_file_name
    else:
        LOG.info(
            f"Template not defined for {provider} under {ocp_on_cloud}. Using default template."
        )
        args.template_file_name = os.path.join(
            STATIC_DIR, f"{provider}_static_data.yml.j2")
    if config_file_name:
        args.config_file_name = config_file_name
    else:
        LOG.info(
            f"Configuration not defined for {provider} under {ocp_on_cloud}. Using default configuration."
        )
        args.config_file_name = None
Example #16
def azure_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    if not (options.get("start_date") and options.get("end_date")):
        options = load_static_report_data(options)
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    months = _create_month_list(start_date, end_date,
                                options.get("days_per_month"))

    meter_cache = {}
    # The options params are not going to change so we don't
    # have to keep resetting the var inside of the for loop
    azure_container_name = options.get("azure_container_name")
    storage_account_name = options.get("azure_account_name")
    azure_prefix_name = options.get("azure_prefix_name")
    azure_report_name = options.get("azure_report_name")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        monthly_files = []
        num_gens = len(AZURE_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}."
        )
        for count, generator in enumerate(AZURE_GENERATORS):
            gen_start_date = options.get("gen_starts",
                                         {}).get(generator.__name__,
                                                 month.get("start"))
            gen_end_date = options.get("gen_ends",
                                       {}).get(generator.__name__,
                                               month.get("end"))
            # Skip if generator usage is outside of current month
            if gen_end_date < month.get("start"):
                continue
            if gen_start_date > month.get("end"):
                continue

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(
                options, month)

            gen = generator(gen_start_date,
                            gen_end_date,
                            meter_cache,
                            user_config=options.get("static_report_file"))
            data += gen.generate_data()
            meter_cache = gen.get_meter_cache()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        local_path, output_file_name = _generate_azure_filename()
        date_range = _generate_azure_date_range(month)

        _write_csv(local_path, data, AZURE_COLUMNS)
        monthly_files.append(local_path)

        if azure_container_name:
            file_path = ""
            if azure_prefix_name:
                file_path += azure_prefix_name + "/"
            file_path += azure_report_name + "/"
            file_path += date_range + "/"
            file_path += output_file_name

            # azure blob upload
            storage_account_name = options.get("azure_account_name", None)
            if storage_account_name:
                azure_route_file(storage_account_name, azure_container_name,
                                 local_path, file_path)
            # local dir upload
            else:
                azure_route_file(azure_container_name, file_path, local_path)
        if not write_monthly:
            _remove_files(monthly_files)
Example #17
    def build_data(self, config, _random=False):  # noqa: C901
        """
        Build a structure to fill out a nise yaml template.

        Structure has the form of:
            {start_date: date,    (config.start_date)
            end_date: date,      (config.end_date)
            nodes: [             (number of nodes controlled by config.max_nodes)
                {node_name: str,     (dynamic)
                cpu_cores: int,     (config.max_node_cpu_cores)
                memory_gig: int,    (config.max_node_memory_gig)
                resource_id: str,   (dynamic)
                namespaces: [     (number of namespaces controlled by config.max_node_namespaces)
                    {namespace: str,   (dynamic)
                    pods: [           (number of pods controlled by config.max_node_namespace_pods)
                        pod_name: str,        (dynamic)
                        cpu_request: int,     (config.max_node_namespace_pod_cpu_request)
                        mem_request_gig: int, (config.max_node_namespace_pod_mem_request_gig)
                        cpu_limit: int,       (config.max_node_namespace_pod_cpu_limit)
                        mem_limit_gig: int,   (config.max_node_namespace_pod_mem_limit_gig)
                        pod_seconds: int,     (config.max_node_namespace_pod_seconds)
                        labels: str           (dynamic)
                    ],
                    volumes: [
                        volume_name: str,
                        storage_class: str,
                        volume_request_gig: int,
                        labels: str,
                        volume_claims: [
                            volume_claim_name: str,
                            pod_name: str,
                            labels: str,
                            capacity_gig: int
                        ]
                    ]}
                ]}
            ]}

        Parameters:
            config : dicta

        Returns:
            dicta
        """
        LOG.info("Data build starting")

        data = dicta(start_date=str(config.start_date),
                     end_date=str(config.end_date),
                     nodes=[],
                     resourceid_labels=None)
        resourceid_labels = {}

        if _random:
            max_nodes = FAKER.random_int(1, config.max_nodes)
        else:
            max_nodes = config.max_nodes

        for node_ix in range(max_nodes):
            LOG.info(f"Building node {node_ix + 1}/{max_nodes}...")
            if _random:
                cores = FAKER.random_int(1, config.max_node_cpu_cores)
                memory = FAKER.random_int(1, config.max_node_memory_gig)
            else:
                cores = config.max_node_cpu_cores
                memory = config.max_node_memory_gig

            resource_id = generate_resource_id(config)
            node_name = generate_name(config)
            id_label_key = (resource_id, node_name)
            resourceid_labels[id_label_key] = []
            node = dicta(name=node_name,
                         cpu_cores=cores,
                         memory_gig=memory,
                         resource_id=resource_id,
                         namespaces=[])
            data.nodes.append(node)

            if _random:
                max_namespaces = FAKER.random_int(1,
                                                  config.max_node_namespaces)
            else:
                max_namespaces = config.max_node_namespaces

            for namespace_ix in range(max_namespaces):
                LOG.info(
                    f"Building node {node_ix + 1}/{max_nodes}; namespace {namespace_ix + 1}/{max_namespaces}..."
                )

                namespace = dicta(name=generate_name(config, prefix=node.name),
                                  pods=[],
                                  volumes=[])
                node.namespaces.append(namespace)

                if _random:
                    max_pods = FAKER.random_int(1,
                                                config.max_node_namespace_pods)
                else:
                    max_pods = config.max_node_namespace_pods

                LOG.info(f"Building {max_pods} pods...")
                for pod_ix in range(max_pods):
                    if _random:
                        cpu_req = FAKER.random_int(1, node.cpu_cores)
                        mem_req = FAKER.random_int(1, node.memory_gig)
                        cpu_lim = FAKER.random_int(1, node.cpu_cores)
                        mem_lim = FAKER.random_int(1, node.memory_gig)
                        pod_sec = FAKER.random_int(
                            config.min_node_namespace_pod_seconds,
                            config.max_node_namespace_pod_seconds,
                            step=(config.max_node_namespace_pod_seconds // 10)
                            or 1800,
                        )
                    else:
                        cpu_lim = cpu_req = node.cpu_cores
                        mem_lim = mem_req = node.memory_gig
                        pod_sec = config.max_node_namespace_pod_seconds

                    pod_labels = generate_labels(
                        config.max_node_namespace_pod_labels)
                    resourceid_labels[id_label_key].append(pod_labels)
                    pod = dicta(
                        name=generate_name(config,
                                           prefix=namespace.name + "-pod",
                                           suffix=str(pod_ix),
                                           dynamic=False),
                        cpu_request=cpu_req,
                        mem_request_gig=mem_req,
                        cpu_limit=cpu_lim,
                        mem_limit_gig=mem_lim,
                        pod_seconds=pod_sec,
                        labels=pod_labels,
                    )
                    namespace.pods.append(pod)

                if _random:
                    max_volumes = FAKER.random_int(
                        1, config.max_node_namespace_volumes)
                else:
                    max_volumes = config.max_node_namespace_volumes

                LOG.info(f"Building {max_volumes} volumes...")
                for volume_ix in range(max_volumes):
                    if _random:
                        storage_cls = config.storage_classes[FAKER.random_int(
                            0,
                            len(config.storage_classes) - 1)]
                        vol_req = FAKER.random_int(
                            1, config.max_node_namespace_volume_request_gig)
                    else:
                        storage_cls = config.storage_classes[0]
                        vol_req = config.max_node_namespace_volume_request_gig

                    volume_labels = generate_labels(
                        config.max_node_namespace_volume_labels)
                    resourceid_labels[id_label_key].append(volume_labels)
                    volume = dicta(
                        name=generate_name(config,
                                           prefix=namespace.name + "-vol",
                                           suffix=str(volume_ix),
                                           dynamic=False),
                        storage_class=storage_cls,
                        volume_request_gig=vol_req,
                        labels=volume_labels,
                        volume_claims=[],
                    )
                    namespace.volumes.append(volume)

                    if _random:
                        max_volume_claims = FAKER.random_int(
                            1, config.max_node_namespace_volume_volume_claims)
                    else:
                        max_volume_claims = config.max_node_namespace_volume_volume_claims

                    for volume_claim_ix in range(max_volume_claims):
                        if _random:
                            cap = FAKER.random_int(
                                1, config.
                                max_node_namespace_volume_volume_claim_capacity_gig
                            )
                        else:
                            cap = config.max_node_namespace_volume_volume_claim_capacity_gig

                        pod_index = volume_claim_ix if volume_claim_ix < len(namespace.pods) else -1
                        pod_name = namespace.pods[pod_index].name
                        volume_claim_labels = generate_labels(
                            config.
                            max_node_namespace_volume_volume_claim_labels)
                        resourceid_labels[id_label_key].append(
                            volume_claim_labels)
                        volume_claim = dicta(
                            name=generate_name(
                                config,
                                prefix=namespace.name + "-vol-claim",
                                suffix=str(volume_claim_ix),
                                dynamic=False,
                            ),
                            pod_name=pod_name,
                            labels=volume_claim_labels,
                            capacity_gig=cap,
                        )
                        volume.volume_claims.append(volume_claim)
        data.resourceid_labels = resourceid_labels
        return data
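The method above relies on a dicta helper that supports both dictionary- and attribute-style access (for example data.nodes.append(...) and node.cpu_cores). A minimal stand-in, assuming nothing beyond that behavior, could look like:

class dicta(dict):
    """Dict with attribute-style access (sketch; the real class may provide more)."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as err:
            raise AttributeError(name) from err

    def __setattr__(self, name, value):
        self[name] = value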
Example #18
    def build_data(self, config, _random=False):  # noqa: C901
        """

        """
        LOG.info("Data build starting")

        data = dicta(
            payer=config.payer_account,
            bandwidth_gens=[],
            sql_gens=[],
            storage_gens=[],
            vmachine_gens=[],
            vnetwork_gens=[],
        )

        max_bandwidth_gens = FAKER.random_int(
            0, config.max_bandwidth_gens
        ) if _random else config.max_bandwidth_gens
        max_sql_gens = FAKER.random_int(
            0, config.max_sql_gens) if _random else config.max_sql_gens
        max_storage_gens = FAKER.random_int(
            0, config.max_storage_gens) if _random else config.max_storage_gens
        max_vmachine_gens = FAKER.random_int(
            0,
            config.max_vmachine_gens) if _random else config.max_vmachine_gens
        max_vnetwork_gens = FAKER.random_int(
            0,
            config.max_vnetwork_gens) if _random else config.max_vnetwork_gens

        LOG.info(f"Building {max_bandwidth_gens} Bandwidth generators ...")
        for _ in range(max_bandwidth_gens):
            data.bandwidth_gens.append(
                generate_azure_dicta(config, "bandwidth"))

        LOG.info(f"Building {max_sql_gens} SQL generators ...")
        for _ in range(max_sql_gens):
            data.sql_gens.append(generate_azure_dicta(config, "sql"))

        LOG.info(f"Building {max_storage_gens} Storage generators ...")
        for _ in range(max_storage_gens):
            data.storage_gens.append(generate_azure_dicta(config, "storage"))

        LOG.info(
            f"Building {max_vmachine_gens} Virtual Machine generators ...")
        for _ in range(max_vmachine_gens):
            data.vmachine_gens.append(generate_azure_dicta(config, "vmachine"))

        LOG.info(
            f"Building {max_vnetwork_gens} Virtual Network generators ...")
        for _ in range(max_vnetwork_gens):
            data.vnetwork_gens.append(generate_azure_dicta(config, "vnetwork"))

        return data
Example #19
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": {}}]

    months = _create_month_list(start_date, end_date)
    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: [], OCP_NAMESPACE_LABEL: []}
        file_numbers = {OCP_POD_USAGE: 0, OCP_STORAGE_USAGE: 0, OCP_NODE_LABEL: 0, OCP_NAMESPACE_LABEL: 0}
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)
            for report_type in gen.ocp_report_generation.keys():
                LOG.info(f"Generating data for {report_type} for {month.get('name')}")
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1

            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file in range(len(monthly_files)):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(monthly_files[num_file], temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            cr_status = {
                "clusterID": "4e009161-4f40-42c8-877c-3e59f6baea3d",
                "clusterVersion": "stable-4.6",
                "api_url": "https://console.redhat.com",
                "authentication": {"type": "token"},
                "packaging": {"max_reports_to_store": 30, "max_size_MB": 100},
                "upload": {
                    "ingress_path": "/api/ingress/v1/upload",
                    "upload": "True",
                    "upload_wait": 27,
                    "upload_cycle": 360,
                },
                "operator_commit": __version__,
                "prometheus": {
                    "prometheus_configured": "True",
                    "prometheus_connected": "True",
                    "last_query_start_time": "2021-07-28T12:22:37Z",
                    "last_query_success_time": "2021-07-28T12:22:37Z",
                    "service_address": "https://thanos-querier.openshift-monitoring.svc:9091",
                },
                "reports": {
                    "report_month": "07",
                    "last_hour_queried": "2021-07-28 11:00:00 - 2021-07-28 11:59:59",
                    "data_collected": "True",
                },
                "source": {
                    "sources_path": "/api/sources/v1.0/",
                    "name": "INSERT-SOURCE-NAME",
                    "create_source": "False",
                    "check_cycle": 1440,
                },
            }
            cr_status = json.dumps(cr_status)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
                "start": gen_start_date,
                "end": gen_end_date,
                "version": __version__,
                "certified": False,
                "cr_status": cr_status,
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest, "manifest.json", "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)
        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
Example #20
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    gcp_bucket_name = options.get("gcp_bucket_name")
    gcp_dataset_name = options.get("gcp_dataset_name")
    gcp_table_name = options.get("gcp_table_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")

    if gcp_dataset_name:
        # if the file is supposed to be uploaded to a bigquery table, it needs the JSONL version of everything
        if static_report_data:
            generators = _get_jsonl_generators(static_report_data.get("generators"))
            static_projects = static_report_data.get("projects", {})
            projects = []
            for static_dict in static_projects:
                # this lets the format of the YAML remain the same whether using the upload or local
                project = {}
                project["name"] = static_dict.get("project.name", "")
                project["id"] = static_dict.get("project.id", "")
                # the k:v pairs are split by ; and the keys and values split by :
                static_labels = static_dict.get("project.labels", [])
                labels = []
                if static_labels:
                    for pair in static_labels.split(";"):
                        key = pair.split(":")[0]
                        value = pair.split(":")[1]
                        labels.append({"key": key, "value": value})

                project["labels"] = labels
                location = {}
                location["location"] = static_dict.get("location.location", "")
                location["country"] = static_dict.get("location.country", "")
                location["region"] = static_dict.get("location.region", "")
                location["zone"] = static_dict.get("location.zone", "")
                row = {
                    "billing_account_id": static_dict.get("billing_account_id", ""),
                    "project": project,
                    "location": location,
                }
                projects.append(row)
                currency = default_currency(options.get("currency"), get_gcp_static_currency(generators))
        else:
            generators = [
                {"generator": JSONLCloudStorageGenerator, "attributes": {}},
                {"generator": JSONLComputeEngineGenerator, "attributes": {}},
                {"generator": JSONLGCPNetworkGenerator, "attributes": {}},
                {"generator": JSONLGCPDatabaseGenerator, "attributes": {}},
            ]
            account = fake.word()
            project_generator = JSONLProjectGenerator(account)
            projects = project_generator.generate_projects()
            currency = default_currency(options.get("currency"), None)

    elif static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")
        processed_projects = copy.deepcopy(projects)
        for i, project in enumerate(projects):
            labels = []
            static_labels = project.get("project.labels", [])
            if static_labels:
                for pair in static_labels.split(";"):
                    key = pair.split(":")[0]
                    value = pair.split(":")[1]
                    labels.append({"key": key, "value": value})
                processed_projects[i]["project.labels"] = json.dumps(labels)
        projects = processed_projects

    else:
        generators = [
            {"generator": CloudStorageGenerator, "attributes": {}},
            {"generator": ComputeEngineGenerator, "attributes": {}},
            {"generator": GCPNetworkGenerator, "attributes": {}},
            {"generator": GCPDatabaseGenerator, "attributes": {}},
        ]
        account = fake.word()

        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    if gcp_dataset_name:
        monthly_files = _gcp_bigquery_process(
            start_date,
            end_date,
            currency,
            projects,
            generators,
            options,
            gcp_bucket_name,
            gcp_dataset_name,
            gcp_table_name,
        )
    else:
        months = _create_month_list(start_date, end_date)
        monthly_files = []
        output_files = []
        for month in months:
            data = []
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            for project in projects:
                num_gens = len(generators)
                ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
                LOG.info(
                    f"Producing data for {num_gens} generators for start: {gen_start_date} and end: {gen_end_date}."
                )
                for count, generator in enumerate(generators):
                    attributes = generator.get("attributes", {})
                    if attributes:
                        start_date = attributes.get("start_date")
                        end_date = attributes.get("end_date")
                        currency = default_currency(options.get("currency"), attributes.get("currency"))
                    else:
                        currency = default_currency(options.get("currency"), None)
                    if gen_end_date > end_date:
                        gen_end_date = end_date

                    generator_cls = generator.get("generator")
                    gen = generator_cls(gen_start_date, gen_end_date, currency, project, attributes=attributes)
                    for hour in gen.generate_data():
                        data += [hour]
                    count += 1
                    if count % ten_percent == 0:
                        LOG.info(f"Done with {count} of {num_gens} generators.")

            local_file_path, output_file_name = write_gcp_file(gen_start_date, gen_end_date, data, options)
            output_files.append(output_file_name)
            monthly_files.append(local_file_path)

        for index, month_file in enumerate(monthly_files):
            if gcp_bucket_name:
                gcp_route_file(gcp_bucket_name, month_file, output_files[index])

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
Example #21
def aws_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    aws_finalize_report = options.get("aws_finalize_report")
    static_report_data = options.get("static_report_data")
    manifest_gen = True if options.get("manifest_generation") is None else options.get("manifest_generation")

    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": DataTransferGenerator, "attributes": {}},
            {"generator": EBSGenerator, "attributes": {}},
            {"generator": EC2Generator, "attributes": {}},
            {"generator": S3Generator, "attributes": {}},
            {"generator": RDSGenerator, "attributes": {}},
            {"generator": Route53Generator, "attributes": {}},
            {"generator": VPCGenerator, "attributes": {}},
            {"generator": MarketplaceGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)

    payer_account, usage_accounts, currency_code = _generate_accounts(accounts_list)
    currency_code = default_currency(options.get("currency"), currency_code)

    aws_bucket_name = options.get("aws_bucket_name")
    aws_report_name = options.get("aws_report_name")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        file_number = 0
        monthly_files = []
        fake = Faker()
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(
                gen_start_date,
                gen_end_date,
                currency_code,
                payer_account,
                usage_accounts,
                attributes,
                options.get("aws_tags"),
            )
            num_instances = 1 if attributes else randint(2, 60)
            for _ in range(num_instances):
                for hour in gen.generate_data():
                    data += [hour]
                    if len(data) == options.get("row_limit"):
                        file_number += 1
                        month_output_file = write_aws_file(
                            file_number,
                            aws_report_name,
                            month.get("name"),
                            gen_start_date.year,
                            data,
                            aws_finalize_report,
                            static_report_data,
                            gen.AWS_COLUMNS,
                        )
                        monthly_files.append(month_output_file)
                        data.clear()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        if file_number != 0:
            file_number += 1
        month_output_file = write_aws_file(
            file_number,
            aws_report_name,
            month.get("name"),
            gen_start_date.year,
            data,
            aws_finalize_report,
            static_report_data,
            gen.AWS_COLUMNS,
        )
        monthly_files.append(month_output_file)

        if aws_bucket_name:
            manifest_values = {"account": payer_account}
            manifest_values.update(options)
            manifest_values["start_date"] = gen_start_date
            manifest_values["end_date"] = gen_end_date
            manifest_values["file_names"] = monthly_files

            if not manifest_gen:
                s3_cur_path, _ = aws_generate_manifest(fake, manifest_values)
                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)
            else:
                s3_cur_path, manifest_data = aws_generate_manifest(fake, manifest_values)
                s3_month_path = os.path.dirname(s3_cur_path)
                s3_month_manifest_path = s3_month_path + "/" + aws_report_name + "-Manifest.json"
                s3_assembly_manifest_path = s3_cur_path + "/" + aws_report_name + "-Manifest.json"

                temp_manifest = _write_manifest(manifest_data)
                aws_route_file(aws_bucket_name, s3_month_manifest_path, temp_manifest)
                aws_route_file(aws_bucket_name, s3_assembly_manifest_path, temp_manifest)

                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)

                os.remove(temp_manifest)

        if not write_monthly:
            _remove_files(monthly_files)
Example #22
def azure_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": BandwidthGenerator, "attributes": {}},
            {"generator": SQLGenerator, "attributes": {}},
            {"generator": StorageGenerator, "attributes": {}},
            {"generator": VMGenerator, "attributes": {}},
            {"generator": VNGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)

    account_info = _generate_azure_account_info(accounts_list)
    currency = default_currency(options.get("currency"), account_info["currency_code"])

    meter_cache = {}
    # The options params are not going to change so we don't
    # have to keep resetting the var inside of the for loop
    azure_container_name = options.get("azure_container_name")
    storage_account_name = options.get("azure_account_name")
    azure_prefix_name = options.get("azure_prefix_name")
    azure_report_name = options.get("azure_report_name")
    version_two = options.get("version_two", False)
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        monthly_files = []
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes", {})
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
            else:
                attributes = {"end_date": end_date, "start_date": start_date}

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            if attributes.get("meter_cache"):
                meter_cache.update(attributes.get("meter_cache"))  # needed so that meter_cache can be defined in yaml
            attributes["meter_cache"] = meter_cache
            attributes["version_two"] = version_two
            gen = generator_cls(gen_start_date, gen_end_date, currency, account_info, attributes)
            azure_columns = gen.azure_columns
            data += gen.generate_data()
            meter_cache = gen.get_meter_cache()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        local_path, output_file_name = _generate_azure_filename()
        date_range = _generate_azure_date_range(month)

        _write_csv(local_path, data, azure_columns)
        monthly_files.append(local_path)

        if azure_container_name:
            file_path = ""
            if azure_prefix_name:
                file_path += azure_prefix_name + "/"
            file_path += azure_report_name + "/"
            file_path += date_range + "/"
            file_path += output_file_name

            # azure blob upload
            storage_account_name = options.get("azure_account_name", None)
            if storage_account_name:
                azure_route_file(storage_account_name, azure_container_name, local_path, file_path)
            # local dir upload
            else:
                azure_route_file(azure_container_name, file_path, local_path)
        if not write_monthly:
            _remove_files(monthly_files)
Example #23
    def build_data(self, config, _random=False):  # noqa: C901
        """Build the data."""
        LOG.info("Data build starting")

        data = dicta(
            payer=config.payer_account,
            data_transfer_gens=[],
            ebs_gens=[],
            ec2_gens=[],
            rds_gens=[],
            route53_gens=[],
            s3_gens=[],
            vpc_gens=[],
            users=[],
        )

        max_data_transfer_gens = (FAKER.random_int(
            0, config.max_data_transfer_gens) if _random else
                                  config.max_data_transfer_gens)
        max_ebs_gens = FAKER.random_int(
            0, config.max_ebs_gens) if _random else config.max_ebs_gens
        max_ec2_gens = FAKER.random_int(
            0, config.max_ec2_gens) if _random else config.max_ec2_gens
        max_rds_gens = FAKER.random_int(
            0, config.max_rds_gens) if _random else config.max_rds_gens
        max_route53_gens = FAKER.random_int(
            0, config.max_route53_gens) if _random else config.max_route53_gens
        max_s3_gens = FAKER.random_int(
            0, config.max_s3_gens) if _random else config.max_s3_gens
        max_vpc_gens = FAKER.random_int(
            0, config.max_vpc_gens) if _random else config.max_vpc_gens
        max_users = FAKER.random_int(
            0, config.max_users) if _random else config.max_users

        LOG.info(
            f"Building {max_data_transfer_gens} data transfer generators ...")
        for _ in range(max_data_transfer_gens):
            _rate, _amount = RATE_AMT.get("DTG")
            data_transfer_gen = initialize_dicta("DTG", config)
            data_transfer_gen.update(amount=round(next(_amount), 5),
                                     rate=round(next(_rate), 5))
            data.data_transfer_gens.append(data_transfer_gen)

        LOG.info(f"Building {max_ebs_gens} EBS generators ...")
        for _ in range(max_ebs_gens):
            _rate, _amount = RATE_AMT.get("EBS")
            ebs_gen = initialize_dicta("EBS", config)
            ebs_gen.update(amount=round(next(_amount), 5),
                           rate=round(next(_rate), 5))
            data.ebs_gens.append(ebs_gen)

        LOG.info(f"Building {max_ec2_gens} EC2 generators ...")
        for _ in range(max_ec2_gens):
            instance_type = random.choice(EC2_INSTANCES)
            ec2_gen = initialize_dicta("EC2", config)
            ec2_gen.update(
                processor_arch=instance_type.get("processor_arch"),
                region=random.choice(REGIONS),
                instance_type=instance_type,
            )
            data.ec2_gens.append(ec2_gen)

        LOG.info(f"Building {max_rds_gens} RDS generators ...")
        for _ in range(max_rds_gens):
            instance_type = random.choice(RDS_INSTANCES)
            rds_gen = initialize_dicta("RDS", config)
            rds_gen.update(
                processor_arch=instance_type.get("processor_arch"),
                region=random.choice(REGIONS),
                instance_type=instance_type,
            )
            data.rds_gens.append(rds_gen)

        LOG.info(f"Building {max_route53_gens} Route 53 generators ...")
        for _ in range(max_route53_gens):
            route53_gen = initialize_dicta("R53", config)
            route53_gen.update(product_family=random.choices(
                ("DNS Zone", "DNS Query"), weights=[1, 10])[0])
            data.route53_gens.append(route53_gen)

        LOG.info(f"Building {max_s3_gens} S3 generators ...")
        for _ in range(max_s3_gens):
            _rate, _amount = RATE_AMT.get("S3")
            s3_gen = initialize_dicta("S3", config)
            s3_gen.update(amount=round(next(_amount), 5),
                          rate=round(next(_rate), 5))
            data.s3_gens.append(s3_gen)

        LOG.info(f"Building {max_vpc_gens} VPC generators ...")
        for _ in range(max_vpc_gens):
            vpc_gen = initialize_dicta("VPC", config)
            data.vpc_gens.append(vpc_gen)

        LOG.info(f"Adding {max_users} users.")
        for _ in range(max_users):
            data.users.append(generate_account_id(config))

        return data
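
The build above stores everything in dicta objects accessed with attribute syntax (data.ebs_gens, data.users). The real dicta implementation is not shown here; the sketch below is only a minimal stand-in, assuming it behaves like a dot-accessible dictionary.

# Minimal dot-accessible dict, assumed to approximate what `dicta` provides.
class Dicta(dict):
    """Dictionary whose keys can also be read and written as attributes."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as err:
            raise AttributeError(name) from err

    def __setattr__(self, name, value):
        self[name] = value


data = Dicta(payer="111111111111", ebs_gens=[], users=[])  # placeholder values
data.ebs_gens.append({"rate": 0.1, "amount": 10})
print(data.payer, len(data.ebs_gens))
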
Exemplo n.º 24
0
def aws_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    if not (options.get("start_date") and options.get("end_date")):
        options = load_static_report_data(options)
    start_date = options.get("start_date")
    end_date = options.get("end_date")

    aws_finalize_report = options.get("aws_finalize_report")

    months = _create_month_list(start_date, end_date,
                                options.get("days_per_month"))

    aws_bucket_name = options.get("aws_bucket_name")
    aws_report_name = options.get("aws_report_name")
    write_monthly = options.get("write_monthly", False)
    payer_account = None
    for month in months:
        data = []
        file_number = 0
        monthly_files = []
        num_gens = len(AWS_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}."
        )
        for count, generator in enumerate(AWS_GENERATORS):
            gen_start_date = options.get("gen_starts",
                                         {}).get(generator.__name__,
                                                 month.get("start"))
            gen_end_date = options.get("gen_ends",
                                       {}).get(generator.__name__,
                                               month.get("end"))
            # Skip if generator usage is outside of current month
            if gen_end_date < month.get("start"):
                continue
            if gen_start_date > month.get("end"):
                continue

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(
                options, month)

            gen = generator(gen_start_date,
                            gen_end_date,
                            user_config=options.get("static_report_file"))
            payer_account = gen.config[0].get("accounts", {}).get("payer")
            for _ in range(gen.num_instances):
                for hour in gen.generate_data():
                    data += [hour]
                    if len(data) == options.get("row_limit"):
                        file_number += 1
                        month_output_file = write_aws_file(
                            file_number,
                            aws_report_name,
                            month.get("name"),
                            gen_start_date.year,
                            data,
                            aws_finalize_report,
                            gen.AWS_COLUMNS,
                        )
                        monthly_files.append(month_output_file)
                        data.clear()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        if file_number != 0:
            file_number += 1

        month_output_file = write_aws_file(
            file_number,
            aws_report_name,
            month.get("name"),
            gen_start_date.year,
            data,
            aws_finalize_report,
            gen.AWS_COLUMNS,
        )
        monthly_files.append(month_output_file)

        if aws_bucket_name:
            manifest_values = {"account": payer_account}
            manifest_values.update(options)
            manifest_values["start_date"] = gen_start_date
            manifest_values["end_date"] = gen_end_date
            manifest_values["file_names"] = monthly_files
            s3_cur_path, manifest_data = aws_generate_manifest(manifest_values)
            s3_month_path = os.path.dirname(s3_cur_path)
            s3_month_manifest_path = s3_month_path + "/" + aws_report_name + "-Manifest.json"
            s3_assembly_manifest_path = s3_cur_path + "/" + aws_report_name + "-Manifest.json"

            temp_manifest = _write_manifest(manifest_data)
            aws_route_file(aws_bucket_name, s3_month_manifest_path,
                           temp_manifest)
            aws_route_file(aws_bucket_name, s3_assembly_manifest_path,
                           temp_manifest)

            for monthly_file in monthly_files:
                temp_cur_zip = _gzip_report(monthly_file)
                destination_file = "{}/{}.gz".format(
                    s3_cur_path, os.path.basename(monthly_file))
                aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                os.remove(temp_cur_zip)
            os.remove(temp_manifest)
        if not write_monthly:
            _remove_files(monthly_files)
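
aws_create_report only relies on each month entry exposing "name", "start", and "end"; the exact structure returned by _create_month_list is not shown here. The sketch below illustrates that assumed shape with made-up dates.

# Assumed month-dict shape (keys inferred from the month.get(...) calls above).
from datetime import datetime

months = [
    {"name": "January", "start": datetime(2021, 1, 1), "end": datetime(2021, 1, 31, 23, 59, 59)},
    {"name": "February", "start": datetime(2021, 2, 1), "end": datetime(2021, 2, 28, 23, 59, 59)},
]
for month in months:
    print(f"{month['name']}: {month['start']:%Y-%m-%d} -> {month['end']:%Y-%m-%d}")
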
Exemplo n.º 25
0
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": None}]

    months = _create_month_list(start_date, end_date)
    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: []}
        file_numbers = {
            OCP_POD_USAGE: 0,
            OCP_STORAGE_USAGE: 0,
            OCP_NODE_LABEL: 0
        }
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(
                    attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)
            for report_type in gen.ocp_report_generation.keys():
                LOG.info(
                    f"Generating data for {report_type} for {month.get('name')}"
                )
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1

            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file in range(len(monthly_files)):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(
                    monthly_files[num_file], temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest,
                                                       "manifest.json",
                                                       "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)
        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
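
For the insights upload, each monthly CSV is copied to "<assembly_id>_openshift_report.<n>.csv" and the manifest lists those copies as a quoted, comma-separated string with the outer quotes stripped. The sketch below isolates just that naming logic; the file names are placeholders.

from uuid import uuid4

ocp_assembly_id = uuid4()
monthly_files = ["pod_usage.csv", "storage_usage.csv"]  # hypothetical local files

temp_files = {}
for num_file, local_file in enumerate(monthly_files):
    temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
    temp_files[temp_filename] = local_file

manifest_file_names = ", ".join(f'"{name}"' for name in temp_files)
print(manifest_file_names[1:-1])  # outer quotes stripped, as in the manifest values above
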
Exemplo n.º 26
0
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    projects = []
    if options.get("static_report_file"):
        config = load_yaml(options.get("static_report_file"))
        project_gens = list(
            filter(lambda x: "ProjectGenerator" in x,
                   config.get("generators")))
        projects = []
        for gen in project_gens:
            project_generator = ProjectGenerator(
                gen.get("ProjectGenerator", {}).get("Account ID"))
            projects = projects + [
                prj for prj in project_generator.generate_projects()
            ]
    else:
        account = "{}-{}".format(fake.word(), fake.word())
        project_generator = ProjectGenerator(account)
        projects = projects + [
            prj for prj in project_generator.generate_projects()
        ]

    data = {}
    for project in projects:
        num_gens = len(GCP_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for GCP Project '{project}'."
        )
        for count, generator in enumerate(GCP_GENERATORS):
            gen = generator(start_date,
                            end_date,
                            project,
                            user_config=options.get("static_report_file"))
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item

            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix,
                                              day.strftime("%Y-%m-%d"))

        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)

    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
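
The per-generator merge above ("if key in data ... else ...") is equivalent to accumulating rows per day with a defaultdict; a small sketch of that equivalence, using made-up rows keyed by date:

from collections import defaultdict
from datetime import date

data = defaultdict(list)
generated_data = {date(2021, 1, 1): [{"cost": 1.0}], date(2021, 1, 2): [{"cost": 2.0}]}
for key, item in generated_data.items():
    data[key] += item

print({day.strftime("%Y-%m-%d"): len(rows) for day, rows in data.items()})
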
Exemplo n.º 27
0
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()

    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")

    else:
        generators = [
            {
                "generator": CloudStorageGenerator,
                "attributes": None
            },
            {
                "generator": ComputeEngineGenerator,
                "attributes": None
            },
        ]
        account = "{}-{}".format(fake.word(), fake.word())

        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    data = {}
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for GCP Project '{project}'."
        )
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")

            generator_cls = generator.get("generator")
            gen = generator_cls(start_date,
                                end_date,
                                project,
                                attributes=attributes)
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item

            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix,
                                              day.strftime("%Y-%m-%d"))

        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)

    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
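
This variant iterates over generator descriptors of the form {"generator": <class>, "attributes": <dict or None>}, either built from static_report_data or taken from the default list above. The sketch below shows that shape with stand-in classes (the real CloudStorageGenerator and ComputeEngineGenerator are imported elsewhere in the module); the attribute dates are illustrative.

from datetime import datetime


class CloudStorageGenerator:  # stand-in for the real generator class
    pass


class ComputeEngineGenerator:  # stand-in for the real generator class
    pass


generators = [
    {"generator": CloudStorageGenerator, "attributes": None},
    {
        "generator": ComputeEngineGenerator,
        "attributes": {"start_date": datetime(2021, 1, 1), "end_date": datetime(2021, 1, 31)},
    },
]
for descriptor in generators:
    print(descriptor["generator"].__name__, bool(descriptor.get("attributes")))
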
Exemplo n.º 28
0
def gcp_bucket_to_dataset(gcp_bucket_name, file_name, dataset_name,
                          table_name):
    """
    Create a gcp dataset from a file stored in a bucket.

    Args:
        gcp_bucket_name (String): The bucket where the source file is stored
        file_name (String): The name of the file stored in GCP
        dataset_name (String): Name for the created dataset in GCP
        table_name (String): Name for the created table in GCP

    Returns:
        (Boolean): True if the dataset was created

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to create a dataset.")
        return False
    try:
        bigquery_client = bigquery.Client()

        project_name = bigquery_client.project
        dataset_id = f"{project_name}.{dataset_name}"
        dataset = bigquery.Dataset(dataset_id)

        # delete dataset (does not error if it doesn't exist) and create fresh one
        bigquery_client.delete_dataset(dataset_id,
                                       delete_contents=True,
                                       not_found_ok=True)
        dataset = bigquery_client.create_dataset(dataset)

        table_id = f"{project_name}.{dataset_name}.{table_name}"

        # creates the job config with specifics
        job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            time_partitioning=bigquery.TimePartitioning(),
            schema=[
                {"name": "billing_account_id", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "service",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "sku",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {"name": "usage_start_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "usage_end_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {
                    "name": "project",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "number", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {
                            "name": "labels",
                            "type": "RECORD",
                            "mode": "REPEATED",
                            "fields": [
                                {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                                {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                            ],
                        },
                        {"name": "ancestry_numbers", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "labels",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "system_labels",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "location",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "location", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "country", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "region", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "zone", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {"name": "export_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "cost", "type": "FLOAT", "mode": "NULLABLE"},
                {"name": "currency", "type": "STRING", "mode": "NULLABLE"},
                {"name": "currency_conversion_rate", "type": "FLOAT", "mode": "NULLABLE"},
                {
                    "name": "usage",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "unit", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount_in_pricing_units", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "pricing_unit", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "credits",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "full_name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "invoice",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [{"name": "month", "type": "STRING", "mode": "NULLABLE"}],
                },
                {"name": "cost_type", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "adjustment_info",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "mode", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
            ],
        )

        uri = f"gs://{gcp_bucket_name}/{file_name}"

        load_job = bigquery_client.load_table_from_uri(uri,
                                                       table_id,
                                                       job_config=job_config)

        # wait for the job to finish; raises an exception if the load fails
        load_job.result()

        # after the table is created, delete the file from the storage bucket
        storage_client = storage.Client()
        bucket = storage_client.bucket(gcp_bucket_name)
        blob = bucket.blob(file_name)
        blob.delete()

        LOG.info(
            f"Dataset {dataset_name} created in GCP BigQuery under the table name {table_name}."
        )
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
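
A hypothetical invocation, assuming GOOGLE_APPLICATION_CREDENTIALS already points at a valid service-account key; every name below is a placeholder, not taken from the source.

created = gcp_bucket_to_dataset(
    gcp_bucket_name="example-cost-bucket",
    file_name="example-report.jsonl",
    dataset_name="example_dataset",
    table_name="example_table",
)
print("dataset created:", created)
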
Exemplo n.º 29
0
def extract_payload(base_path, payload_file):
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            file - .csv usage report file name
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        base_path (String): base local directory path.
        payload_file (String): path to payload.tar.gz file containing report and manifest.

    Returns:
        None

    """
    # Create temporary directory for initial file staging and verification
    temp_dir = tempfile.mkdtemp()

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(payload_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except ReadError as error:
        LOG.error("Unable to untar file. Reason: {}".format(str(error)))
        shutil.rmtree(temp_dir)
        return

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(base_path, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get("files"):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError:
            pass

    LOG.info("Successfully extracted OCP for {}/{}".format(report_meta.get("cluster_id"), usage_month))
    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
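
For local testing it can help to build a payload that matches the layout documented above (a manifest.json plus a <uuid>_report_name.csv inside a .tar.gz). The sketch below does that with the standard library only; the manifest keys mirror the docstring, and all values are illustrative.

import json
import tarfile
import tempfile
import uuid
from datetime import datetime
from pathlib import Path

staging = Path(tempfile.mkdtemp())
report_uuid = uuid.uuid4()
csv_name = f"{report_uuid}_report_name.csv"
(staging / csv_name).write_text("report_period_start,report_period_end\n")

manifest = {
    "uuid": str(report_uuid),
    "cluster_id": "my-ocp-cluster-1",
    "date": datetime.utcnow().isoformat(),
    "files": [csv_name],
}
(staging / "manifest.json").write_text(json.dumps(manifest))

payload_file = staging / "payload.tar.gz"
with tarfile.open(payload_file, "w:gz") as tar:
    tar.add(staging / "manifest.json", arcname="manifest.json")
    tar.add(staging / csv_name, arcname=csv_name)

print(f"test payload written to {payload_file}")
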