Example #1
def get_report_details(report_directory):
    """
    Get OCP usage report details from manifest file.

    Date range is aligned on the first day of the current
    month and ends on the first day of the next month from the
    specified date.

    Args:
        report_directory (String): base directory for report.

    Returns:
        (Dict): keys:
            files: List,
            cluster_id: String,
            date: DateTime,
            uuid: String,
            manifest_path: String

    """
    manifest_path = "{}/{}".format(report_directory, "manifest.json")

    payload_dict = {}
    try:
        with open(manifest_path) as file:
            payload_dict = json.load(file)
            payload_dict["date"] = parser.parse(payload_dict["date"])
            payload_dict["manifest_path"] = manifest_path
    except (OSError, IOError, KeyError):
        LOG.error("Unable to extract manifest data")

    return payload_dict
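
# Illustrative usage sketch (not part of the original source): the staging
# directory is hypothetical and is assumed to contain a manifest.json written
# by the payload generator; LOG is the module logger used above.
report_meta = get_report_details("/var/tmp/insights_local/staging")
if report_meta:
    LOG.info(f"Manifest for cluster {report_meta.get('cluster_id')} "
             f"created {report_meta.get('date')}")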
Example #2
def upload_to_azure_container(storage_file_name, local_path,
                              storage_file_path):
    """Upload data to a storage account.

    Args:
        storage_file_name (String): The container to upload the file to
        local_path  (String): The full local file system path of the file
        storage_file_path (String): The file path to upload to within the container

    Returns:
        (Boolean): True if file was uploaded

    """
    try:
        # Retrieve the connection string for use with the application.
        connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        blob_service_client = BlobServiceClient.from_connection_string(
            connect_str)
        blob_client = blob_service_client.get_blob_client(
            container=storage_file_name, blob=storage_file_path)
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data=data)
        LOG.info(f"uploaded {storage_file_name} to {storage_file_path}")
    except (CloudError, ClientException, IOError) as error:
        LOG.error(error)
        traceback.print_exc(file=sys.stderr)
        return False
    return True
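
# Illustrative usage sketch (not part of the original source): the container
# name and file paths are hypothetical, and AZURE_STORAGE_CONNECTION_STRING
# is assumed to be set in the environment.
if upload_to_azure_container("reports", "/tmp/ocp_usage.csv",
                             "ocp/ocp_usage.csv"):
    LOG.info("Azure upload complete.")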
Example #3
def _remove_files(file_list):
    """Remove files."""
    for file_path in file_list:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            LOG.error(f"File {file_path} was not found.")
            raise
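
# Illustrative usage sketch (not part of the original source): the paths are
# hypothetical; a missing file is logged and the error is re-raised.
try:
    _remove_files(["/tmp/ocp_usage.csv", "/tmp/manifest.json"])
except FileNotFoundError:
    LOG.warning("Cleanup skipped a file that was already gone.")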
Example #4
def ocp_route_file(insights_upload, local_path):
    """Route file to either Upload Service or local filesystem."""
    if os.path.isdir(insights_upload):
        extract_payload(insights_upload, local_path)
    else:
        response = post_payload_to_ingest_service(insights_upload, local_path)
        if response.status_code == 202:
            LOG.info("File uploaded successfully.")
        else:
            LOG.error(f"{response.status_code} File upload failed.")

        LOG.info(response.text)
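
# Illustrative usage sketch (not part of the original source): both arguments
# are hypothetical. A local directory routes the payload through
# extract_payload(); anything else is treated as an ingest service URL.
ocp_route_file("/var/tmp/insights_local", "/tmp/payload.tar.gz")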
Example #5
def _load_static_report_data(options):
    """Validate/load and set start_date if static file is provided."""
    if not options.get("static_report_file"):
        return

    static_file = options.get("static_report_file")
    if not os.path.exists(static_file):
        LOG.error(f"file does not exist: '{static_file}'")
        sys.exit()

    LOG.info("Loading static data...")
    aws_tags = set()
    start_dates = []
    end_dates = []
    static_report_data = load_yaml(static_file)
    for generator_dict in static_report_data.get("generators"):
        for _, attributes in generator_dict.items():
            start_date = get_start_date(attributes, options)
            generated_start_date = calculate_start_date(start_date)
            start_dates.append(generated_start_date)

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(
                    generated_start_date, attributes.get("end_date"))
            elif options.get("end_date") and options.get(
                    "end_date").date() != today().date():
                generated_end_date = calculate_end_date(
                    generated_start_date, options.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += datetime.timedelta(hours=24)
            end_dates.append(generated_end_date)

            attributes["start_date"] = str(generated_start_date)
            attributes["end_date"] = str(generated_end_date)

            if options.get("provider") == "aws":
                aws_tags.update(attributes.get("tags", {}).keys())

    options["start_date"] = min(start_dates)
    latest_date = max(end_dates)
    last_day_of_month = calendar.monthrange(year=latest_date.year,
                                            month=latest_date.month)[1]
    options["end_date"] = latest_date.replace(day=last_day_of_month,
                                              hour=0,
                                              minute=0)
    options["static_report_data"] = static_report_data

    if options.get("provider") == "aws" and aws_tags:
        options["aws_tags"] = aws_tags

    return True
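
# Illustrative usage sketch (not part of the original source): the YAML path
# and provider are hypothetical; on success the options dict is populated
# with the computed start_date and end_date.
options = {"provider": "aws", "static_report_file": "example_static_data.yml"}
if _load_static_report_data(options):
    LOG.info(f"Generating data from {options['start_date']} "
             f"to {options['end_date']}")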
Example #6
def upload_to_s3(bucket_name, bucket_file_path, local_path):
    """Upload data to an S3 bucket.

    Args:
        bucket_name (String): The name of the S3 bucket
        bucket_file_path (String): The path to store the file to
        local_path  (String): The local file system path of the file
    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True
    try:
        s3_client = boto3.resource("s3")
        s3_client.Bucket(bucket_name).upload_file(local_path, bucket_file_path)
        msg = f"Uploaded {bucket_file_path} to s3 bucket {bucket_name}."
        LOG.info(msg)
    except (ClientError, BotoConnectionError,
            boto3.exceptions.S3UploadFailedError) as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
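
# Illustrative usage sketch (not part of the original source): the bucket and
# paths are hypothetical, and AWS credentials are assumed to be configured
# (for example via environment variables or ~/.aws/credentials).
if upload_to_s3("example-cost-reports", "reports/aws_usage.csv",
                "/tmp/aws_usage.csv"):
    LOG.info("S3 upload complete.")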
Example #7
def upload_to_gcp_storage(bucket_name, source_file_name,
                          destination_blob_name):
    """
    Upload data to a GCP Storage Bucket.

    Args:
        bucket_name (String): The container to upload file to
        source_file_name  (String): The full local file system path of the file
        destination_blob_name (String): Destination blob name to store in GCP.

    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning("Please set your GOOGLE_APPLICATION_CREDENTIALS "
                    "environment variable before attempting to load file into"
                    "GCP Storage.")
        return False
    try:
        storage_client = storage.Client()

        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(source_file_name)

        LOG.info(
            f"File {source_file_name} uploaded to GCP Storage {destination_blob_name}."
        )
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
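
# Illustrative usage sketch (not part of the original source): the bucket and
# blob names are hypothetical, and GOOGLE_APPLICATION_CREDENTIALS is assumed
# to point at a valid service account key file.
if upload_to_gcp_storage("example-cost-reports", "/tmp/gcp_usage.json",
                         "reports/gcp_usage.json"):
    LOG.info("GCP Storage upload complete.")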
Example #8
def extract_payload(base_path, payload_file):
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            files - list of .csv usage report file names
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        base_path (String): base local directory path.
        payload_file (String): path to payload.tar.gz file containing report and manifest.

    Returns:
        None

    """
    # Create temporary directory for initial file staging and verification
    temp_dir = tempfile.mkdtemp()

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(payload_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except ReadError as error:
        LOG.error("Unable to untar file. Reason: {}".format(str(error)))
        shutil.rmtree(temp_dir)
        return

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(base_path, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get("files"):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError:
            pass

    LOG.info("Successfully extracted OCP for {}/{}".format(report_meta.get("cluster_id"), usage_month))
    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
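
# Illustrative usage sketch (not part of the original source): both paths are
# hypothetical; the tarball is expected to contain manifest.json and the .csv
# report files it lists, as described in the docstring above.
extract_payload("/var/tmp/insights_local", "/tmp/payload.tar.gz")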
Example #9
def gcp_bucket_to_dataset(gcp_bucket_name, file_name, dataset_name,
                          table_name):
    """
    Create a gcp dataset from a file stored in a bucket.

    Args:
        gcp_bucket_name  (String): The GCP bucket where the file is stored
        file_name  (String): The name of the file stored in GCP
        dataset_name (String): name for the created dataset in GCP
        table_name (String): name for the created table in GCP

    Returns:
        (Boolean): True if the dataset was created

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning(
            "Please set your GOOGLE_APPLICATION_CREDENTIALS "
            "environment variable before attempting to create a dataset.")
        return False
    try:
        bigquery_client = bigquery.Client()

        project_name = bigquery_client.project
        dataset_id = f"{project_name}.{dataset_name}"
        dataset = bigquery.Dataset(dataset_id)

        # delete dataset (does not error if it doesn't exist) and create fresh one
        bigquery_client.delete_dataset(dataset_id,
                                       delete_contents=True,
                                       not_found_ok=True)
        dataset = bigquery_client.create_dataset(dataset)

        table_id = f"{project_name}.{dataset_name}.{table_name}"

        # creates the job config with specifics
        job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            time_partitioning=bigquery.TimePartitioning(),
            schema=[
                {"name": "billing_account_id", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "service",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "sku",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {"name": "usage_start_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "usage_end_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {
                    "name": "project",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "number", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {
                            "name": "labels",
                            "type": "RECORD",
                            "mode": "REPEATED",
                            "fields": [
                                {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                                {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                            ],
                        },
                        {"name": "ancestry_numbers", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "labels",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "system_labels",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "key", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "value", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "location",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "location", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "country", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "region", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "zone", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {"name": "export_time", "type": "TIMESTAMP", "mode": "NULLABLE"},
                {"name": "cost", "type": "FLOAT", "mode": "NULLABLE"},
                {"name": "currency", "type": "STRING", "mode": "NULLABLE"},
                {"name": "currency_conversion_rate", "type": "FLOAT", "mode": "NULLABLE"},
                {
                    "name": "usage",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "unit", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount_in_pricing_units", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "pricing_unit", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "credits",
                    "type": "RECORD",
                    "mode": "REPEATED",
                    "fields": [
                        {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "amount", "type": "FLOAT", "mode": "NULLABLE"},
                        {"name": "full_name", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {
                    "name": "invoice",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "month", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
                {"name": "cost_type", "type": "STRING", "mode": "NULLABLE"},
                {
                    "name": "adjustment_info",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {"name": "id", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "description", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "mode", "type": "STRING", "mode": "NULLABLE"},
                        {"name": "type", "type": "STRING", "mode": "NULLABLE"},
                    ],
                },
            ],
        )

        uri = f"gs://{gcp_bucket_name}/{file_name}"

        load_job = bigquery_client.load_table_from_uri(uri,
                                                       table_id,
                                                       job_config=job_config)

        # Wait for the job to finish; this raises an exception if the load fails.
        load_job.result()

        # after the table is created, delete the file from the storage bucket
        storage_client = storage.Client()
        bucket = storage_client.bucket(gcp_bucket_name)
        blob = bucket.blob(file_name)
        blob.delete()

        LOG.info(
            f"Dataset {dataset_name} created in GCP bigquery under the table name {table_name}."
        )
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
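
# Illustrative usage sketch (not part of the original source): the bucket,
# file, dataset, and table names are hypothetical, and the file is expected
# to be newline-delimited JSON matching the schema above.
if gcp_bucket_to_dataset("example-cost-reports", "gcp_usage.json",
                         "example_dataset", "gcp_billing_export"):
    LOG.info("BigQuery dataset created.")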