示例#1
0
def scrape(output_file, input_file=None):
    """Build a per-instance-type RDS pricing index and write it as JSON.

    The AWS RDS offer index is read from ``input_file`` when given, otherwise
    it is downloaded. Single-AZ "Database Instance" products are collected,
    their on-demand and reserved prices are attached per region (keyed both by
    engine code and, for backwards compatibility, by database engine name),
    and the resulting records are passed to ``add_pretty_names`` before being
    dumped to ``output_file`` as a JSON list.

    Args:
        output_file: Path of the JSON file to write. Missing parent
            directories are created.
        input_file: Optional path of a local copy of the offer index.
    """
    # if an argument is given, use that as the path for the json file
    if input_file:
        with open(input_file) as json_data:
            data = json.load(json_data)
    else:
        price_index = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonRDS/current/index.json"
        index = requests.get(price_index)
        data = index.json()

    rds_instances = {}  # sku -> product attributes
    instances = {}  # instance type -> output record

    # region mapping, someone thought it was handy not to include the region id's :(
    regions = ec2.get_region_descriptions()

    # attributes that are inconsistent among skus of one instance type; they
    # stay on the per-sku record but are left out of the per-type record
    sku_specific_attributes = (
        "databaseEdition",
        "databaseEngine",
        "database_engine",
        "deploymentOption",
        "engineCode",
        "licenseModel",
        "location",
        "locationType",
        "operation",
        "region",
        "usagetype",
    )

    # loop through products, and only fetch available instances for now
    for sku, product in data["products"].items():
        if product.get("productFamily") != "Database Instance":
            continue
        attributes = product["attributes"]

        # skip multi-az
        if attributes["deploymentOption"] != "Single-AZ":
            continue

        # map the region
        location = ec2.canonicalize_location(attributes["location"])
        instance_type = attributes["instanceType"]
        try:
            region = regions[location]
        except KeyError:
            if location == "Any":
                region = "us-east-1"
            else:
                print(
                    f"ERROR: No region data for location={location}. Ignoring instance with sku={sku}, type={instance_type}"
                )
                continue

        # set the attributes in line with the ec2 index
        attributes["region"] = region
        attributes["memory"] = attributes["memory"].split(" ")[0]
        attributes["network_performance"] = attributes.get("networkPerformance")
        attributes["family"] = attributes["instanceFamily"]
        attributes["instance_type"] = instance_type
        attributes["database_engine"] = attributes["databaseEngine"]
        attributes["arch"] = attributes.get("processorArchitecture")

        engine_code = attributes.get("engineCode")
        if engine_code is None:
            print(f"No Engine Code found. Ignoring instance with sku={sku}")
            continue

        # engine codes 210 and 220 are deliberately left out of the index
        if engine_code in ("210", "220"):
            continue

        rds_instances[sku] = attributes

        if instance_type not in instances:
            new_attributes = {
                name: value
                for name, value in attributes.items()
                if name not in sku_specific_attributes
            }
            # all prices are collected on the per-type record; it starts with
            # an (empty) entry for the region of the first sku seen
            new_attributes["pricing"] = {region: {}}
            instances[instance_type] = new_attributes

    # Parse ondemand pricing
    skipped_descriptions = (
        "transfer",
        "global",
        "storage",
        "iops",
        "requests",
        "multi-az",
    )
    for sku, offers in data["terms"]["OnDemand"].items():
        instance = rds_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance["instance_type"]]["pricing"]
        for offer in offers.values():
            for dimension in offer["priceDimensions"].values():
                # skip these for now
                description = dimension["description"].lower()
                if any(word in description for word in skipped_descriptions):
                    continue

                region_pricing = type_pricing.setdefault(instance["region"], {})
                ondemand = float(dimension["pricePerUnit"]["USD"])

                region_pricing[instance["engineCode"]] = {"ondemand": ondemand}

                # keep this for backwards compatibility, even though it's wrong
                # (database_engine is not unique, so multiple offerings overlap)
                region_pricing[instance["database_engine"]] = {
                    "ondemand": ondemand
                }

    reserved_mapping = {
        "3yr Partial Upfront": "yrTerm3.partialUpfront",
        "1yr Partial Upfront": "yrTerm1.partialUpfront",
        "3yr All Upfront": "yrTerm3.allUpfront",
        "1yr All Upfront": "yrTerm1.allUpfront",
        "1yr No Upfront": "yrTerm1.noUpfront",
        "3yr No Upfront": "yrTerm3.noUpfront",
    }

    # Parse reserved pricing (rds_instances only holds Single-AZ skus, so no
    # further deployment-option filtering is needed here)
    for sku, offers in data["terms"]["Reserved"].items():
        instance = rds_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance["instance_type"]]["pricing"]
        for offer in offers.values():
            for dimension in offer["priceDimensions"].values():
                # Create the nested hashes on the per-type record, which is
                # the one written to below; a sku without usable on-demand
                # pricing would otherwise hit a KeyError here.
                region_pricing = type_pricing.setdefault(instance["region"], {})
                engine_prices = region_pricing.setdefault(
                    instance["database_engine"], {}
                )
                code_prices = region_pricing.setdefault(instance["engineCode"], {})
                engine_reserved = engine_prices.setdefault("reserved", {})
                code_reserved = code_prices.setdefault("reserved", {})

                reserved_type = "%s %s" % (
                    offer["termAttributes"]["LeaseContractLength"],
                    offer["termAttributes"]["PurchaseOption"],
                )
                price_key = "%s-%s" % (
                    reserved_mapping[reserved_type],
                    dimension["unit"].lower(),
                )
                price = float(dimension["pricePerUnit"]["USD"])

                code_reserved[price_key] = price
                engine_reserved[price_key] = price

    # Calculate all reserved effective pricings (upfront hourly + hourly price)
    # (raw term, output key, contract length in years)
    upfront_terms = (
        ("yrTerm3.partialUpfront", "yrTerm3Standard.partialUpfront", 3),
        ("yrTerm1.partialUpfront", "yrTerm1Standard.partialUpfront", 1),
        ("yrTerm3.allUpfront", "yrTerm3Standard.allUpfront", 3),
        # 1yr All Upfront is collected by reserved_mapping, so report it too
        ("yrTerm1.allUpfront", "yrTerm1Standard.allUpfront", 1),
    )
    hourly_only_terms = (
        ("yrTerm1.noUpfront", "yrTerm1Standard.noUpfront"),
        ("yrTerm3.noUpfront", "yrTerm3Standard.noUpfront"),
    )
    for instance_type, instance in instances.items():
        for pricing in instance["pricing"].values():
            for prices in pricing.values():
                if "reserved" not in prices:
                    continue
                raw = prices["reserved"]
                try:
                    # no multi-az here
                    reserved_prices = {}

                    for term, output_key, years in upfront_terms:
                        if term + "-quantity" in raw:
                            # spread the upfront fee over the contract hours
                            reserved_prices[output_key] = (
                                raw[term + "-quantity"] / (365 * years) / 24
                            ) + raw[term + "-hrs"]

                    for term, output_key in hourly_only_terms:
                        if term + "-hrs" in raw:
                            reserved_prices[output_key] = raw[term + "-hrs"]

                    prices["reserved"] = reserved_prices
                except KeyError as e:
                    # an upfront fee without its hourly counterpart: report it
                    # and leave the raw reserved prices in place
                    print(
                        "ERROR: Trouble generating RDS reserved price for {}: {!r}"
                        .format(instance_type, e))

    add_pretty_names(instances)

    # write output to file
    # NOTE(review): presumably meant to limit floats to 5 decimals in the
    # output; confirm the json encoder in use still honours this hook.
    encoder.FLOAT_REPR = lambda o: format(o, ".5f")
    # os.makedirs("") raises, so only create a directory when the path has one
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file, "w") as outfile:
        json.dump(list(instances.values()), outfile, indent=1)
示例#2
0
def scrape(output_file, input_file=None):
    """Build a per-instance-type RDS pricing index and write it as JSON.

    The AWS RDS offer index is read from ``input_file`` when given, otherwise
    it is downloaded. Single-AZ "Database Instance" products are collected,
    their on-demand and reserved prices are attached per region (keyed both by
    engine code and, for backwards compatibility, by database engine name),
    and the resulting records are passed to ``add_pretty_names`` before being
    dumped to ``output_file`` as a JSON list.

    Args:
        output_file: Path of the JSON file to write.
        input_file: Optional path of a local copy of the offer index.
    """
    # if an argument is given, use that as the path for the json file
    if input_file:
        with open(input_file) as json_data:
            data = json.load(json_data)
    else:
        price_index = 'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonRDS/current/index.json'
        index = requests.get(price_index)
        data = index.json()

    rds_instances = {}  # sku -> product attributes
    instances = {}  # instance type -> output record

    # region mapping, someone thought it was handy not to include the region id's :(
    regions = ec2.get_region_descriptions()

    # attributes that are inconsistent among skus of one instance type; they
    # stay on the per-sku record but are left out of the per-type record
    sku_specific_attributes = (
        'databaseEdition',
        'databaseEngine',
        'database_engine',
        'deploymentOption',
        'engineCode',
        'licenseModel',
        'location',
        'locationType',
        'operation',
        'region',
        'usagetype',
    )

    # loop through products, and only fetch available instances for now
    for sku, product in data['products'].items():
        if product.get('productFamily') != 'Database Instance':
            continue
        attributes = product['attributes']

        # skip multi-az
        if attributes['deploymentOption'] != 'Single-AZ':
            continue

        # map the region
        location = ec2.canonicalize_location(attributes['location'])
        instance_type = attributes['instanceType']
        try:
            region = regions[location]
        except KeyError:
            if location == 'Any':
                region = 'us-east-1'
            else:
                print(f"ERROR: No region data for location={location}. Ignoring instance with sku={sku}, type={instance_type}")
                continue

        # set the attributes in line with the ec2 index
        attributes['region'] = region
        attributes['memory'] = attributes['memory'].split(' ')[0]
        # not every product carries these two attributes; record None rather
        # than aborting the whole scrape with a KeyError
        attributes['network_performance'] = attributes.get('networkPerformance')
        attributes['family'] = attributes['instanceFamily']
        attributes['instance_type'] = instance_type
        attributes['database_engine'] = attributes['databaseEngine']
        attributes['arch'] = attributes.get('processorArchitecture')

        engine_code = attributes.get('engineCode')
        if engine_code is None:
            # prices are keyed by engine code, so a product without one
            # cannot be indexed
            print(f"No Engine Code found. Ignoring instance with sku={sku}")
            continue

        # engine codes 210 and 220 are deliberately left out of the index
        if engine_code in ('210', '220'):
            continue

        rds_instances[sku] = attributes

        if instance_type not in instances:
            new_attributes = {
                name: value
                for name, value in attributes.items()
                if name not in sku_specific_attributes
            }
            # all prices are collected on the per-type record; it starts with
            # an (empty) entry for the region of the first sku seen
            new_attributes['pricing'] = {region: {}}
            instances[instance_type] = new_attributes

    # Parse ondemand pricing
    skipped_descriptions = ('transfer', 'global', 'storage', 'iops', 'requests', 'multi-az')
    for sku, offers in data['terms']['OnDemand'].items():
        instance = rds_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance['instance_type']]['pricing']
        for offer in offers.values():
            for dimension in offer['priceDimensions'].values():
                # skip these for now
                description = dimension['description'].lower()
                if any(word in description for word in skipped_descriptions):
                    continue

                region_pricing = type_pricing.setdefault(instance['region'], {})
                ondemand = float(dimension['pricePerUnit']['USD'])

                region_pricing[instance['engineCode']] = {'ondemand': ondemand}

                # keep this for backwards compatibility, even though it's wrong
                # (database_engine is not unique, so multiple offerings overlap)
                region_pricing[instance['database_engine']] = {'ondemand': ondemand}

    reserved_mapping = {
        '3yr Partial Upfront': 'yrTerm3.partialUpfront',
        '1yr Partial Upfront': 'yrTerm1.partialUpfront',
        '3yr All Upfront': 'yrTerm3.allUpfront',
        '1yr All Upfront': 'yrTerm1.allUpfront',
        '1yr No Upfront': 'yrTerm1.noUpfront',
        '3yr No Upfront': 'yrTerm3.noUpfront',
    }

    # Parse reserved pricing (rds_instances only holds Single-AZ skus, so no
    # further deployment-option filtering is needed here)
    for sku, offers in data['terms']['Reserved'].items():
        instance = rds_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance['instance_type']]['pricing']
        for offer in offers.values():
            for dimension in offer['priceDimensions'].values():
                # Create the nested hashes on the per-type record, which is
                # the one written to below; a sku without usable on-demand
                # pricing would otherwise hit a KeyError here.
                region_pricing = type_pricing.setdefault(instance['region'], {})
                engine_prices = region_pricing.setdefault(instance['database_engine'], {})
                code_prices = region_pricing.setdefault(instance['engineCode'], {})
                engine_reserved = engine_prices.setdefault('reserved', {})
                code_reserved = code_prices.setdefault('reserved', {})

                reserved_type = "%s %s" % (offer['termAttributes']['LeaseContractLength'], offer['termAttributes']['PurchaseOption'])
                price_key = '%s-%s' % (reserved_mapping[reserved_type], dimension['unit'].lower())
                price = float(dimension['pricePerUnit']['USD'])

                code_reserved[price_key] = price
                engine_reserved[price_key] = price

    # Calculate all reserved effective pricings (upfront hourly + hourly price)
    # (raw term, output key, contract length in years)
    upfront_terms = (
        ('yrTerm3.partialUpfront', 'yrTerm3Standard.partialUpfront', 3),
        ('yrTerm1.partialUpfront', 'yrTerm1Standard.partialUpfront', 1),
        ('yrTerm3.allUpfront', 'yrTerm3Standard.allUpfront', 3),
        # 1yr All Upfront is collected by reserved_mapping, so report it too
        ('yrTerm1.allUpfront', 'yrTerm1Standard.allUpfront', 1),
    )
    hourly_only_terms = (
        ('yrTerm1.noUpfront', 'yrTerm1Standard.noUpfront'),
        ('yrTerm3.noUpfront', 'yrTerm3Standard.noUpfront'),
    )
    for instance_type, instance in instances.items():
        for pricing in instance['pricing'].values():
            for prices in pricing.values():
                if 'reserved' not in prices:
                    continue
                raw = prices['reserved']
                try:
                    # no multi-az here
                    reserved_prices = {}

                    for term, output_key, years in upfront_terms:
                        if term + '-quantity' in raw:
                            # spread the upfront fee over the contract hours
                            reserved_prices[output_key] = (raw[term + '-quantity'] / (365 * years) / 24) + raw[term + '-hrs']

                    for term, output_key in hourly_only_terms:
                        if term + '-hrs' in raw:
                            reserved_prices[output_key] = raw[term + '-hrs']

                    prices['reserved'] = reserved_prices
                except KeyError as e:
                    # an upfront fee without its hourly counterpart: report it
                    # and leave the raw reserved prices in place
                    print("ERROR: Trouble generating RDS reserved price for {}: {!r}".format(instance_type, e))

    add_pretty_names(instances)

    # write output to file
    # NOTE(review): presumably meant to limit floats to 5 decimals in the
    # output; confirm the json encoder in use still honours this hook.
    encoder.FLOAT_REPR = lambda o: format(o, '.5f')
    with open(output_file, 'w') as outfile:
        json.dump(list(instances.values()), outfile, indent=4)
示例#3
0
def scrape(output_file, input_file=None):
    """Build a per-instance-type ElastiCache pricing index and write it as JSON.

    The AWS ElastiCache offer index is read from ``input_file`` when given,
    otherwise it is downloaded. "Cache Instance" products are collected, their
    on-demand and reserved prices are attached per region and cache engine,
    and the resulting records are passed to ``add_pretty_names`` before being
    dumped to ``output_file`` as a JSON list.

    Args:
        output_file: Path of the JSON file to write.
        input_file: Optional path of a local copy of the offer index.
    """
    # if an argument is given, use that as the path for the json file
    if input_file:
        with open(input_file) as json_data:
            data = json.load(json_data)
    else:
        price_index = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonElastiCache/current/index.json"
        index = requests.get(price_index)
        data = index.json()

    caches_instances = {}  # sku -> product attributes
    instances = {}  # instance type -> output record

    # region mapping, someone thought it was handy not to include the region id's :(
    regions = ec2.get_region_descriptions()

    # attributes that are inconsistent among skus of one instance type; they
    # stay on the per-sku record but are left out of the per-type record
    sku_specific_attributes = (
        "cache_engine",
        "location",
        "locationType",
        "operation",
        "region",
        "usagetype",
    )

    # loop through products, and only fetch available instances for now
    for sku, product in tqdm(data["products"].items()):
        if product.get("productFamily") != "Cache Instance":
            continue
        attributes = product["attributes"]

        # map the region
        location = ec2.canonicalize_location(attributes["location"])
        instance_type = attributes["instanceType"]
        try:
            region = regions[location]
        except KeyError:
            if location == "Any":
                region = "us-east-1"
            else:
                print(
                    f"ERROR: No region data for location={location}. Ignoring instance with sku={sku}, type={instance_type}"
                )
                continue

        # set the attributes in line with the ec2 index
        attributes["region"] = region
        attributes["memory"] = attributes["memory"].split(" ")[0]
        attributes["network_performance"] = attributes.get("networkPerformance")
        attributes["family"] = attributes["instanceFamily"]
        attributes["instance_type"] = instance_type
        attributes["cache_engine"] = attributes["cacheEngine"]

        caches_instances[sku] = attributes

        if instance_type not in instances:
            new_attributes = {
                name: value
                for name, value in attributes.items()
                if name not in sku_specific_attributes
            }
            # all prices are collected on the per-type record; it starts with
            # an (empty) entry for the region of the first sku seen
            new_attributes["pricing"] = {region: {}}
            instances[instance_type] = new_attributes

    # Parse ondemand pricing
    skipped_descriptions = (
        "transfer",
        "global",
        "storage",
        "iops",
        "requests",
        "multi-az",
    )
    for sku, offers in data["terms"]["OnDemand"].items():
        instance = caches_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance["instance_type"]]["pricing"]
        for offer in offers.values():
            for dimension in offer["priceDimensions"].values():
                # skip these for now
                description = dimension["description"].lower()
                if any(word in description for word in skipped_descriptions):
                    continue

                # Initialise pricing for the region on first use
                region_pricing = type_pricing.setdefault(instance["region"], {})
                region_pricing[instance["cache_engine"]] = {
                    "ondemand": float(dimension["pricePerUnit"]["USD"])
                }

    reserved_mapping = {
        "1yr All Upfront": "yrTerm1.allUpfront",
        "1yr Partial Upfront": "yrTerm1.partialUpfront",
        "1yr No Upfront": "yrTerm1.noUpfront",
        "1yr Light Utilization": "yrTerm1.lightUtilization",
        "1yr Medium Utilization": "yrTerm1.mediumUtilization",
        "1yr Heavy Utilization": "yrTerm1.heavyUtilization",
        "3yr All Upfront": "yrTerm3.allUpfront",
        "3yr Partial Upfront": "yrTerm3.partialUpfront",
        "3yr No Upfront": "yrTerm3.noUpfront",
        "3yr Light Utilization": "yrTerm3.lightUtilization",
        "3yr Medium Utilization": "yrTerm3.mediumUtilization",
        "3yr Heavy Utilization": "yrTerm3.heavyUtilization",
    }

    # Parse reserved pricing
    for sku, offers in data["terms"]["Reserved"].items():
        instance = caches_instances.get(sku)
        if instance is None:
            # pricing for a sku that was not collected above
            continue

        type_pricing = instances[instance["instance_type"]]["pricing"]
        for offer in offers.values():
            for dimension in offer["priceDimensions"].values():
                # Create the nested hashes on the per-type record, which is
                # the one written to below; a sku without usable on-demand
                # pricing would otherwise hit a KeyError here.
                region_pricing = type_pricing.setdefault(instance["region"], {})
                engine_prices = region_pricing.setdefault(
                    instance["cache_engine"], {}
                )
                reserved = engine_prices.setdefault("reserved", {})

                lease_length = offer["termAttributes"]["LeaseContractLength"]
                purchase_option = offer["termAttributes"]["PurchaseOption"]
                reserved_type = f"{lease_length} {purchase_option}"

                price_key = "%s-%s" % (
                    reserved_mapping[reserved_type],
                    dimension["unit"].lower(),
                )
                reserved[price_key] = float(dimension["pricePerUnit"]["USD"])

    # Calculate all reserved effective pricings (upfront hourly + hourly price)
    # Since Light, Medium and Heavy utilization are from previous generations and are not available for choosing
    # anymore in AWS console, we are not calculating it
    # (raw term, output key, contract length in years)
    upfront_terms = (
        ("yrTerm3.partialUpfront", "yrTerm3Standard.partialUpfront", 3),
        ("yrTerm1.partialUpfront", "yrTerm1Standard.partialUpfront", 1),
        ("yrTerm3.allUpfront", "yrTerm3Standard.allUpfront", 3),
        # 1yr All Upfront is collected by reserved_mapping, so report it too
        ("yrTerm1.allUpfront", "yrTerm1Standard.allUpfront", 1),
    )
    hourly_only_terms = (
        ("yrTerm1.noUpfront", "yrTerm1Standard.noUpfront"),
        ("yrTerm3.noUpfront", "yrTerm3Standard.noUpfront"),
    )
    for instance_type, instance in instances.items():
        for pricing in instance["pricing"].values():
            for prices in pricing.values():
                if "reserved" not in prices:
                    continue
                raw = prices["reserved"]
                try:
                    # no multi-az here
                    reserved_prices = {}

                    for term, output_key, years in upfront_terms:
                        if term + "-quantity" in raw:
                            # spread the upfront fee over the contract hours
                            reserved_prices[output_key] = (
                                raw[term + "-quantity"] / (365 * years) / 24
                            ) + raw[term + "-hrs"]

                    for term, output_key in hourly_only_terms:
                        if term + "-hrs" in raw:
                            reserved_prices[output_key] = raw[term + "-hrs"]

                    prices["reserved"] = reserved_prices
                except KeyError as e:
                    # an upfront fee without its hourly counterpart: report it
                    # and leave the raw reserved prices in place
                    print(
                        "ERROR: Trouble generating Cache reserved price for {}: {!r}".format(
                            instance_type, e
                        )
                    )

    add_pretty_names(instances)

    # write output to file
    # NOTE(review): presumably meant to limit floats to 5 decimals in the
    # output; confirm the json encoder in use still honours this hook.
    encoder.FLOAT_REPR = lambda o: format(o, ".5f")
    with open(output_file, "w") as outfile:
        json.dump(list(instances.values()), outfile, indent=1)