示例#1
0
文件: 2_1.py 项目: boydjj/think_stats
def pumpkin(weights):
    """
    Compute summary statistics for a collection of pumpkin weights.

    :param weights: iterable of numeric weights.  It is copied into a list
        up front because it is handed to three separate helpers below; a
        one-shot iterator (e.g. a generator) would otherwise be exhausted
        by the first of them.
    :return: a ``(mean, variance, stddev)`` tuple, as produced by
        ``thinkstats.Mean``, ``thinkstats.Var`` and ``std_dev``.
    """
    weights = list(weights)

    mean = thinkstats.Mean(weights)
    # The already-computed mean/variance are passed along so the helpers
    # do not have to recompute them.
    variance = thinkstats.Var(weights, mean)
    stddev = std_dev(weights, mean, variance)

    return mean, variance, stddev
示例#2
0
文件: 2_1.py 项目: boydjj/think_stats
def pumpkin(weights):
    """
    Return the mean, variance and standard deviation of pumpkin weights.

    :param weights: iterable of numeric weights.  The values are
        materialised into a list first: the data is traversed by several
        helpers in turn, which a single-pass iterator such as a generator
        could not support.
    :return: a ``(mean, variance, stddev)`` tuple, as produced by
        ``thinkstats.Mean``, ``thinkstats.Var`` and ``std_dev``.
    """
    weights = list(weights)

    mean = thinkstats.Mean(weights)
    # Reuse the mean (and then the variance) instead of letting each helper
    # derive it again from the raw data.
    variance = thinkstats.Var(weights, mean)
    stddev = std_dev(weights, mean, variance)

    return mean, variance, stddev
示例#3
0
    def analyze(self, results):
        """Fold a batch of results into the running aggregate statistics.

        Each result bumps the result counter, the per-winner and
        per-largest-party tallies, the coalition tallies and the two
        boolean counters; its seat counts, margin of victory and UKIP
        stealth-target values are appended to the matching lists.  The
        record for the most seats won (and the party holding it) is
        updated when a result exceeds it.

        After the batch has been consumed, the per-party mean and standard
        deviation of seats are recomputed from everything in ``self.seats``.
        """
        for outcome in results:
            self.num_of_results += 1

            # Tally the overall winner.
            winner = outcome.winner
            self.win_counts[winner] = self.win_counts.get(winner, 0) + 1

            # Record this run's seat count against each party.
            for party, seat_count in outcome.seats.items():
                self.seats.setdefault(party, []).append(seat_count)

            # Tally the largest party.
            largest = outcome.largest_party
            self.largest_party_counts[largest] = (
                self.largest_party_counts.get(largest, 0) + 1)

            self.margins_of_victory.append(outcome.margin_of_victory)

            # Keep the highest seat total seen so far and who achieved it.
            if outcome.most_seats_won > self.most_seats_won:
                self.most_seats_won = outcome.most_seats_won
                self.most_seats_won_party = outcome.largest_party

            if outcome.greens_hold_brighton:
                self.greens_hold_brighton_count += 1

            if outcome.seat_winner_is_pop_winner:
                self.seat_winner_is_pop_winner_count += 1

            # Collect the value reported for each UKIP stealth target.
            stealth_targets = outcome.ukip_stealth_targets
            for target in stealth_targets:
                self.ukip_stealth_targets.setdefault(target, []).append(
                    stealth_targets[target])

            # Tally every coalition this result lists as possible.
            for coalition in outcome.possible_coalitions:
                self.possible_coalitions[coalition] = (
                    self.possible_coalitions.get(coalition, 0) + 1)

        # Per-party mean and standard deviation of the seat counts.
        for party, seat_counts in self.seats.items():
            self.mean_seats[party] = sum(seat_counts) / float(len(seat_counts))
            self.stddev_seats[party] = utils.std_dev(seat_counts)
示例#4
0
def __optimize_spot_bid(env, instance_type, bid_price):
    '''
    Choose the availability zone with the most stable spot price and
    sanity-check the requested bid against the spot price history.

    Background notes on bidding:

    * Prices vary widely between zones and between instance types.  Bidding
      far above an observed spot price increases cost considerably without
      a significant decrease in computation time.
    * There are data transfer fees when moving data between zones.  If
      results are written out to S3, no data is actually transferred
      between zones.
    * Pricing is set to maximize revenue given the user demand, under a
      supposed (but realistic) infinite resource availability.
    * Static bidding strategies contrast with dynamic ones, which adjust the
      bid according to the application's execution requirements and market
      prices.
    * The optimal bidding price only depends on a job's sensitivity to
      delay; the value of a compute job determines what a user is willing
      to pay to have it executed.
    * According to "Experimental Study of Bidding Strategies for Scientific
      Workflows using AWS Spot Instances", "bidding with 25% of on-demand
      price gives most balanced performance for scientific workflows".

    :param env: object passed to ``__get_spot_history`` and whose
        ``ec2client.describe_availability_zones()`` lists the zones.
    :param instance_type: instance type passed to ``__get_spot_history``.
    :param bid_price: the numeric bid the caller intends to place.
    :return: ``(stable_zone, bid_price)`` - the name of the zone with the
        smallest price deviation (zones whose recent price is below the bid
        are preferred) and the bid, halved if it exceeded twice the average
        price in the history.
    :raises ValueError: if no availability zones are returned, since there
        is then nothing to take the minimum over.
    '''
    spot_hist = __get_spot_history(env, instance_type)
    zones = env.ec2client.describe_availability_zones()

    # Zones whose most recent price is below the bid, and all the others.
    markets_under_bid, markets_over_bid = [], []

    # Find the most stable zone for placing spot instances.
    for zone in zones['AvailabilityZones']:
        zone_name = zone['ZoneName']

        # Rebuild the history for *this* zone only.  Accumulating into one
        # shared list would mix in the prices of every earlier zone.
        zone_hists = [zh for zh in spot_hist
                      if zh['AvailabilityZone'] == zone_name]

        if zone_hists:
            # A real list (not a lazy ``map``) so std_dev can traverse it
            # freely on any Python version.
            prices = [float(hp['SpotPrice']) for hp in zone_hists]
            price_dev = std_dev(prices)
            # Assumes the history is ordered newest first - TODO confirm.
            recent_price = prices[0]
        else:
            # No history for this zone: treat it as not being under the bid.
            price_dev, recent_price = 0.0, bid_price

        best_zone = BestAvZone(name=zone_name, price_deviation=price_dev)
        if recent_price < bid_price:
            markets_under_bid.append(best_zone)
        else:
            markets_over_bid.append(best_zone)

    # Prefer zones under the bid; fall back to the rest if there are none.
    stable_zone = min(markets_under_bid or markets_over_bid,
                      key=attrgetter('price_deviation')).name

    # Check the spot history and deduce whether the bid is reasonable.
    sh = [round(float(pr['SpotPrice']), 2) for pr in spot_hist]
    if sh:
        avg = mean(sh)
        if bid_price > avg * 2:
            # The values are passed as logging arguments so the %s
            # placeholders are actually filled in.
            log.warning("Bid price is twice the average spot price in this region for the last week. "
                        "(YOURS: %s; AVG: %s)\n"
                        "Halving it!", bid_price, avg)
            # Float division so an integer bid is not truncated on Python 2.
            bid_price = bid_price / 2.0

    return (stable_zone, bid_price)
示例#5
0
from my_first import partition_births
import survey
from thinkstats import Mean, Var
from utils import std_dev

if __name__ == '__main__':
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)

    firsts, others = partition_births(table)

    firsts_gestation_lengths = list((p.prglength for p in firsts.records))
    others_gestation_lengths = list((p.prglength for p in others.records))

    for births in (firsts, others):
        births_gestation_lengths = list((p.prglength for p in births.records))
        births.mean = Mean(births_gestation_lengths)
        births.variance = Var(births_gestation_lengths, births.mean)
        births.std_dev = std_dev(births_gestation_lengths, births.mean,
                                 births.variance)

    print 'The mean gestation time for firstborns is:', firsts.mean
    print 'The mean gestation time for others is:', others.mean

    print 'The gestation time variance for firstborns is:', firsts.variance
    print 'The gestation time variance for others is:', others.variance

    print 'The standard deviation of gestation times for firstborns is:', firsts.std_dev
    print 'The standard deviation of gestation times for others is:', others.std_dev
示例#6
0
文件: 2_2.py 项目: boydjj/think_stats
import survey
from thinkstats import Mean, Var
from utils import std_dev


if __name__ == '__main__':
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)

    firsts, others = partition_births(table)

    firsts_gestation_lengths = list((p.prglength for p in firsts.records))
    others_gestation_lengths = list((p.prglength for p in others.records))

    for births in (firsts, others):
        births_gestation_lengths = list((p.prglength for p in births.records))
        births.mean = Mean(births_gestation_lengths)
        births.variance = Var(births_gestation_lengths, births.mean)
        births.std_dev = std_dev(births_gestation_lengths, births.mean, births.variance)


    print 'The mean gestation time for firstborns is:', firsts.mean
    print 'The mean gestation time for others is:', others.mean

    print 'The gestation time variance for firstborns is:', firsts.variance
    print 'The gestation time variance for others is:', others.variance

    print 'The standard deviation of gestation times for firstborns is:', firsts.std_dev
    print 'The standard deviation of gestation times for others is:', others.std_dev
示例#7
0
    def report(self):
        """Print a summary of the accumulated results to standard output.

        Sections, in order: winning percentages per party, feasible
        coalitions (only if any were recorded), largest-party percentages,
        mean seats per party with a two-standard-deviation band, the mean
        margin of victory with the same kind of band, how often the Greens
        hold Brighton Pavilion, and the UKIP stealth targets (only if any
        were recorded).
        """
        total_runs = self.num_of_results

        # Mean and standard deviation of the margin of victory.
        mean_margin_of_victory = sum(self.margins_of_victory) / float(total_runs)
        margin_stddev = utils.std_dev(self.margins_of_victory)

        # Winners, most frequent first; a winner of None is a hung parliament.
        print("Winning percentages:")
        ranked_winners = sorted(self.win_counts.items(),
                                key=itemgetter(1),
                                reverse=True)
        for winner, wins in ranked_winners:
            party_name = "[Hung Parliament]" if winner is None else winner
            print("  {0}: {1}%".format(
                party_name, get_result_percentage(wins, total_runs)))

        # Coalitions, most frequent first, as a share of all coalition tallies.
        if len(self.possible_coalitions) > 0:
            print("Feasible coalitions in hung parliaments:")
            coalition_total = sum(self.possible_coalitions.values())
            ranked_coalitions = sorted(self.possible_coalitions.items(),
                                       key=itemgetter(1),
                                       reverse=True)
            for coalition, occurrences in ranked_coalitions:
                share = (float(occurrences) / coalition_total) * 100
                print("  {0} ({1:.1f}%)".format(coalition, share))

        # Largest party, listed in sorted key order.
        print("Largest-party percentages:")
        for party in sorted(self.largest_party_counts):
            print("  {0}: {1}%".format(
                party,
                get_result_percentage(self.largest_party_counts[party],
                                      total_runs)))

        # Parties by descending mean seats; band is mean +/- 2 std devs.
        print("Mean number of seats per-party (95% confidence intervals):")
        for party in sorted(self.mean_seats,
                            key=self.mean_seats.get,
                            reverse=True):
            centre = self.mean_seats[party]
            spread = 2 * self.stddev_seats[party]
            print("  {0}: {1} ({2:.2f}-{3:.2f})".format(
                party, centre, centre - spread, centre + spread))

        margin_spread = 2 * margin_stddev
        print("Mean margin of victory: {0} (95% between {1:.2f} and"
              " {2:.2f})".format(mean_margin_of_victory,
                                 mean_margin_of_victory - margin_spread,
                                 mean_margin_of_victory + margin_spread))

        print("Greens hold Brighton Pavilion in "
              "{0}% of runs".format(
                  get_result_percentage(self.greens_hold_brighton_count,
                                        total_runs)))

        # Stealth targets ordered by the sum of their recorded values.
        if len(self.ukip_stealth_targets) > 0:
            print("Most common UKIP stealth targets:")
            ranked_targets = sorted(self.ukip_stealth_targets.items(),
                                    key=lambda item: sum(item[1]),
                                    reverse=True)
            for seat, majorities in ranked_targets:
                print("  {0} (mean CON majority {1:.1f})".format(
                    seat, float(sum(majorities)) / len(majorities)))
示例#8
0
    def __optimize_spot_bid(self, instance_type, bid_price):
        '''
        Choose the availability zone with the most stable spot price and
        adjust the requested bid against the spot price history.

        Background notes on bidding:

        * Prices vary widely between zones and between instance types.
          Bidding far above an observed spot price increases cost
          considerably without a significant decrease in computation time.
        * There are data transfer fees when moving data between zones.  If
          results are written out to S3, no data is actually transferred
          between zones.
        * Pricing is set to maximize revenue given the user demand, under a
          supposed (but realistic) infinite resource availability.
        * Static bidding strategies contrast with dynamic ones, which adjust
          the bid according to the application's execution requirements and
          market prices.
          Static example: bid one quarter of the on-demand price, or 25%
          more than the minimum price in the spot pricing history.
          Dynamic example: bid according to the probability distribution of
          all market prices in the spot pricing history and the remaining
          deadline at the beginning of each instance hour.
        * The optimal bidding price only depends on a job's sensitivity to
          delay; the value of a compute job determines what a user is
          willing to pay to have it executed.
        * According to "Experimental Study of Bidding Strategies for
          Scientific Workflows using AWS Spot Instances", "bidding with 25%
          of on-demand price gives most balanced performance for scientific
          workflows".

        :param instance_type: instance type whose spot history is fetched.
        :param bid_price: the numeric bid the caller intends to place.
        :return: ``(stable_zone, bid_price)`` - the name of the zone with
            the smallest price deviation (zones whose recent price is below
            the bid are preferred) and the possibly adjusted bid.  A bid
            above twice the historical average is replaced by the average
            plus 25%; a bid at or below the historical minimum is replaced
            by the minimum plus 25%.  With an empty history the bid is
            returned unchanged.
        :raises ValueError: if no availability zones are returned, since
            there is then nothing to take the minimum over.
        '''
        # Fraction added on top of the average / minimum historical price.
        # Written as a float: ``25/100`` is 0 under Python 2 integer division.
        markup = 0.25

        log.info("Optimising bid price and placement for the spot request...")

        spot_hist = self.__get_spot_history(instance_type)
        zones = self.env.ec2client.describe_availability_zones()

        # Zones whose most recent price is below the bid, and all the others.
        markets_under_bid, markets_over_bid = [], []

        # Find the most stable zone for placing spot instances.
        for zone in zones['AvailabilityZones']:
            zone_name = zone['ZoneName']

            # Rebuild the history for *this* zone only.  Accumulating into
            # one shared list would mix in the prices of every earlier zone.
            zone_hists = [zh for zh in spot_hist
                          if zh['AvailabilityZone'] == zone_name]

            if zone_hists:
                prices = [round(float(hp['SpotPrice']), 4) for hp in zone_hists]
                price_dev = std_dev(prices)
                # Assumes the history is ordered newest first - TODO confirm.
                recent_price = prices[0]
            else:
                # No history for this zone: treat it as not under the bid.
                price_dev, recent_price = 0.0, bid_price

            best_zone = BestAvZone(name=zone_name, price_deviation=price_dev)
            if recent_price < bid_price:
                markets_under_bid.append(best_zone)
            else:
                markets_over_bid.append(best_zone)

        # Prefer zones under the bid; fall back to the rest if there are none.
        stable_zone = min(markets_under_bid or markets_over_bid,
                          key=attrgetter('price_deviation')).name

        # Check the spot history and deduce whether the bid is reasonable.
        # Skipped entirely when there is no history to compare against.
        all_prices = [round(float(pr['SpotPrice']), 4) for pr in spot_hist]
        if all_prices:
            avg = round(mean(all_prices), 4)
            sm = min(all_prices)

            if bid_price > (avg * 2.0):
                log.info("Bid price is twice than the average spot price of the last week:\n"
                            " - YOURS: %s --> AVG: %s)", str(bid_price), str(avg))
                bid_price = avg + (markup * avg)
                log.info("Bidding with 25%% more than the average: %s", str(bid_price))

            if bid_price <= sm:
                bid_price = sm + (markup * sm)
                log.info("Bid price is %s, i.e. 25%% more than the minimum in spot pricing history",
                            str(bid_price))

        # Two placeholders for the two arguments (the original "$s$" had one).
        log.info("Spot request placed in %s at %s$", stable_zone, bid_price)
        return (stable_zone, bid_price)