def pumpkin(weights):
    """
    Given an iterable of pumpkin weights, compute the sequence's mean,
    variance, and standard deviation.

    weights: iterable of numeric weights.  Lists and tuples are used as-is;
             any other iterable is copied into a list first.

    Returns a tuple ``(mean, variance, stddev)`` built from
    ``thinkstats.Mean``, ``thinkstats.Var`` and ``std_dev``.
    """
    # The data is handed to three separate helpers.  A one-shot iterator
    # (generator, file, ...) would be exhausted by the first of them, so
    # snapshot anything that is not already a re-readable list or tuple.
    if not isinstance(weights, (list, tuple)):
        weights = list(weights)

    mean = thinkstats.Mean(weights)
    variance = thinkstats.Var(weights, mean)
    stddev = std_dev(weights, mean, variance)
    return mean, variance, stddev
def analyze(self, results):
    """Fold a batch of results into this object's running totals.

    For every item in ``results`` the method updates, on ``self``:

    * ``num_of_results`` - incremented once per result.
    * ``win_counts`` / ``largest_party_counts`` / ``possible_coalitions``
      - occurrence counters keyed by ``result.winner``,
      ``result.largest_party`` and each entry of
      ``result.possible_coalitions``.
    * ``seats`` / ``ukip_stealth_targets`` - per-key lists collecting the
      values found in the result's mappings of the same name.
    * ``margins_of_victory`` - one appended entry per result.
    * ``most_seats_won`` / ``most_seats_won_party`` - the largest
      ``result.most_seats_won`` seen and the ``largest_party`` of that
      result.
    * ``greens_hold_brighton_count`` / ``seat_winner_is_pop_winner_count``
      - incremented when the corresponding result flag is truthy.

    After the batch, ``mean_seats`` and ``stddev_seats`` are recomputed for
    every party present in ``self.seats``.

    Returns None; all output is via the attributes above.
    """
    for result in results:
        self.num_of_results += 1

        # Counters: dict.get() supplies the zero for a first occurrence.
        self.win_counts[result.winner] = (
            self.win_counts.get(result.winner, 0) + 1)
        self.largest_party_counts[result.largest_party] = (
            self.largest_party_counts.get(result.largest_party, 0) + 1)

        # Per-party seat history; setdefault() creates the list on demand
        # and avoids scanning a keys() list for membership.
        for party, seat_count in result.seats.items():
            self.seats.setdefault(party, []).append(seat_count)

        self.margins_of_victory.append(result.margin_of_victory)

        # Track the record seat haul and which party achieved it.
        if result.most_seats_won > self.most_seats_won:
            self.most_seats_won = result.most_seats_won
            self.most_seats_won_party = result.largest_party

        if result.greens_hold_brighton:
            self.greens_hold_brighton_count += 1
        if result.seat_winner_is_pop_winner:
            self.seat_winner_is_pop_winner_count += 1

        for tgt in result.ukip_stealth_targets:
            self.ukip_stealth_targets.setdefault(tgt, []).append(
                result.ukip_stealth_targets[tgt])

        for coal in result.possible_coalitions:
            self.possible_coalitions[coal] = (
                self.possible_coalitions.get(coal, 0) + 1)

    # Calculate the mean and standard deviation of the number of seats for
    # each party.
    for party, seat_counts in self.seats.items():
        self.mean_seats[party] = sum(seat_counts) / float(len(seat_counts))
        self.stddev_seats[party] = utils.std_dev(seat_counts)
    return
def __optimize_spot_bid(env, instance_type, bid_price):
    '''
    Choose the availability zone with the most stable spot price and
    sanity-check the requested bid against the recent spot price history.

    Background notes:

    * There is a high variability of prices between zones and between
      instance types.
    * Bidding high above an observed spot price leads to a large cost
      increase without a significant decrease in computation time.
    * There are data transfer fees when moving data between zones! If we
      write out results to S3, there is no actual data transfer between
      zones.
    * Pricing is set to maximize revenue, given the user demand, in a
      supposed (but realistic) infinite resource availability.
    * Static bidding strategies contrast with dynamic bidding strategies,
      which adjust bid prices according to the application's execution
      requirements and market prices.
    * The optimal bidding price only depends on a job's sensitivity to
      delay. The value of a compute job is obviously of relevance when it
      comes to determining what a user is willing to pay to have it
      executed.
    * According to the paper on Bidding Strategies, "bidding with 25% of
      on-demand price gives most balanced performance for scientific
      workflows" (Experimental Study of Bidding Strategies for Scientific
      Workflows using AWS Spot Instances).

    :param env: object whose ``ec2client.describe_availability_zones()``
        lists the candidate zones; also passed to ``__get_spot_history``.
    :param instance_type: forwarded to ``__get_spot_history``.
    :param bid_price: the bid the caller intends to place.
    :return: ``(stable_zone, bid_price)`` - the name of the zone with the
        smallest price deviation (zones whose latest price is below the bid
        are preferred) and the bid, halved if it exceeded twice the average
        historical price.
    :raises ValueError: from ``min()`` if no availability zones are listed.
    '''
    # Zones whose most recent price is under the bid / not under the bid.
    markets_under_bid, markets_over_bid = [], []

    spot_hist = __get_spot_history(env, instance_type)
    zones = env.ec2client.describe_availability_zones()

    # Find the most stable zone for placing spot instances.
    for zone in zones['AvailabilityZones']:
        zone_name = zone['ZoneName']
        # Build the history afresh for every zone.  Reusing one list across
        # iterations would mix earlier zones' prices into this zone's
        # deviation and "recent" price.
        zone_hists = [zh for zh in spot_hist
                      if zh['AvailabilityZone'] == zone_name]
        if zone_hists:
            # A real list (not a lazy map object) so std_dev can traverse
            # it as often as it needs.
            prices = [float(hp['SpotPrice']) for hp in zone_hists]
            price_dev = std_dev(prices)
            # NOTE(review): assumes the history is ordered newest first -
            # confirm against __get_spot_history.
            recent_price = prices[0]
        else:
            # No data for this zone: treat it as perfectly stable but not
            # cheaper than the bid.
            price_dev, recent_price = 0.0, bid_price

        best_zone = BestAvZone(name=zone_name, price_deviation=price_dev)
        if recent_price < bid_price:
            markets_under_bid.append(best_zone)
        else:
            markets_over_bid.append(best_zone)

    # Prefer zones we can currently afford; fall back to the rest.
    stable_zone = min(markets_under_bid or markets_over_bid,
                      key=attrgetter('price_deviation')).name

    # Check spot history and deduce if it is a reasonable spot price.
    sh = [round(float(pr['SpotPrice']), 2) for pr in spot_hist]
    if sh:
        avg = mean(sh)
        if bid_price > avg * 2:
            log.warning("Bid price is twice the average spot price in this region for the last week. "
                        "(YOURS: %s; AVG: %s)\n"
                        "Halving it!", bid_price, avg)
            # 2.0 keeps this a true division even for an integer bid on
            # Python 2.
            bid_price /= 2.0
    return (stable_zone, bid_price)
# Script: compare gestation-length statistics (mean, variance, standard
# deviation) between first-born babies and all others.
import sys

from my_first import partition_births
import survey
from thinkstats import Mean, Var
from utils import std_dev

if __name__ == '__main__':
    # The directory holding the survey data is the only command-line
    # argument; fail with a usage message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit('usage: %s DATA_DIR' % sys.argv[0])
    data_dir = sys.argv[1]

    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    firsts, others = partition_births(table)

    # Attach the summary statistics to each partition as attributes.
    for births in (firsts, others):
        births_gestation_lengths = [p.prglength for p in births.records]
        births.mean = Mean(births_gestation_lengths)
        births.variance = Var(births_gestation_lengths, births.mean)
        births.std_dev = std_dev(births_gestation_lengths, births.mean,
                                 births.variance)

    report_lines = (
        ('The mean gestation time for firstborns is:', firsts.mean),
        ('The mean gestation time for others is:', others.mean),
        ('The gestation time variance for firstborns is:', firsts.variance),
        ('The gestation time variance for others is:', others.variance),
        ('The standard deviation of gestation times for firstborns is:',
         firsts.std_dev),
        ('The standard deviation of gestation times for others is:',
         others.std_dev),
    )
    # A single pre-formatted argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    for label, value in report_lines:
        print('%s %s' % (label, value))
# Script: compare gestation-length statistics (mean, variance, standard
# deviation) between first-born babies and all others.
import sys

import survey
from thinkstats import Mean, Var
from utils import std_dev

if __name__ == '__main__':
    # The directory holding the survey data is the only command-line
    # argument; fail with a usage message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit('usage: %s DATA_DIR' % sys.argv[0])
    data_dir = sys.argv[1]

    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    # NOTE(review): partition_births is not imported in this snippet -
    # confirm it is in scope where this script lives.
    firsts, others = partition_births(table)

    # Attach the summary statistics to each partition as attributes.
    for births in (firsts, others):
        births_gestation_lengths = [p.prglength for p in births.records]
        births.mean = Mean(births_gestation_lengths)
        births.variance = Var(births_gestation_lengths, births.mean)
        births.std_dev = std_dev(births_gestation_lengths, births.mean,
                                 births.variance)

    report_lines = (
        ('The mean gestation time for firstborns is:', firsts.mean),
        ('The mean gestation time for others is:', others.mean),
        ('The gestation time variance for firstborns is:', firsts.variance),
        ('The gestation time variance for others is:', others.variance),
        ('The standard deviation of gestation times for firstborns is:',
         firsts.std_dev),
        ('The standard deviation of gestation times for others is:',
         others.std_dev),
    )
    # A single pre-formatted argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    for label, value in report_lines:
        print('%s %s' % (label, value))
def report(self):
    """Print a summary of the accumulated results to standard output."""
    runs = self.num_of_results

    # Margin-of-victory statistics are worked out before anything is
    # printed, so a failure here produces no partial report.
    margin_mean = sum(self.margins_of_victory) / float(runs)
    margin_sd = utils.std_dev(self.margins_of_victory)

    # Outright winners, most frequent first; a winner of None is shown as
    # a hung parliament.
    print("Winning percentages:")
    ranked_winners = sorted(self.win_counts.items(), key=itemgetter(1),
                            reverse=True)
    for winner, wins in ranked_winners:
        label = "[Hung Parliament]" if winner is None else winner
        print(" {0}: {1}%".format(label, get_result_percentage(wins, runs)))

    # Coalitions, as a share of all coalition occurrences recorded.
    if self.possible_coalitions:
        print("Feasible coalitions in hung parliaments:")
        coalition_total = sum(self.possible_coalitions.values())
        ranked_coalitions = sorted(self.possible_coalitions.items(),
                                   key=itemgetter(1), reverse=True)
        for name, occurrences in ranked_coalitions:
            share = (float(occurrences) / coalition_total) * 100
            print(" {0} ({1:.1f}%)".format(name, share))

    print("Largest-party percentages:")
    for party in sorted(self.largest_party_counts):
        pct = get_result_percentage(self.largest_party_counts[party], runs)
        print(" {0}: {1}%".format(party, pct))

    # Parties by mean seat count, highest first, with a band of two
    # standard deviations either side of the mean.
    print("Mean number of seats per-party (95% confidence intervals):")
    for party in sorted(self.mean_seats, key=self.mean_seats.get,
                        reverse=True):
        centre = self.mean_seats[party]
        spread = 2 * self.stddev_seats[party]
        print(" {0}: {1} ({2:.2f}-{3:.2f})".format(
            party, centre, (centre - spread), (centre + spread)))

    margin_spread = 2 * margin_sd
    print("Mean margin of victory: {0} (95% between {1:.2f} and {2:.2f})".format(
        margin_mean, (margin_mean - margin_spread),
        (margin_mean + margin_spread)))

    print("Greens hold Brighton Pavilion in {0}% of runs".format(
        get_result_percentage(self.greens_hold_brighton_count, runs)))

    # Targets ordered by the total of their recorded values; the mean of
    # those values is displayed.
    if self.ukip_stealth_targets:
        print("Most common UKIP stealth targets:")
        ranked_targets = sorted(self.ukip_stealth_targets.items(),
                                key=lambda item: sum(item[1]), reverse=True)
        for seat, majorities in ranked_targets:
            print(" {0} (mean CON majority {1:.1f})".format(
                seat, (float(sum(majorities)) / len(majorities))))
    return
def __optimize_spot_bid(self, instance_type, bid_price):
    '''
    Choose the availability zone with the most stable spot price and adjust
    the requested bid against the recent spot price history.

    Background notes:

    * There is a high variability of prices between zones and between
      instance types.
    * Bidding high above an observed spot price leads to a large cost
      increase without a significant decrease in computation time.
    * There are data transfer fees when moving data between zones! If we
      write out results to S3, there is no actual data transfer between
      zones.
    * Pricing is set to maximize revenue, given the user demand, in a
      supposed (but realistic) infinite resource availability.
    * Static bidding strategies contrast with dynamic bidding strategies,
      which adjust bid prices according to the application's execution
      requirements and market prices.
      Static bidding example: bid with one quarter of the on-demand price;
      bid with 25% more of the minimum price in the spot pricing history.
      Dynamic bidding example: bid according to the probability
      distribution of all the market prices existing in the spot pricing
      history, and the remaining deadline at the beginning of each
      instance hour.
    * The optimal bidding price only depends on a job's sensitivity to
      delay. The value of a compute job is obviously of relevance when it
      comes to determining what a user is willing to pay to have it
      executed.
    * According to the paper on Bidding Strategies, "bidding with 25% of
      on-demand price gives most balanced performance for scientific
      workflows" (Experimental Study of Bidding Strategies for Scientific
      Workflows using AWS Spot Instances).

    :param instance_type: forwarded to ``self.__get_spot_history``.
    :param bid_price: the bid the caller intends to place.
    :return: ``(stable_zone, bid_price)`` - the name of the zone with the
        smallest price deviation (zones whose latest price is below the bid
        are preferred) and the possibly adjusted bid.  A bid above twice
        the historical average becomes the average plus 25%; a bid at or
        below the historical minimum becomes the minimum plus 25%.  With
        no price history the bid is returned unchanged.
    :raises ValueError: from ``min()`` if no availability zones are listed.
    '''
    # Fraction added on top of the reference price when the bid is adjusted.
    # Written as a float: (25/100) is 0 under Python 2 integer division.
    markup = 0.25

    # Zones whose most recent price is under the bid / not under the bid.
    markets_under_bid, markets_over_bid = [], []

    log.info("Optimising bid price and placement for the spot request...")
    spot_hist = self.__get_spot_history(instance_type)
    zones = self.env.ec2client.describe_availability_zones()

    # Find the most stable zone for placing spot instances.
    for zone in zones['AvailabilityZones']:
        zone_name = zone['ZoneName']
        # Build the history afresh for every zone.  Reusing one list across
        # iterations would mix earlier zones' prices into this zone's
        # deviation and "recent" price.
        zone_hists = [zh for zh in spot_hist
                      if zh['AvailabilityZone'] == zone_name]
        if zone_hists:
            prices = [round(float(hp['SpotPrice']), 4) for hp in zone_hists]
            price_dev = std_dev(prices)
            # NOTE(review): assumes the history is ordered newest first -
            # confirm against __get_spot_history.
            recent_price = prices[0]
        else:
            # No data for this zone: treat it as perfectly stable but not
            # cheaper than the bid.
            price_dev, recent_price = 0.0, bid_price

        best_zone = BestAvZone(name=zone_name, price_deviation=price_dev)
        if recent_price < bid_price:
            markets_under_bid.append(best_zone)
        else:
            markets_over_bid.append(best_zone)

    # Prefer zones we can currently afford; fall back to the rest.
    stable_zone = min(markets_under_bid or markets_over_bid,
                      key=attrgetter('price_deviation')).name

    # Check spot history and deduce if it is a reasonable spot price.  With
    # an empty history there is nothing to compare against, so the bid is
    # left untouched.
    all_prices = [round(float(pr['SpotPrice']), 4) for pr in spot_hist]
    if all_prices:
        sm = min(all_prices)
        avg = round(mean(all_prices), 4)
        if bid_price > (avg * 2.0):
            log.info("Bid price is twice than the average spot price of the last week:\n"
                     " - YOURS: %s --> AVG: %s)", str(bid_price), str(avg))
            bid_price = avg + (markup * avg)
            log.info("Bidding with 25%% more than the average: %s", str(bid_price))
        if bid_price <= sm:
            bid_price = sm + (markup * sm)
            log.info("Bid price is %s, i.e. 25%% more than the minimum in spot pricing history",
                     str(bid_price))

    # Two arguments need two placeholders, otherwise logging reports a
    # formatting error instead of the message.
    log.info("Spot request placed in %s at %s$", stable_zone, bid_price)
    return (stable_zone, bid_price)