Python Stats.add_to_series示例

class Runner:
  NEW_GEN_DIST = {'mutate': 0.05, 'reproduce': 0.5}
  # Stats Keys:
  SK_LOWEST_ERROR = 'lowest_error'
  SK_BEST_INDIVIDUAL = 'best_individual'
  SK_TARGET_SAMPLES = 'target_samples'
  SK_ACTUAL_SAMPLES = 'actual_samples'  
  SK_BEST_TREE = 'best_tree'

  @classmethod
  def log(cls):
    return GP_Logger.logger(cls.__name__)

  def __init__(self, population, termination_error_threshold, max_generations, tournament_size=2):
    self.population = population
    self.termination_error_threshold = termination_error_threshold
    self.current_generation = 1
    self.current_best_error = sys.maxint
    self.max_generations = max_generations
    self.tournament_size = tournament_size
    self.stats = Stats(self.__class__.__name__)
    self.stats.init_series([self.SK_LOWEST_ERROR, self.SK_BEST_INDIVIDUAL, self.SK_TARGET_SAMPLES, self.SK_ACTUAL_SAMPLES,
                            self.SK_BEST_TREE])

  def store_target_samples(self):
    experiment = self.population[0].exp_class(*self.population[0].exp_args)
    map(lambda x:self.stats.add_to_series(self.SK_TARGET_SAMPLES, x), experiment.target_data())

  def store_actual_samples(self, individual):
    self.stats.init_series(self.SK_ACTUAL_SAMPLES)
    map(lambda x:self.stats.add_to_series(self.SK_ACTUAL_SAMPLES, x), individual.evaluate_data())

  def findIndexOfBest(self):
    return numpy.argmin(map(lambda x:x.error, self.population))

  def evaluate(self):
    self.log().debug('evaluating generation %d' % (self.current_generation))
    for individual in self.population:
      individual.evaluate()

    best = self.findIndexOfBest()
    self.log().debug('population member %d was best with %d error (target: %d)' % (best, self.population[best].error, self.termination_error_threshold))
    self.stats.add_to_series(self.SK_LOWEST_ERROR, {'error': self.population[best].error, 'index': self.current_generation}, timestamp=True)
    self.stats.add_to_series(self.SK_BEST_INDIVIDUAL, {'best_ix': best, 'index': self.current_generation}, timestamp=True)
    return self.population[best]

  def update_standings(self):
    for i in xrange(0, len(self.population), self.tournament_size):
      lowest_error = min(map(lambda x:x.error, self.population[i:i+self.tournament_size]))
      winners = filter(lambda x:x.error == lowest_error, self.population[i:i+self.tournament_size])
      losers = filter(lambda x:x.error > lowest_error, self.population[i:i+self.tournament_size])
      assert len(winners) + len(losers) == self.tournament_size, 'Expected winners (%d) + losers (%d) = tournament size (%d)' % (len(winners), len(losers), self.tournament_size)
      if len(losers) == 0:
        continue

      self.log().debug('best in tournament [%d:%d](error: %d): %d winners' % (i, i+self.tournament_size, lowest_error, len(winners)))
      map(lambda x:x.increment_standing(), winners)
      map(lambda x:x.decrement_standing(), losers)

  def random_select_n_unique(self, number, weight_list):
    selection = []
    assert number < len(weight_list), 'attemt to get %d unique values from a list of %d elements' % (number, len(weight_list))
    weight_max = weight_list[-1]

    while len(selection) < number:
      ix = bisect.bisect_right(weight_list, random.uniform(0, weight_max))
      if not ix in selection:
        selection.append(ix)

    return selection

  def generate_new_population(self):
    new_population = []
    self.update_standings()
    weight_list = list(numpy.cumsum(map(lambda x:x.standing, self.population)))
    pop_size = len(self.population)

    chosen_number = int(pop_size * self.NEW_GEN_DIST['reproduce'])
    chosen_number = chosen_number - (chosen_number % 2)
    individuals_chosen = self.random_select_n_unique(chosen_number, weight_list)
    self.log().debug('%d indiviuals chosen to reproduce - %s' % (len(individuals_chosen), sorted(individuals_chosen)))
    for ix in xrange(0, len(individuals_chosen), 2):
      new_population.append(self.population[individuals_chosen[ix]].reproduce(self.population[individuals_chosen[ix+1]]))

    chosen_number = int(pop_size * self.NEW_GEN_DIST['mutate'])
    individuals_chosen = self.random_select_n_unique(chosen_number, weight_list)
    self.log().debug('%d indiviuals chosen to mutate - %s' % (len(individuals_chosen), sorted(individuals_chosen)))
    for ix in xrange(0, len(individuals_chosen)):
      new_population.append(self.population[individuals_chosen[ix]].mutate())

    chosen_number = len(self.population) - len(new_population)
    individuals_chosen = self.random_select_n_unique(chosen_number, weight_list)
    self.log().debug('%d indiviuals chosen to clone - %s' % (len(individuals_chosen), sorted(individuals_chosen)))
    for ix in xrange(0, len(individuals_chosen)):
      new_population.append(self.population[individuals_chosen[ix]].clone())
    
    assert len(self.population) == len(new_population), 'new population size does not match original'
    self.population = new_population
    self.current_generation += 1

  def check_evaluation(self, best):
    if best.error <= self.current_best_error:
      self.current_best_error = best.error
      self.stats.add_to_series(self.SK_BEST_TREE, {'tree': best.tree.dump_structure()})
      self.store_actual_samples(best)
    return (best.error <= self.termination_error_threshold)

  def run(self):
    self.store_target_samples()
    success = self.check_evaluation(self.evaluate())
    while self.current_generation <= self.max_generations and success == False:
      self.generate_new_population()
      self.log().debug('average standing for generation %d: %f' % (self.current_generation, 
                                       numpy.average(map(lambda x:x.standing, self.population))))
      success = self.check_evaluation(self.evaluate())
    print 'success: %s' % (success)