def average_best_evals(self, n):
    """
    Return the average of the n last best evaluations of the goal function.

    This is a fast function which uses the last evaluations already done
    by the SPSA algorithm to return an approximation of the current goal
    value (note that we do not call the goal function another time, so the
    returned value is an upper bound of the true value).
    """
    assert (self.best_count > 0), "not enough evaluations in average_best_evals!"

    if n <= 0:
        n = 1
    if n > 1000:
        n = 1000
    if n > self.best_count:
        n = self.best_count

    sum_eval = 0.0
    sum_theta = utils.linear_combinaison(0.0, self.theta0)
    for i in range(n):
        j = ((self.best_count - 1) % 1000) - i
        if j < 0:
            j += 1000
        if j >= 1000:
            j -= 1000

        sum_eval += self.best_eval[j]
        sum_theta = utils.sum(sum_theta, self.best_theta[j])

    # Return the average
    alpha = 1.0 / (1.0 * n)
    return (alpha * sum_eval, utils.linear_combinaison(alpha, sum_theta))
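
# Illustrative sketch (not part of the original class): best_eval and best_theta
# act as 1000-slot circular buffers, and the j index above walks backwards from
# the most recently written slot. The helper below repeats the same index
# arithmetic on a plain Python list; its name and the `size` parameter are
# assumptions made only for this example.
def _sketch_last_n_average(buffer, write_count, n, size=1000):
    # buffer: circular list of length `size`; write_count: total writes so far.
    n = max(1, min(n, min(write_count, size)))
    total = 0.0
    for i in range(n):
        j = ((write_count - 1) % size) - i
        if j < 0:
            j += size
        total += buffer[j]
    return total / n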
def approximate_gradient(self, theta, c):
    '''
    Return an approximation of the gradient of f at point theta.

    On repeated calls, the expectation of the series of returned values
    converges almost surely to the true gradient of f at theta.
    '''
    if self.history_count > 0:
        current_goal, _ = self.average_evaluations(30)
    else:
        current_goal = 100000000000000000.0

    bernouilli = self.create_bernouilli(theta)

    count = 0
    while True:
        # Use the same random generator state for the two evaluations to
        # reduce the variance of the gradient estimate, and loop until the
        # two evaluations differ so that we never use a null gradient.
        state = random.getstate()
        theta1 = utils.linear_combinaison(1.0, theta, c, bernouilli)
        f1 = self.evaluate_goal(theta1)

        random.setstate(state)
        theta2 = utils.linear_combinaison(1.0, theta, -c, bernouilli)
        f2 = self.evaluate_goal(theta2)

        if f1 != f2:
            break

        count = count + 1
        if count >= 100:
            break

    # Update the gradient
    gradient = {}
    for (name, value) in theta.items():
        gradient[name] = (f1 - f2) / (2.0 * c * bernouilli[name])

    if (f1 > current_goal) and (f2 > current_goal):
        print('function seems not decreasing')
        gradient = utils.linear_combinaison(0.1, gradient)

    # Running average with the previous gradient
    gradient = utils.linear_combinaison(0.1, gradient, 0.9, self.previous_gradient)
    self.previous_gradient = gradient

    # Store the best of the two evals f1 and f2 (or both)
    if (f1 <= current_goal):
        self.best_eval[self.best_count % 1000] = f1
        self.best_theta[self.best_count % 1000] = theta1
        self.best_count += 1

    if (f2 <= current_goal):
        self.best_eval[self.best_count % 1000] = f2
        self.best_theta[self.best_count % 1000] = theta2
        self.best_count += 1

    # Return the estimation of the new gradient
    return gradient
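
# Illustrative sketch (not part of the original optimizer): the two-sided SPSA
# estimate used above, written for plain {name: float} dictionaries instead of
# this module's parameter records. The helper name and the external goal
# function `f` are assumptions for the example only; one shared pair of
# evaluations serves every component of the gradient.
def _sketch_spsa_gradient(f, theta, c, delta):
    # theta and delta are {name: float}; delta holds the +/-1 Bernoulli draws.
    theta_plus = {k: v + c * delta[k] for k, v in theta.items()}
    theta_minus = {k: v - c * delta[k] for k, v in theta.items()}
    f1, f2 = f(theta_plus), f(theta_minus)
    return {k: (f1 - f2) / (2.0 * c * delta[k]) for k in theta}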
def create_bernouilli(self, m):
    """
    Create a random direction to estimate the stochastic gradient.
    We use a Bernoulli distribution: bernouilli = (+1, +1, -1, +1, -1, ...)
    """
    bernouilli = copy.deepcopy(m)
    for (name, value) in m.items():
        bernouilli[name]['value'] = 1 if random.randint(0, 1) else -1

    g = utils.norm2(self.previous_gradient)
    d = utils.norm2(bernouilli)

    if g > 0.00001:
        # Bias the direction slightly toward the previous gradient.
        bernouilli = utils.linear_combinaison(0.55, bernouilli,
                                              0.25 * d / g,
                                              self.previous_gradient)

    for (name, value) in m.items():
        if bernouilli[name]['value'] == 0.0:
            bernouilli[name]['value'] = 0.2
        if abs(bernouilli[name]['value']) < 0.2:
            bernouilli[name]['value'] = 0.2 * utils.sign_of(bernouilli[name]['value'])

    return bernouilli
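
# Illustrative sketch (hypothetical standalone helper): the plain +/-1 Bernoulli
# draw used above, for a simple list of parameter names rather than this
# module's parameter records. The clamping in create_bernouilli keeps every
# component away from zero so that the later division by bernouilli[name] in
# the gradient estimate stays well defined.
import random

def _sketch_bernoulli_direction(names):
    return {name: 1.0 if random.randint(0, 1) else -1.0 for name in names}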
def run(self):
    '''
    Return a point which is (hopefully) a minimizer of the goal
    function f, starting from point theta0.

    Returns:
        The point (as a dict) which is (hopefully) a minimizer of 'f'.
    '''
    k = 0
    theta = self.theta0

    while True:
        if self.constraints is not None:
            theta = self.constraints(theta)

        print("theta = " + utils.pretty(theta))

        c_k = self.c / ((k + 1)**self.gamma)
        a_k = self.a / ((k + 1 + self.A)**self.alpha)

        gradient = self.approximate_gradient(theta, c_k)

        # For steepest descent we update via a constant small step in the gradient direction
        mu = -0.01 / max(1.0, utils.norm2(gradient))
        theta = utils.linear_combinaison(1.0, theta, mu, gradient)

        ## For RPROP, we update with information about the sign of the gradients
        theta = utils.linear_combinaison(1.0, theta, -0.01,
                                         self.rprop(theta, gradient))

        # We then move to the point which gives the best average of goal
        (avg_goal, avg_theta) = self.average_best_evals(30)
        theta = utils.linear_combinaison(0.8, theta, 0.2, avg_theta)

        k = k + 1
        if k >= self.max_iter:
            break

        if (k % 100 == 0) or (k <= 1000):
            (avg_goal, avg_theta) = self.average_evaluations(30)
            print("iter = " + str(k))
            print("mean goal (all) = " + str(avg_goal))
            print("mean theta (all) = " + utils.pretty(avg_theta))

            (avg_goal, avg_theta) = self.average_best_evals(30)
            print('mean goal (best) = ' + str(avg_goal))
            print('mean theta (best) = ' + utils.pretty(avg_theta))
            print('-----------------------------------------------------------')

    return theta
def average_best_evals(self, n):
    '''
    Return the average of the n last best evaluations of the goal function.
    '''
    if n <= 0:
        n = 1
    if n > 1000:
        n = 1000
    if n > self.best_count:
        n = self.best_count

    sum_eval = 0.0
    sum_theta = utils.linear_combinaison(0.0, self.theta0)
    for i in range(n):
        j = ((self.best_count - 1) % 1000) - i
        if j < 0:
            j += 1000
        if j >= 1000:
            j -= 1000

        sum_eval += self.best_eval[j]
        sum_theta = utils.sum(sum_theta, self.best_theta[j])

    # Return the average
    alpha = 1.0 / (1.0 * n)
    return (alpha * sum_eval, utils.linear_combinaison(alpha, sum_theta))
def average_evaluations(self, n):
    """
    Return the average of the n last evaluations of the goal function.

    This is a fast function which uses the last evaluations already done
    by the SPSA algorithm to return an approximation of the current goal
    value (note that we do not call the goal function another time, so the
    returned value is an upper bound of the true value).
    """
    assert (self.history_count > 0), "not enough evaluations in average_evaluations!"

    n = max(1, min(1000, n))
    n = min(n, self.history_count)
    # print(f'n = {n}')
    # print(f'hist_cnt = {self.history_count}')

    sum_eval = 0.0
    sum_theta = utils.linear_combinaison(0.0, self.theta0)
    for i in range(n):
        j = ((self.history_count - 1) % 1000) - i
        if j < 0:
            j += 1000
        if j >= 1000:
            j -= 1000

        # print(f'i={i}, j={j}, hist_cnt: {self.history_count}, hist_eval[{j}] = {self.history_eval[j]}')
        sum_eval += self.history_eval[j]
        sum_theta = utils.sum(sum_theta, self.history_theta[j])

    # Return the average
    alpha = 1.0 / (1.0 * n)
    return (alpha * sum_eval, utils.linear_combinaison(alpha, sum_theta))
def create_bernouilli(self, m):
    '''
    Create a random direction to estimate the stochastic gradient.
    '''
    bernouilli = {}
    for (name, value) in m.items():
        bernouilli[name] = 1 if random.randint(0, 1) else -1

    g = utils.norm2(self.previous_gradient)
    d = utils.norm2(bernouilli)

    if g > 0.00001:
        bernouilli = utils.linear_combinaison(0.55, bernouilli,
                                              0.25 * d / g,
                                              self.previous_gradient)

    for (name, value) in m.items():
        if bernouilli[name] == 0.0:
            bernouilli[name] = 0.2
        if abs(bernouilli[name]) < 0.2:
            bernouilli[name] = 0.2 * utils.sign_of(bernouilli[name])

    return bernouilli
def run(self):
    """
    Return a point which is (hopefully) a minimizer of the goal
    function f, starting from point theta0.

    Returns:
        The point (as a dict) which is (hopefully) a minimizer of "f".
    """
    k = 0
    theta = self.theta0

    while True:
        k = k + 1
        self.iter = k

        if self.constraints is not None:
            theta = self.constraints(theta)

        # print("theta = " + utils.pretty(theta))

        c_k = self.c / (k**self.gamma)
        a_k = self.a / ((k + self.A)**self.alpha)

        gradient = self.approximate_gradient(theta, c_k)

        # print(str(k) + " gradient = " + utils.pretty(gradient))
        # if k % 1000 == 0:
        #     print(k + utils.pretty(theta) + "norm2(g) = " + str(utils.norm2(gradient)))
        #     print(k + " theta = " + utils.pretty(theta))

        ## For SPSA we update with a small step (theta = theta - a_k * gradient)
        ## theta = utils.linear_combinaison(1.0, theta, -a_k, gradient)

        ## For steepest descent we update via a constant small step in the gradient direction
        mu = -0.01 / max(1.0, utils.norm2(gradient))
        theta = utils.linear_combinaison(1.0, theta, mu, gradient)

        ## For RPROP, we update with information about the sign of the gradients
        theta = utils.linear_combinaison(1.0, theta, -0.01,
                                         self.rprop(theta, gradient))

        ## We then move to the point which gives the best average of goal
        (avg_goal, avg_theta) = self.average_best_evals(30)
        theta = utils.linear_combinaison(0.98, theta, 0.02, avg_theta)

        if (k % 10 == 0):
            (avg_goal, avg_theta) = self.average_evaluations(30)
            print("iter = " + str(k))
            print("mean goal (all) = " + str(avg_goal))
            print("mean theta (all) = " + utils.pretty(avg_theta))

            (avg_goal, avg_theta) = self.average_best_evals(30)
            print("mean goal (best) = " + str(avg_goal))
            print("mean theta (best) = " + utils.pretty(avg_theta))
            print("-----------------------------------------------------------------")

        if k >= self.max_iter:
            break

    return theta
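
# Illustrative sketch (assumed standalone helper, not from the original code):
# the SPSA gain sequences used above shrink the perturbation size c_k and the
# step size a_k as the iteration count k grows, following the usual schedules
# c_k = c / k**gamma and a_k = a / (k + A)**alpha (Spall's commonly recommended
# exponents are alpha = 0.602 and gamma = 0.101).
def _sketch_gain_sequences(a, c, A, alpha, gamma, k):
    # k is the 1-based iteration counter, matching the loop above.
    c_k = c / (k ** gamma)
    a_k = a / ((k + A) ** alpha)
    return a_k, c_k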
def approximate_gradient(self, theta, c):
    """
    Return an approximation of the gradient of f at point theta.

    On repeated calls, the expectation of the series of returned values
    converges almost surely to the true gradient of f at theta.
    """
    if self.history_count > 0:
        current_goal, _ = self.average_evaluations(30)
    else:
        current_goal = 100000000000000000.0

    bernouilli = self.create_bernouilli(theta)

    count = 0
    while True:
        # Calculate two evaluations of f at points M + c * bernouilli and
        # M - c * bernouilli to estimate the gradient. We do not want to
        # use a null gradient, so we loop until the two function evaluations
        # are different. Another trick is that we use the same seed for the
        # random generator for the two function evaluations, to reduce the
        # variance of the gradient if the evaluations use simulations (like
        # in games).
        state = random.getstate()
        theta1 = utils.linear_combinaison(1.0, theta, c, bernouilli)
        f1 = self.evaluate_goal(theta1)

        random.setstate(state)
        theta2 = utils.linear_combinaison(1.0, theta, -c, bernouilli)
        f2 = self.evaluate_goal(theta2)

        if f1 != f2:
            break

        count = count + 1
        if count >= 100:
            # print("too many evaluations to find a gradient, function seems flat")
            break

    # Update the gradient
    gradient = {}
    for (name, value) in theta.items():
        gradient[name] = (f1 - f2) / (2.0 * c * bernouilli[name])

    if (f1 > current_goal) and (f2 > current_goal):
        print("function seems not decreasing")
        gradient = utils.linear_combinaison(0.1, gradient)

    # For the correction factor used in the running average for the gradient,
    # see the paper "Adam: A Method For Stochastic Optimization", Kingma and Lei Ba.
    beta = 0.9
    correction = 1.0 / (1.0 - pow(beta, self.iter))
    gradient = utils.linear_combinaison((1 - beta), gradient, beta,
                                        self.previous_gradient)
    gradient = utils.linear_combinaison(correction, gradient)

    # Store the current gradient for the next time, to calculate the running average
    self.previous_gradient = gradient

    # Store the best of the two evals f1 and f2 (or both)
    if (f1 <= current_goal):
        self.best_eval[self.best_count % 1000] = f1
        self.best_theta[self.best_count % 1000] = theta1
        self.best_count += 1

    if (f2 <= current_goal):
        self.best_eval[self.best_count % 1000] = f2
        self.best_theta[self.best_count % 1000] = theta2
        self.best_count += 1

    # Return the estimation of the new gradient
    return gradient
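
# Illustrative sketch (hypothetical standalone helper): the bias-corrected
# exponential running average referenced above. With beta = 0.9 and t = 1 the
# raw average (1 - beta) * g is only one tenth of the new gradient, so the
# factor 1 / (1 - beta**t) rescales it; the factor decays toward 1 as t grows.
# Plain floats are assumed here instead of the module's theta dicts. For a
# constant gradient sequence the corrected average stays at that constant
# (up to floating-point rounding).
def _sketch_bias_corrected_average(gradients, beta=0.9):
    avg = 0.0
    corrected = []
    for t, g in enumerate(gradients, start=1):
        avg = (1.0 - beta) * g + beta * avg
        corrected.append(avg / (1.0 - beta ** t))
    return corrected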
def approximate_gradient(self, theta, c, iter):
    """
    Return an approximation of the gradient of f at point theta.

    On repeated calls, the expectation of the series of returned values
    converges almost surely to the true gradient of f at theta.
    """
    true_theta = utils.true_param(theta)

    if self.history_count > 0:
        current_goal, _ = self.average_evaluations(30)
    else:
        current_goal = SPSA_minimization.BAD_GOAL
    logging.info(f'{__file__} > current_goal: {current_goal}')
    print(f'current optimizer mean goal: {current_goal:0.5f} (low is better, lowest: -1.0, highest: 1.0)')
    # print(f'Sample, optimizer goal = -(engine match score) or -(3.0 pts/4 games) or -0.75')

    bernouilli = self.create_bernouilli(theta)

    count = 0
    while True:
        logging.info(f'{__file__} Apply bernouilli term to theta, theta={theta}, c={c}, bernouilli={bernouilli}')

        # Calculate two evaluations of f at points M + c * bernouilli and
        # M - c * bernouilli to estimate the gradient. We do not want to
        # use a null gradient, so we loop until the two function evaluations
        # are different. Another trick is that we use the same seed for the
        # random generator for the two function evaluations, to reduce the
        # variance of the gradient if the evaluations use simulations (like
        # in games).
        state = random.getstate()
        theta1 = utils.linear_combinaison(1.0, theta, c, bernouilli)
        logging.info(f'{__file__} theta1: {theta1}')

        # Apply parameter limits
        logging.info(f'{__file__} > Apply limits to theta1 before sending to engine')
        theta1 = utils.apply_limits(theta1)
        logging.info(f'{__file__} theta1 with limits: {theta1}')
        logging.info(f'{__file__} > run 1st match with theta1: {theta1}')

        random.setstate(state)
        theta2 = utils.linear_combinaison(1.0, theta, -c, bernouilli)
        logging.info(f'{__file__} theta2: {theta2}')

        # Apply parameter limits
        logging.info(f'{__file__} > Apply limits to theta2 before sending to engine')
        theta2 = utils.apply_limits(theta2)
        logging.info(f'{__file__} theta2 with limits: {theta2}')
        logging.info(f'{__file__} > run 2nd match with theta2: {theta2}')

        # Run the 2 matches in parallel after iteration 1.
        manager = multiprocessing.Manager()
        res = manager.dict()
        thetas = [theta1, theta2]

        if iter < self.iter_parallel_start:
            print('Run match 1 ...')
            true_param = utils.true_param(theta1)
            print('test_engine param:')
            for (name, val), (name1, val1) in zip(true_param.items(), true_theta.items()):
                print(f' {name}: {val["value"]}, ({val["value"] - val1["value"]:+})')
            print('base_engine param:')
            for name, val in utils.true_param(theta).items():
                print(f' {name}: {val["value"]}')

            t1 = time.perf_counter()
            f1 = self.evaluate_goal(theta1, theta, 0, res, iter)
            logging.info(f'f1 elapse: {time.perf_counter() - t1:0.2f}s')
            print(f'Done match 1!, elapse: {time.perf_counter() - t1:0.2f}sec')
            print(f'goal after match 1: {f1:0.5f}')

            # Run match 2
            print('Run match 2 ...')
            true_param = utils.true_param(theta2)
            print('test_engine param:')
            for (name, val), (name1, val1) in zip(true_param.items(), true_theta.items()):
                print(f' {name}: {val["value"]}, ({val["value"] - val1["value"]:+})')
            print('base_engine param:')
            for name, val in utils.true_param(theta).items():
                print(f' {name}: {val["value"]}')

            t1 = time.perf_counter()
            f2 = self.evaluate_goal(theta2, theta, 1, res, iter)
            logging.info(f'f2 elapse: {time.perf_counter() - t1:0.2f}s')
            print(f'Done match 2!, elapse: {time.perf_counter() - t1:0.2f}sec')
            print(f'goal after match 2: {f2:0.5f}')
            print('Done engine match!')
        else:
            print('Run 2 matches in parallel ...')
            t1 = time.perf_counter()
            jobs = []
            for i in range(2):
                print(f'Run match {i + 1} ...')
                true_param = utils.true_param(thetas[i])
                print('test_engine param:')
                for (name, val), (name1, val1) in zip(true_param.items(), true_theta.items()):
                    print(f' {name}: {val["value"]}, ({val["value"] - val1["value"]:+})')
                print('base_engine param:')
                for name, val in utils.true_param(theta).items():
                    print(f' {name}: {val["value"]}')

                p = multiprocessing.Process(target=self.evaluate_goal,
                                            args=(thetas[i], theta, i, res, iter))
                jobs.append(p)
                p.start()

            for num, proc in enumerate(jobs):
                proc.join()
                # If the match is run in parallel, update the history count,
                # eval and theta here.
                self.history_eval[self.history_count % 1000] = res.values()[num]
                self.history_theta[self.history_count % 1000] = thetas[num]
                self.history_count += 1
                print(f'Done match {num + 1}!, elapse: {time.perf_counter() - t1:0.2f}sec')

            logging.info(f'parallel elapse: {time.perf_counter() - t1:0.2f}s')
            print('Done engine match!')

            f1, f2 = res.values()[0], res.values()[1]

        logging.info(f'{__file__} > f1: {f1}, f2: {f2}')
        print(f'optimizer goal after match 1: {f1:0.5f} (low is better)')
        print(f'optimizer goal after match 2: {f2:0.5f} (low is better)')

        if f1 != f2:
            break

        print('perf is the same in match 1 and 2, launch new matches ...')
        count = count + 1
        logging.info(f'{__file__} > f1 and f2 are the same, try the engine match again. num_tries = {count}')
        if count >= 100:
            logging.info(f'{__file__} > too many evaluations to find a gradient, function seems flat')
            break

    # Update the gradient
    gradient = copy.deepcopy(theta)
    # print('Basic gradient after 2 engine matches:')
    for name, value in theta.items():
        gradient[name]['value'] = (f1 - f2) / (2.0 * c * bernouilli[name]['value'])
        # print(f' {name}: {gradient[name]["value"]}')
        logging.info(f'{__file__} > {name} gradient: {gradient}')

    if (f1 > current_goal) and (f2 > current_goal):
        logging.info(f'{__file__} > function seems not decreasing')
        gradient = utils.linear_combinaison(0.1, gradient)
        print('Modify the gradient because the results of engine matches\n'
              'did not improve when using the new param. But we will not\n'
              're-run the engine matches.')
        print('Modified gradient at alpha=0.1:')
        for n, v in gradient.items():
            print(f' {n}: {v["value"]}')

    # For the correction factor used in the running average for the gradient,
    # see the paper "Adam: A Method For Stochastic Optimization", Kingma and Lei Ba.
    beta = 0.9
    correction = 1.0 / (1.0 - pow(beta, self.iter))
    gradient = utils.linear_combinaison((1 - beta), gradient, beta,
                                        self.previous_gradient)
    gradient = utils.linear_combinaison(correction, gradient)

    # print('New gradient after applying correction:')
    # for n, v in gradient.items():
    #     print(f' {n}: {v["value"]}')

    # Store the current gradient for the next time, to calculate the running average
    self.previous_gradient = gradient

    # Store the best of the two evals f1 and f2 (or both)
    if (f1 <= current_goal):
        self.best_eval[self.best_count % 1000] = f1
        self.best_theta[self.best_count % 1000] = theta1
        self.best_count += 1

    if (f2 <= current_goal):
        self.best_eval[self.best_count % 1000] = f2
        self.best_theta[self.best_count % 1000] = theta2
        self.best_count += 1

    logging.info(f'{__file__} > final gradient: {gradient}')

    # Return the estimation of the new gradient
    return gradient
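
# Illustrative sketch (standalone, with a hypothetical goal function): the
# parallel branch above follows the standard multiprocessing pattern of
# starting one Process per evaluation and collecting results in a
# Manager().dict() shared between the processes. Storing each result under an
# explicit key (0 and 1) keeps the lookup unambiguous regardless of which
# match finishes first.
import multiprocessing

def _sketch_goal(theta, index, res):
    # Hypothetical stand-in for an engine match; stores its result under `index`.
    res[index] = sum(theta.values())

def _sketch_parallel_evaluations(theta1, theta2):
    manager = multiprocessing.Manager()
    res = manager.dict()
    jobs = []
    for i, t in enumerate((theta1, theta2)):
        p = multiprocessing.Process(target=_sketch_goal, args=(t, i, res))
        jobs.append(p)
        p.start()
    for p in jobs:
        p.join()
    return res[0], res[1]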
def run(self):
    """
    Return a point which is (hopefully) a minimizer of the goal
    function f, starting from point theta0.

    Returns:
        The point (as a dict) which is (hopefully) a minimizer of "f".
    """
    is_spsa = True
    is_steep_descent = False
    is_rprop = False

    k = 0
    theta = self.theta0

    while True:
        k = k + 1
        self.iter = k
        print(f'starting iter {k} ...')

        if self.constraints is not None:
            theta = self.constraints(theta)

        print('current param:')
        for name, value in utils.true_param(theta).items():
            print(f' {name}: {value["value"]}')

        c_k = self.c / (k**self.gamma)
        a_k = self.a / ((k + self.A)**self.alpha)
        # print(f' ck: {c_k:0.5f}')
        # print(f' ak: {a_k:0.5f}')

        # Run the engine match here to get the gradient
        print('Run engine match ...')
        gradient = self.approximate_gradient(theta, c_k, k)

        # For SPSA we update with a small step (theta = theta - a_k * gradient)
        if is_spsa:
            theta = utils.linear_combinaison(1.0, theta, -a_k, gradient)
            logging.info(f'{__file__} > theta from spsa: {theta}')
            # print('new param after application of gradient:')
            # for n, v in theta.items():
            #     print(f' {n}: {int(v["value"] * v["factor"])}')

        # For steepest descent we update via a constant small step in the gradient direction
        elif is_steep_descent:
            mu = -0.01 / max(1.0, utils.norm2(gradient))
            theta = utils.linear_combinaison(1.0, theta, mu, gradient)

        # For RPROP, we update with information about the sign of the gradients
        elif is_rprop:
            theta = utils.linear_combinaison(1.0, theta, -0.01,
                                             self.rprop(theta, gradient))

        # Apply parameter limits
        theta = utils.apply_limits(theta)
        logging.info(f'{__file__} > theta with limits: {theta}')
        # print('new param after application of limits:')
        # for n, v in theta.items():
        #     print(f' {n}: {int(v["value"] * v["factor"])}')

        # We then move to the point which gives the best average of goal
        (avg_goal, avg_theta) = self.average_best_evals(30)
        logging.info(f'{__file__} > avg_theta from average_best_evals: {avg_theta}')
        theta = utils.linear_combinaison(0.98, theta, 0.02, avg_theta)
        logging.info(f'{__file__} > theta with avg_theta: {theta}')
        # print('new param after application of best average param:')
        # for n, v in theta.items():
        #     print(f' {n}: {int(v["value"] * v["factor"])}')

        # Apply parameter limits
        theta = utils.apply_limits(theta)

        # This is the best param.
        logging.info(f'{__file__} > best param: {theta}')
        # print('new param after application of limits:')
        # for n, v in theta.items():
        #     print(f' {n}: {int(v["value"] * v["factor"])}')

        # Log best param values
        for kv, vv in theta.items():
            logging.info(f'<best> iter: {k}, param: {kv}, value: {int(vv["value"]*vv["factor"])}')

        print('best param:')
        for n, v in theta.items():
            print(f' {n}: {int(v["value"] * v["factor"])}')

        mean_all_goal, _ = self.average_evaluations(30)
        print(f'mean all goal: {mean_all_goal}')

        mean_best_goal, _ = self.average_best_evals(30)
        print(f'mean best goal: {mean_best_goal}')

        # Save data in csv for plotting.
        plot_data = {}
        plot_data.update({'iter': k})
        plot_data.update({'meanbestgoal': mean_best_goal})
        plot_data.update({'meanallgoal': mean_all_goal})
        plot_theta = utils.true_param(theta)
        for name, value in plot_theta.items():
            plot_data.update({name: value["value"]})

        with open(self.plot_data_file, 'a') as f:
            cnt = 0
            for name, value in plot_data.items():
                cnt += 1
                if cnt == len(plot_data):
                    f.write(f'{value}\n')
                else:
                    f.write(f'{value},')

        print(f'done iter {k} / {self.max_iter}')
        logging.info(f'{__file__} > done iter {k} / {self.max_iter}')
        print('=========================================')

        # Stopping rule 1: Average goal and iteration meet the
        # stop_all_mean_goal and stop_min_iter criteria.
        if k >= self.stop_min_iter and mean_all_goal <= self.stop_all_mean_goal:
            print('Stop optimization due to good average all goal!')
            break

        # Stopping rule 2: Average best goal and iteration meet the
        # stop_best_mean_goal and stop_min_iter criteria.
        if k >= self.stop_min_iter and mean_best_goal <= self.stop_best_mean_goal:
            print('Stop optimization due to good average best goal!')
            break

        # Stopping rule 3: Max iteration is reached.
        if k >= self.max_iter:
            print('Stop optimization due to max iteration!')
            break

    return utils.true_param(theta)
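
# Illustrative sketch (assumed standalone helper, not from the original class):
# the plot-data block above writes one comma-separated row per iteration by
# hand. The same row could be appended with the standard csv module, which also
# takes care of the trailing newline and quoting; `plot_data_file` and
# `plot_data` are assumptions taken from the code above.
import csv

def _sketch_append_plot_row(plot_data_file, plot_data):
    with open(plot_data_file, 'a', newline='') as f:
        csv.writer(f).writerow(plot_data.values())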