def run_ES(population, i):
    model = population[0].genome
    es = EvolutionStrategy(model, get_reward, population_size=POPULATION_SIZE,
                           sigma=0.25, learning_rate=0.03, decay=0.998,
                           num_threads=2)
    es.run(5000, print_step=5, start=i)
    optimized = es.get_weights()
    return optimized
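# `get_reward` above is a global supplied elsewhere; evostra only requires a
# callable that maps a candidate weight set to a scalar fitness (higher is
# better). A minimal sketch of that contract, with a hypothetical quadratic
# target in place of a real environment rollout:
import numpy as np

TARGET = np.array([0.5, 0.1, -0.3])

def get_reward(weights):
    # Negative squared distance to the target: maximized when weights == TARGET.
    return -float(np.sum((np.asarray(weights) - TARGET) ** 2))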
def find_shapelets_es(timeseries, labels, max_len=100, min_len=1,
                      population_size=100, iterations=25, verbose=True,
                      sigma=0.1, learning_rate=0.001):
    def cost(shapelet):
        return check_candidate(timeseries, labels, shapelet)[0]

    candidates = np.array(generate_candidates(timeseries, labels, max_len, min_len))
    # Seed the ES with one randomly chosen candidate shapelet.
    seed = candidates[np.random.choice(range(len(candidates)), size=population_size)][0][0]
    es = EvolutionStrategy(seed, cost, population_size=population_size,
                           sigma=sigma, learning_rate=learning_rate)
    es.run(iterations, print_step=1)
    best_shapelet = es.get_weights()
    return best_shapelet
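# A hypothetical smoke test for find_shapelets_es: two classes of length-50
# series, one flat and one containing a bump, so a discriminative shapelet
# exists. Assumes generate_candidates/check_candidate are already in scope.
import numpy as np

rng = np.random.RandomState(0)
flat = [rng.randn(50) * 0.1 for _ in range(20)]
bump = [np.concatenate([rng.randn(20) * 0.1, np.hanning(10), rng.randn(20) * 0.1])
        for _ in range(20)]
timeseries = flat + bump
labels = [0] * 20 + [1] * 20

best = find_shapelets_es(timeseries, labels, max_len=15, min_len=5,
                         population_size=50, iterations=10)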
import argparse
import pickle

parser = argparse.ArgumentParser(description='Evolution Strategies.')
parser.add_argument('--env', default="Humanoid-v2")
# Note: argparse's type=bool treats any non-empty string as True,
# so a store_true flag is used instead.
parser.add_argument('--render', action='store_true', default=False)
args = parser.parse_args()

observationSpace, actionSpace = env_info(args.env)

# A feed-forward neural network whose input layer matches the observation space,
# with two hidden layers of sizes 32 and 16, and an output layer matching the
# action space.
model = FeedForwardNetwork(layer_sizes=[observationSpace, 32, 16, actionSpace])
get_reward = make_get_reward(args.env, model, args.render)

# If your task is computationally expensive, you can use num_threads > 1 to use
# multiple processes; if you set num_threads=-1, it will use the number of cores
# available on the machine. Here we use 1 process, as the task is not
# computationally expensive and using more processes would decrease performance
# due to the IPC overhead.
es = EvolutionStrategy(model.get_weights(), get_reward, population_size=20,
                       sigma=0.1, learning_rate=0.03, decay=0.995, num_threads=1)
es.run(1000, print_step=100)

with open(args.env + ".pkl", 'wb') as fp:
    pickle.dump(es.get_weights(), fp)

# while True:
#     print(get_reward(es.get_weights(), True))
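# A companion evaluation sketch, following the commented-out loop above: reload
# the pickled weights and score them with rendering forced on. It assumes the
# closure returned by make_get_reward accepts a render flag as its second
# argument, as the commented-out call suggests.
with open(args.env + ".pkl", 'rb') as fp:
    trained_weights = pickle.load(fp)
model.set_weights(trained_weights)
print(get_reward(trained_weights, True))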
class Agent:

    AGENT_HISTORY_LENGTH = 1
    NUM_OF_ACTIONS = 2
    POPULATION_SIZE = 15
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.03
    INITIAL_EXPLORATION = 0.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000

    def __init__(self):
        self.model = Model()
        self.game = FlappyBird(pipe_gap=125)
        self.env = PLE(self.game, fps=30, display_screen=False)
        self.env.init()
        self.env.getGameState = self.game.getGameState
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        x = np.argmax(prediction)
        return 119 if x == 1 else None

    def load(self, filename='weights.pkl'):
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def get_observation(self):
        state = self.env.getGameState()
        # list() is needed on Python 3, where dict.values() returns a view.
        return np.array(list(state.values()))

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes):
        self.env.display_screen = True
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            score = 0
            while not done:
                action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()
                if self.game.getScore() > score:
                    score = self.game.getScore()
                    print("score: %d" % score)
        self.env.display_screen = False

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = random.choice([119, None])
                else:
                    action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                # Tiny random noise so identical rewards do not stall the ES update.
                reward += random.choice([0.0001, -0.0001])
                total_reward += reward
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()
        return total_reward / self.EPS_AVG
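# A typical driver for the agent above (a sketch; the iteration count is arbitrary):
if __name__ == '__main__':
    agent = Agent()
    agent.train(100)           # 100 ES iterations
    agent.save('weights.pkl')
    agent.play(3)              # watch 3 episodes with the learned policy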
idx_map += 1

# We save the score along with the name of the run.
score_str = str(round(best, 2))
with open(f'{name}_{score_str}.pickle', 'wb') as handle:
    pickle.dump(list_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Create the circuit_mapping object.
circuit_mapping = CircuitMapping(library_data)

# Create the neural net.
net = m.MODEL(20, library_data, args.path_json)

# Run the evolution strategies (ES).
# Note: you can play with the parameters if you want;
# these seem to give reasonable results.
es = EvolutionStrategy(
    net.model.get_weights(),
    get_reward,
    population_size=5,
    sigma=0.01,           # noise standard deviation
    learning_rate=0.001,
    decay=0.995,
    num_threads=1)
es.run(args.n_epoch)

save_dict(args.path_json, args.name, verbose=True)

done = time.time()
elapsed = done - start
print(f'elapsed time: {elapsed}')
class Agent:

    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 25
    EPS_AVG = 1
    SIGMA = 0.5
    LEARNING_RATE = 0.1
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000

    def __init__(self):
        self.env = gym.make('BipedalWalker-v2')
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.EPS_AVG
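# Evaluation-only driver (a sketch): reload previously saved weights and watch
# the walker without further training.
if __name__ == '__main__':
    agent = Agent()
    agent.load('weights.pkl')
    agent.play(episodes=5, render=True)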
class Agent:

    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 20
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.01
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.01
    EXPLORATION_DEC_STEPS = 50000
    plotScores = []
    plotEpisodes = []
    plotMaxTiles = []
    plotEpiCounter = 0
    GRID_SIZE = 3
    action_space = [0, 1, 2, 3]

    def __init__(self):
        random.seed(int(time.time()))
        np.random.seed(int(time.time()))
        window_length = 1
        nb_hidden = 256
        nb_actions = 4
        self.env = GameLogic(size=self.GRID_SIZE)
        # Alternative functional-API model:
        # input_layer = Input(shape=(1, self.GRID_SIZE * self.GRID_SIZE))
        # layer = Dense(8)(input_layer)
        # output_layer = Dense(3)(layer)
        # self.model = Model(input_layer, output_layer)
        # self.model.compile(Adam(), 'mse')
        self.model = Sequential()
        self.model.add(Flatten(input_shape=(window_length, self.GRID_SIZE * self.GRID_SIZE)))
        self.model.add(Dense(nb_hidden))
        self.model.add(Activation('relu'))
        self.model.add(Dense(nb_hidden))
        self.model.add(Activation('relu'))
        self.model.add(Dense(nb_actions, activation='linear'))
        self.model.summary()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='data/weights.pkl'):
        self.model.load_weights(filename)
        self.es.weights = self.model.get_weights()

    def save(self, filename='data/weights.pkl'):
        self.model.save_weights(filename, overwrite=True)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            observation = np.reshape(observation, [1, self.GRID_SIZE * self.GRID_SIZE])
            done = False
            while not done:
                # Act greedily on the network's Q-values (mirrors get_reward below);
                # stepping with the raw prediction vector would be an invalid action.
                action = np.argmax(self.model.predict(np.array([observation]))[0])
                observation, reward, done, _ = self.env.step(action)
                observation = np.reshape(observation, [1, self.GRID_SIZE * self.GRID_SIZE])
                total_reward += reward
            print("total reward: " + str(total_reward))

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            observation = self.env.reset()
            observation = np.reshape(observation, [1, self.GRID_SIZE * self.GRID_SIZE])
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = random.randint(0, 3)
                else:
                    action = np.argmax(self.model.predict(np.array([observation]))[0])
                observation, reward, done, _ = self.env.step(action)
                observation = np.reshape(observation, [1, self.GRID_SIZE * self.GRID_SIZE])
                total_reward += reward
            self.plotEpiCounter += 1
            self.plotEpisodes.append(self.plotEpiCounter)
            self.plotScores.append(self.env._score)
            self.plotMaxTiles.append(2 ** self.env._getMaxNumber())
            pylab.plot(self.plotEpisodes, self.plotScores, '-b', label='Score')
            pylab.plot(self.plotEpisodes, self.plotMaxTiles, '-r', label='Max Tile')
            pylab.savefig('data/evostra_{}_{}x.png'.format(ENV_NAME, self.GRID_SIZE))
            print("Game Score: {} Max Tile: {} Exploration: {}".format(
                self.env._score, 2 ** self.env._getMaxNumber(), self.exploration))
        return total_reward / self.EPS_AVG
class Agent:

    agent_hist = 1
    population = 50
    eps_avg = 1
    sigma = 0.2
    # learning rate
    lr = 0.1
    init_explore = 0.9
    final_explore = 0.1
    explore_steps = 1E+5

    def __init__(self):
        # Initializes environment, model, algorithm and exploration.
        self.env = gym.make(GYM_ENV)
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.population, self.sigma, self.lr)
        self.exploration = self.init_explore

    def get_predicted_action(self, sequence):
        # Retrieve the predicted action.
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        # Loads weights for agent_play.
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        # Saves weights to a pickle file.
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        # Run the model in the OpenAI environment.
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("Total reward:", total_reward)

    def train(self, iterations):
        # Begin training.
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        # Initialize reward.
        total_reward = 0.0
        self.model.set_weights(weights)
        # Calculate reward.
        for episode in range(self.eps_avg):
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                self.exploration = max(
                    self.final_explore,
                    self.exploration - self.init_explore / self.explore_steps)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.eps_avg
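# Worked check of the exploration schedule above: each environment step subtracts
# init_explore / explore_steps = 0.9 / 1e5 = 9e-6 from the exploration rate, so it
# decays from 0.9 to final_explore = 0.1 after (0.9 - 0.1) / 9e-6 ≈ 88,889 steps.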
    for i in range(NUM_PARAMETERS):
        weights['w' + str(i)] = (-W_MAX, W_MAX)
        weights_explore['w' + str(i)] = [-W_MAX, W_MAX]
    gp_params = {'alpha': 1e-5, 'xi': 0.01}
    kappa = 1.0
    bo = BayesianOptimization(optimization_function_BAYESIAN, weights)
    bo.maximize(init_points=NUM_PARAMETERS, n_iter=99999999999, acq='ucb',
                kappa=kappa, **gp_params)
elif SIMPLE_ES:
    server.player = -1
    es = EvolutionStrategy(np.zeros(NUM_PARAMETERS), optimization_function_simple_es,
                           population_size=NUM_KID, sigma=0.4, learning_rate=0.2,
                           decay=0.98, num_threads=1)  # 0.4 0.2 0.99
    es.run(N_GENERATIONS, print_step=9999999999)
else:
    if MAX_PARAMETERS:
        predator_sigma = PREDATOR_SIGMA_LOW
        wall_sigma = WALL_SIGMA_LOW
        PW_RATIO = PW_RATIO_LOW
        update_parameters(predator_sigma, wall_sigma, PW_RATIO)
    min_predator_fitness = 100000
    best_sigma = (predator_sigma, wall_sigma, PW_RATIO)
    while True:
        if not server.playing:
            if player != -1:
                kids_fitness[player] = server.fitness
class Agent: """The agent class.""" ENV_ID = 'BipedalWalker-v2' # This is the number of the history obervations used in action prediction. AGENT_HISTORY_LENGTH = 1 POPULATION_SIZE = 20 EPS_AVG = 1 SIGMA = 0.1 LEARNING_RATE = 0.01 # The following three parameters control the exlporation probabilities. # It starts with INITIAL_EXPLORATION, ends with FINAL_EXPLORATION after # EXLPORATION_DEC_STEPS steps. INITIAL_EXPLORATION = 1.0 FINAL_EXPLORATION = 0.0 EXPLORATION_DEC_STEPS = 1000000 def __init__(self): """Initialize the agent.""" # Initialize the openai-gym environment. self.env = gym.make(self.ENV_ID) # uncomment following lines if you want to record the video # self.env = gym.wrappers.Monitor(self.env, "{}_monitor".format(self.ENV_ID), # lambda episode_id: True, force=True) # Initialze the training model. self.model = Model() # Initialize the evolution strategy of evostra self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward, self.POPULATION_SIZE, self.SIGMA, self.LEARNING_RATE) self.exploration = self.INITIAL_EXPLORATION self.exploration_dec = self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS def train(self, iterations=100, print_step=1, filename='weights.pkl'): """Train the model.""" self.es.run(iterations, print_step=print_step) self.save(filename) def load(self, filename='weights.pkl'): """Load the model weights from file.""" with open(filename, 'rb') as fp: self.model.set_weights(pickle.load(fp, encoding='bytes')) self.es.weights = self.model.get_weights() def save(self, filename='weights.pkl'): """Save the weights of current model into file.""" with open(filename, 'wb') as fp: pickle.dump(self.es.get_weights(), fp) def play(self, episodes=1, render=True): """Play the agent for episodes.""" self.model.set_weights(self.es.weights) for episode in range(episodes): total_reward = 0 # Get the initial observation. observation = self.env.reset() # Fill the observation sequence with repeated initial obsercations # for AGENT_HISTORY_LENGTH times. sequence = [observation] * self.AGENT_HISTORY_LENGTH done = False while not done: if render: # Visualize. self.env.render() action = self.get_predicted_action(sequence) # Get the results of the action. observation, reward, done, _ = self.env.step(action) total_reward += reward # Shift the observation sequence to include the new one. sequence = sequence[1:] sequence.append(observation) print("total reward: ", total_reward) def get_predicted_action(self, sequence): """Get the model's predicted action based on sequence of states.""" prediction = self.model.predict(np.array(sequence)) return prediction def get_reward(self, weights): """Get the reward of the current model based on EPS_AVG times of tests.""" total_reward = 0.0 self.model.set_weights(weights) # Run tests for EPS_AVG times. for episode in range(self.EPS_AVG): # Get the initial observation. observation = self.env.reset() # Fill the observation sequence with repeated initial obsercations # for AGENT_HISTORY_LENGTH times. sequence = [observation] * self.AGENT_HISTORY_LENGTH done = False while not done: self.exploration = max(self.FINAL_EXPLORATION, self.exploration - self.exploration_dec) # Randomize exploration. if random.random() < self.exploration: action = self.env.action_space.sample() else: action = self.get_predicted_action(sequence) # Get the results of the action. observation, reward, done, _ = self.env.step(action) total_reward += reward # Shift the observation sequence to include the new one. 
sequence = sequence[1:] sequence.append(observation) return total_reward / self.EPS_AVG
prediction = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=prediction)


def get_reward(weights):
    env = gym.make("CartPole-v0")
    model.set_weights(weights)
    ob = env.reset()
    done = False
    total_reward = 0
    while not done:
        batch = ob[np.newaxis, ...]
        prediction = model.predict(batch)
        action = np.argmax(prediction)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward


es = EvolutionStrategy(model.get_weights(), get_reward, population_size=100,
                       sigma=0.1, learning_rate=0.001, render_test=False)
es.run(300)
# Copy the optimized weights back into the model before saving; otherwise the
# file would hold whichever perturbed candidate was evaluated last.
model.set_weights(es.get_weights())
model.save('cartpole.h5')
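# Replaying the saved policy (a sketch): Keras can reload the .h5 file directly.
from keras.models import load_model

model = load_model('cartpole.h5')
env = gym.make("CartPole-v0")
ob = env.reset()
done = False
while not done:
    action = np.argmax(model.predict(ob[np.newaxis, ...]))
    ob, _, done, _ = env.step(action)
    env.render()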
class EvolutionStrategy:

    def __init__(self, model, weights, env):
        self.model = model
        self.weights = weights
        self.POPULATION_SIZE = 20
        self.SIGMA = 0.1
        self.LEARNING_RATE = 0.01
        self.decay = 0.999
        self.env = env
        # `Eee` is presumably evostra's EvolutionStrategy imported under an
        # alias to avoid clashing with this class's name.
        self.es = Eee(self.weights, self.__get_reward)

    def __update_weights(self):
        pass

    def __get_population_rewards(self, population_weights):
        # self.env.step
        solution = 0  # target
        rewards = []
        for w in population_weights:
            reward = self.__get_reward(w)
            rewards.append(reward)
        normalized_rewards = (rewards - np.mean(rewards)) / np.std(rewards)
        return normalized_rewards, rewards

    def __get_reward(self, current_weights):
        self.model.set_weights(current_weights)
        rewards = 0
        episodes = 1
        for _ in range(episodes):
            done = False
            obs = self.env.reset()
            obs = obs.reshape([1, 4])
            while not done:
                prediction = self.model.predict(obs)
                prediction = np.argmax(prediction)
                obs, reward, done, _ = self.env.step(prediction)
                # self.env.render()
                obs = obs.reshape([1, 4])
                rewards += reward
        return rewards / episodes

    def __generate_population_weights(self):
        population_weights = []
        for i in range(self.POPULATION_SIZE):
            weights_jitter = []
            for w in self.weights:
                weights_jitter.append(np.random.randn(*w.shape) * self.SIGMA)
            # Add the noise layer-wise; a bare list `+` would concatenate the
            # two lists instead of summing the arrays.
            current_weights = [w + j for w, j in zip(self.weights, weights_jitter)]
            population_weights.append(current_weights)
        return population_weights

    def update(self):
        self.es.run(600, print_step=1)
        # Update the weights: first generate the population weights...
        population_weights = self.__generate_population_weights()
        population_norm_rewards, rewards = self.__get_population_rewards(
            population_weights)
        # ...then update self.weights layer by layer.
        for index, w in enumerate(self.weights):
            current_weight = np.array([
                population_weight[index]
                for population_weight in population_weights
            ])
            obj_func = np.dot(current_weight.T, population_norm_rewards).T
            self.weights[index] = w + self.LEARNING_RATE / (
                self.SIGMA * self.POPULATION_SIZE) * obj_func
        self.LEARNING_RATE = self.LEARNING_RATE * self.decay
        return np.max(rewards)
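# The update in `update()` implements the standard OpenAI-style ES estimator,
#     w <- w + alpha / (N * sigma) * sum_i F_hat_i * theta_i,
# where theta_i = w + eps_i are the perturbed weights and F_hat_i are the
# mean/std-normalized rewards. Since sum_i F_hat_i = 0, the w terms cancel and
# this equals the usual sum_i F_hat_i * eps_i form. A minimal driver, assuming
# `model` is a Keras net over a 4-dimensional observation (e.g. CartPole) and
# `env` is the matching gym environment:
es = EvolutionStrategy(model, model.get_weights(), env)
best = es.update()  # runs the inner evostra loop, then one manual ES step
print("best reward seen:", best)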
class Agent:

    def __init__(self, model, training_steps=500, environment='BipedalWalker-v2',
                 AGENT_HISTORY_LENGTH=1, POPULATION_SIZE=50, EPS_AVG=1, SIGMA=0.1,
                 LEARNING_RATE=0.01, INITIAL_EXPLORATION=1.0, FINAL_EXPLORATION=0.0,
                 EXPLORATION_DEC_STEPS=10000, num_thread=1, LR_mode=0):
        self.env = gym.make(environment)
        self.model = model
        self.exploration = INITIAL_EXPLORATION
        self.training_steps = training_steps
        self.AGENT_HISTORY_LENGTH = AGENT_HISTORY_LENGTH
        self.POPULATION_SIZE = POPULATION_SIZE
        self.EPS_AVG = EPS_AVG
        self.SIGMA = SIGMA
        self.LEARNING_RATE = LEARNING_RATE
        self.INITIAL_EXPLORATION = INITIAL_EXPLORATION
        self.FINAL_EXPLORATION = FINAL_EXPLORATION
        self.EXPLORATION_DEC_STEPS = EXPLORATION_DEC_STEPS
        self.num_thread = num_thread
        self.LR_mode = LR_mode
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE, num_threads=num_thread,
                                    LR_mode=self.LR_mode)

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, model_file):
        with open(model_file, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, model_file):
        with open(model_file, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def train(self, iterations):
        print('Training')
        self.es.run(iterations, print_step=1)
        optimized_weights = self.es.get_weights()
        self.model.set_weights(optimized_weights)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            print('On episode number {}'.format(episode))
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            start_time = time.time()
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            # print("total reward: ", total_reward)
            # print('Finished in {} seconds'.format(time.time() - start_time))
        return total_reward / self.EPS_AVG
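# A hypothetical driver for the configurable agent above; Model() is assumed to
# be defined elsewhere in this project. num_thread > 1 parallelizes the reward
# evaluations across processes.
if __name__ == '__main__':
    agent = Agent(Model(), environment='BipedalWalker-v2', POPULATION_SIZE=50,
                  SIGMA=0.1, LEARNING_RATE=0.01, num_thread=4)
    agent.train(200)
    agent.save('bipedal_weights.pkl')
    agent.play(episodes=3, render=True)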