def __call__(self, agent, render=False, max_steps=2000, exceed_reward=0):
    # Inverse-distance-squared weighting: each reference point contributes to the
    # fitness with weight 1 / d**2, where d is its distance to the agent's value.
    fit = 0
    dist = 0
    for i in range(len(self.generalist_points)):
        d_inv = (np.linalg.norm(agent.value - self.generalist_points[i]) + 1e-6) ** -2
        dist += d_inv
        fit += d_inv * self.generalist_fit[i]
    for i in range(len(self.specialist_points)):
        d_inv = (np.linalg.norm(agent.value - self.specialist_points[i]) + 1e-6) ** -2
        dist += d_inv
        fit += d_inv * self.specialist_fit[i]
    fit /= dist  # normalize so the weights sum to 1
    return Configuration.metric(agent, self, fit, [])
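# Illustration (assumption, not repository code) ---------------------------------------------------------------
# A minimal, self-contained sketch of the inverse-distance-squared (Shepard-style) weighting used above.
# The reference points and fitness values below are made up for the example.
import numpy as np

points = [np.array([0.0, 0.0]), np.array([1.0, 1.0])]
fits = [1.0, -1.0]

def idw_fitness(x):
    # Each reference point contributes with weight 1 / d**2; the 1e-6 avoids division by zero.
    weights = [(np.linalg.norm(x - p) + 1e-6) ** -2 for p in points]
    return sum(w * f for w, f in zip(weights, fits)) / sum(weights)

print(idw_fitness(np.array([0.1, 0.1])))  # near points[0] -> close to 1.0
print(idw_fitness(np.array([0.9, 0.9])))  # near points[1] -> close to -1.0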
def __call__(self, agent, render=False, use_state_path=False, max_steps=2000, exceed_reward=0):
    """
    Runs an entire episode of one agent over this environment.
    This method works in compliance with the gym.Env interface:
    .reset(), .render(), .step(action).
    If not stopped otherwise, the execution halts after 'max_steps' iterations,
    giving 'exceed_reward' to the agent.
    An observer is a function acting on the path taken by the agent, returning an observation.
    """
    state = self.reset()
    done = False
    fitness = 0
    path = list()
    count = 0
    while not done:
        if render:
            self.render()
        action = agent.choose_action(state)
        state, reward, done, info = self.step(action)
        if use_state_path:
            path.append(state)
        fitness += reward
        count += 1
        if count > max_steps:
            fitness += exceed_reward
            break
    return Configuration.metric(agent, self, fitness, path)
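# Illustration (assumption, not repository code) ---------------------------------------------------------------
# A minimal sketch of the agent/environment contract the loop above relies on, using the classic gym
# API (4-tuple step) and a hypothetical RandomAgent that satisfies choose_action(state).
import gym

class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space

    def choose_action(self, state):
        return self.action_space.sample()

env = gym.make("CartPole-v1")
agent = RandomAgent(env.action_space)
state = env.reset()
fitness, done = 0.0, False
while not done:
    state, reward, done, info = env.step(agent.choose_action(state))
    fitness += reward
print(f"Episode fitness: {fitness}")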
'parent_folder/algorithm{1-2}/Iteration0.pickle'. Results are saved as a dictionary under
'parent_folder/Results.pickle'.
"""

import sys
import os
import pickle
sys.path.append("..")
import ipyparallel as ipp
import numpy as np
from Parameters import Configuration
from AnalysisTools.ExtractAgents import load_agents_last_iteration

Configuration.make()

nb_envs = 100  # Number of randomly generated environments

# Ipyparallel ----------------------------------------------------------------------------------------------------------
# Local parallelism: make sure that ipcluster is started beforehand, otherwise this will raise an error.
Configuration.rc = ipp.Client()
with Configuration.rc[:].sync_imports():
    from Parameters import Configuration
Configuration.rc[:].execute("Configuration.make()")
Configuration.lview = Configuration.rc.load_balanced_view()
Configuration.lview.block = True

# Generating environments ----------------------------------------------------------------------------------------------
print(f"Generating {nb_envs} new environments ...")
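# Illustration (assumption, not repository code) ---------------------------------------------------------------
# How the load-balanced view above is typically used: fan (agent, environment) evaluations out to the
# engines with ipyparallel's map. The evaluate helper and the 'agents'/'envs' lists are hypothetical.
def evaluate(pair):
    agent, env = pair
    return env(agent)  # each environment's __call__ returns Configuration.metric(...)

pairs = [(agent, env) for agent in agents for env in envs]
results = Configuration.lview.map(evaluate, pairs)  # blocks until all results arrive (lview.block is True)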
def __call__(self, agent, render=False, use_state_path=False, max_steps=12000, exceed_reward=0):
    self.balls = self.init_balls.copy()
    self.add_balls()
    if render and not self.windows_alive:
        self.env.enable_display()

    state = self.env.reset()
    state.append(0.0)  # extra observation: is the robot currently holding a ball?
    if len(agent.choose_action(state)) != 3:
        raise AssertionError("The current agent returned an action of length != 3. Aborting.")

    done = False
    fitness = 0.0
    # Sliding windows of recent positions, used to detect a stuck robot.
    is_stuck_x = deque()
    is_stuck_y = deque()
    path = list()
    count = 0
    while not done:
        if render:
            self.env.render()
            time.sleep(0.01)
        action = agent.choose_action(state)
        holding = action[2] > 0
        state, reward, done, info = self.env.step((action[0] * 2.0, action[1] * 2.0))
        state.append(1.0 if self.ball_held != -1 else 0.0)
        self.pos = (self.env.get_robot_pos()[0], self.env.get_robot_pos()[1])

        reward = 0.0  # discard the environment's default reward (distance to goal)
        if holding:
            reward += self.catch()
        if not holding:
            reward += self.release()
        fitness += reward

        # Sample the robot's position every 150 steps during the first 900 steps.
        if count % 150 == 0 and 900 >= count > 0:
            path.append(self.pos[0])
            path.append(self.pos[1])
        # After 900 steps, stop early if the robot barely moved over the recent window.
        if count % 50 == 0 and count >= 900:
            if np.array(is_stuck_x).std() + np.array(is_stuck_y).std() < 10:
                break
        # Stop early once every ball has been collected.
        if len(self.balls) == 0 and count >= 900:
            break

        # Bound the position windows to roughly 200 recent samples.
        if len(is_stuck_x) == 200:
            is_stuck_x.popleft()
            is_stuck_x.popleft()
            is_stuck_y.popleft()
            is_stuck_y.popleft()
        is_stuck_x.append(self.pos[0])
        is_stuck_y.append(self.pos[1])

        count += 1
        if count >= max_steps:
            fitness += exceed_reward
            break
    return Configuration.metric(agent, self, fitness, path)
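# Illustration (assumption, not repository code) ---------------------------------------------------------------
# A standalone sketch of the stuck-detection criterion used above: the robot counts as stuck when the
# combined standard deviation of its recent x and y positions falls below a threshold. Traces are made up.
import numpy as np
from collections import deque

def is_stuck(xs, ys, threshold=10.0):
    return np.array(xs).std() + np.array(ys).std() < threshold

moving = deque(10.0 * i for i in range(200))
idle = deque([50.0] * 200)
print(is_stuck(moving, moving))  # False: positions are spread out
print(is_stuck(idle, idle))      # True: the robot has not moved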
def __call__(self, agent, render=False, max_steps=2000, exceed_reward=0):
    return Configuration.metric(agent, self, self.map(agent.value, self.y_value), [agent.value])
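# Illustration (assumption, not repository code) ---------------------------------------------------------------
# Every environment above returns Configuration.metric(agent, env, fitness, path). Its body is not shown
# in these snippets; a plausible minimal stand-in, assuming it simply bundles the raw fitness with an
# observation built from the path, might look like this.
import numpy as np

def fitness_metric(agent, env, fitness, path):
    observation = np.array(path) if len(path) > 0 else np.zeros(1)
    return fitness, observation

# Registered once at setup time, e.g.:
# Configuration.metric = fitness_metric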
from Parameters import Configuration
from Algorithms.POET.Mutation import mutate_envs
from Algorithms.POET.LocalTraining import ES_Step
from Algorithms.POET.Transfer import Evaluate_Candidates
from Utils.Loader import resume_from_folder, prepare_folder
from Utils.Stats import bundle_stats, append_stats
import numpy as np
import ipyparallel as ipp
import argparse
import json
import os
import pickle
import warnings

warnings.filterwarnings("ignore")
Configuration.make()  # Deferred imports

# Ipyparallel --------------------------------------------------------------------------------------------------
# Local parallelism: make sure that ipcluster is started beforehand, otherwise this will raise an error.
Configuration.rc = ipp.Client()
with Configuration.rc[:].sync_imports():
    from Parameters import Configuration
Configuration.rc[:].execute("Configuration.make()")
Configuration.lview = Configuration.rc.load_balanced_view()
Configuration.lview.block = True

# Parse arguments ------------------------------------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    description='POET Enhanced Implementation as in Wang, Rui and Lehman, Joel, and Clune,'