    def __call__(self, agent, render=False, max_steps=2000, exceed_reward=0):
        # Fitness is an inverse-distance-squared weighted average over the
        # reference points: the closer the agent is to a point, the more
        # that point's fitness value contributes.
        fit = 0.0
        dist = 0.0
        for i in range(len(self.generalist_points)):
            d_inv = (np.linalg.norm(agent.value - self.generalist_points[i]) +
                     1e-6)**-2
            dist += d_inv
            fit += d_inv * self.generalist_fit[i]
        for i in range(len(self.specialist_points)):
            d_inv = (np.linalg.norm(agent.value - self.specialist_points[i]) +
                     1e-6)**-2
            dist += d_inv
            fit += d_inv * self.specialist_fit[i]

        fit /= dist  # normalize by the total weight
        return Configuration.metric(agent, self, fit, [])
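Stripped of the class context, this weighting is plain inverse-distance-squared interpolation. A standalone sketch of the same computation (idw_fitness, points, fits, and x are illustrative names, not from the original class):

import numpy as np

def idw_fitness(x, points, fits, eps=1e-6):
    """Inverse-distance-squared weighted average of fitness values."""
    weights = np.array([(np.linalg.norm(x - p) + eps) ** -2 for p in points])
    return np.dot(weights, fits) / weights.sum()

# Close to the first point, the result is pulled toward that point's fitness.
points = [np.array([0.0, 0.0]), np.array([1.0, 1.0])]
print(idw_fitness(np.array([0.1, 0.0]), points, [1.0, 0.0]))  # ~0.99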

Example #2

    def __call__(self,
                 agent,
                 render=False,
                 use_state_path=False,
                 max_steps=2000,
                 exceed_reward=0):
        """
        Runs an entire episode of one agent over this environment.

        This method works in compliance with gym.Env methods :
            .reset()
            .render()
            .step(action)

        The execution stops at 'max_steps' iterations if not stopped otherwise, giving 'exceed_reward' to the agent.
        An observer is a function acting on the path taken by the agent, returning an observation.
        """
        state = self.reset()
        done = False

        fitness = 0
        path = list()
        count = 0
        while not done:
            if render:
                self.render()

            action = agent.choose_action(state)
            state, reward, done, info = self.step(action)

            if use_state_path:
                path.append(state)

            fitness += reward
            count += 1
            if count > max_steps:
                fitness += exceed_reward
                break

        return Configuration.metric(agent, self, fitness, path)
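The loop only requires that the agent expose a choose_action(state) method returning something env.step() accepts. A minimal random agent satisfying that contract (RandomAgent is hypothetical, for illustration only):

class RandomAgent:
    """Hypothetical agent: samples actions uniformly from the action space."""

    def __init__(self, action_space):
        self.action_space = action_space

    def choose_action(self, state):
        # Ignores the state entirely; enough to exercise the episode loop.
        return self.action_space.sample()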

Example #3

'parent_folder/algorithm{1-2}/Iteration0.pickle'.

Results are saved as a dictionary under 'parent_folder/Results.pickle'.

"""
import sys
import os
import pickle
sys.path.append("..")

import ipyparallel as ipp
import numpy as np
from Parameters import Configuration
from AnalysisTools.ExtractAgents import load_agents_last_iteration

Configuration.make()

nb_envs = 100  # Number of randomly generated environments

# Ipyparallel ----------------------------------------------------------------------------------------------------------
# Local parallelism: make sure an ipcluster is running beforehand, otherwise this will raise an error.
Configuration.rc = ipp.Client()
with Configuration.rc[:].sync_imports():
    from Parameters import Configuration
Configuration.rc[:].execute("Configuration.make()")
Configuration.lview = Configuration.rc.load_balanced_view()
Configuration.lview.block = True
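If ipp.Client() raises, no cluster is running; one can be started from a shell with `ipcluster start -n <engines>`. A quick connectivity check, using only the standard ipyparallel client API (a sketch, not part of the original script):

import ipyparallel as ipp

rc = ipp.Client()
print(f"{len(rc.ids)} engines connected")  # an empty list means no engines yet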

# Generating environments ----------------------------------------------------------------------------------------------

print(f"Generating {nb_envs} new environments ...")
Example #4
    def __call__(self,
                 agent,
                 render=False,
                 use_state_path=False,
                 max_steps=12000,
                 exceed_reward=0):
        self.balls = self.init_balls.copy()
        self.add_balls()
        if render and not self.windows_alive:
            self.env.enable_display()
        state = self.env.reset()
        state.append(0.0)  # extra observation: whether a ball is currently held
        if len(agent.choose_action(state)) != 3:
            raise AssertionError(
                "The current agent returned an action of length != 3. Aborting."
            )
        done = False

        fitness = 0.0

        is_stuck_x = deque()
        is_stuck_y = deque()

        path = list()
        count = 0
        while not done:
            if render:
                self.env.render()
                time.sleep(0.01)
            action = agent.choose_action(state)
            holding = action[2] > 0

            state, reward, done, info = self.env.step(
                (action[0] * 2.0, action[1] * 2.0))
            state.append(1.0 if self.ball_held != -1 else 0.0)

            self.pos = (self.env.get_robot_pos()[0],
                        self.env.get_robot_pos()[1])

            reward = 0.0  # discard the env reward (distance to goal); use the shaping below

            if holding:
                reward += self.catch()

            if not holding:
                reward += self.release()

            fitness += reward

            # Record the (x, y) position every 150 steps during the first 900 steps.
            if count % 150 == 0 and 900 >= count > 0:
                path.append(self.pos[0])
                path.append(self.pos[1])

            # After 900 steps, end the episode early if the robot has barely
            # moved over the recent window.
            if count % 50 == 0 and count >= 900:
                if np.array(is_stuck_x).std() + np.array(
                        is_stuck_y).std() < 10:
                    break

            # Stop once every ball has been gathered (after the warm-up period).
            if len(self.balls) == 0 and count >= 900:
                break

            # Keep the recent-position window bounded (about 200 samples).
            if len(is_stuck_x) == 200:
                is_stuck_x.popleft()
                is_stuck_x.popleft()
                is_stuck_y.popleft()
                is_stuck_y.popleft()
            is_stuck_x.append(self.pos[0])
            is_stuck_y.append(self.pos[1])

            count += 1
            if count >= max_steps:
                fitness += exceed_reward
                break
        return Configuration.metric(agent, self, fitness, path)
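The early-stopping test above is a variance check over a sliding window of recent positions. The same idea in a self-contained sketch, with deque(maxlen=...) replacing the manual popleft bookkeeping (a simplification, not the original code):

from collections import deque
import numpy as np

window_x, window_y = deque(maxlen=200), deque(maxlen=200)

def looks_stuck(xs, ys, threshold=10.0):
    # Low combined positional spread over the window means the robot
    # has effectively stopped moving.
    return len(xs) > 1 and np.std(xs) + np.std(ys) < threshold

for step, (x, y) in enumerate([(0.0, 0.0)] * 300):  # stationary robot
    window_x.append(x)
    window_y.append(y)
    if step % 50 == 0 and looks_stuck(window_x, window_y):
        print(f"stuck at step {step}")
        break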
Example #5
    def __call__(self, agent, render=False, max_steps=2000, exceed_reward=0):
        return Configuration.metric(agent, self,
                                    self.map(agent.value, self.y_value),
                                    [agent.value])
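Every __call__ shown here ends with Configuration.metric(agent, env, fitness, observation). Its body is not part of this excerpt; assuming it merely bundles the episode fitness with the behaviour descriptor, a plausible minimal sketch is:

def metric(agent, env, fitness, observation):
    # Hypothetical: pair the raw episode fitness with the behaviour
    # descriptor (e.g. the path) used by novelty/quality-diversity search.
    return fitness, observation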
Example #6
from Parameters import Configuration
from Algorithms.POET.Mutation import mutate_envs
from Algorithms.POET.LocalTraining import ES_Step
from Algorithms.POET.Transfer import Evaluate_Candidates
from Utils.Loader import resume_from_folder, prepare_folder
from Utils.Stats import bundle_stats, append_stats
import numpy as np
import ipyparallel as ipp
import argparse
import json
import os
import pickle
import warnings
warnings.filterwarnings("ignore")

Configuration.make()  # Deferred imports

# Ipyparallel --------------------------------------------------------------------------------------------------
# Local parallelism: make sure an ipcluster is running beforehand, otherwise this will raise an error.
Configuration.rc = ipp.Client()
with Configuration.rc[:].sync_imports():
    from Parameters import Configuration
Configuration.rc[:].execute("Configuration.make()")
Configuration.lview = Configuration.rc.load_balanced_view()
Configuration.lview.block = True

# Parse arguments ------------------------------------------------------------------------------------------------------

parser = argparse.ArgumentParser(
    description=
    'POET Enhanced Implementation as in Wang, Rui and Lehman, Joel, and Clune,'