Example #1
def main():

    env = ForagerEnv()
    reward = 0
    done = False
    if RENDER:
        env.render()  # If visualization is desired
    else:
        print("Running simulation...")
    steps = 0
    start_time = timeit.default_timer()
    for _ in range(num_trials):
        observation = env.reset()
        while True:
            run_results = agent_comp.run(
                inputs={
                    player_obs: [observation[player_coord_slice]],
                    predator_obs: [observation[predator_coord_slice]],
                    prey_obs: [observation[prey_coord_slice]],
                    Panicky_control_mech: [0]
                    # values:[observation[player_value_idx],observation[prey_value_idx],observation[predator_value_idx]],
                    # reward:[reward],
                })
            action = np.where(run_results[0] == 0, 0,
                              run_results[0] / np.abs(run_results[0]))
            observation, reward, done, _ = env.step(action)
            steps += 1
            if done:
                break
    stop_time = timeit.default_timer()
    print(
        f'{steps / (stop_time - start_time):.1f} steps/second, {steps} total steps in '
        f'{stop_time - start_time:.2f} seconds')
    if RENDER:
        env.render()  # If visualization is desired
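
The np.where expression above maps each component of the Composition's raw output to -1, 0, or +1, i.e. it unitizes the motor command. A minimal, self-contained sketch of what it computes (NumPy only; the values are illustrative):

import numpy as np

run_result = np.array([-3.2, 0.0, 1.7])                                  # hypothetical Composition output
action = np.where(run_result == 0, 0, run_result / np.abs(run_result))   # -> [-1.  0.  1.]
print(action)
print(np.sign(run_result))  # equivalent result, and avoids the divide-by-zero warning
                            # (np.where evaluates both branches before selecting)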
Example #2
def main():
    env = ForagerEnv()
    reward = 0
    done = False

    def my_print():
        print(ocm.net_outcome)

    if RENDER:
        env.render()  # If visualization is desired
    else:
        print("Running simulation...")
    steps = 0
    start_time = timeit.default_timer()
    for _ in range(num_trials):
        observation = env.reset()
        while True:
            if PNL_COMPILE:
                BIN_EXECUTE = 'LLVM'
            else:
                BIN_EXECUTE = 'Python'
            run_results = agent_comp.run(inputs={
                player_obs: [observation[player_coord_slice]],
                predator_obs: [observation[predator_coord_slice]],
                prey_obs: [observation[prey_coord_slice]],
            },
                                         call_after_trial=my_print,
                                         bin_execute=BIN_EXECUTE)

            action = np.where(run_results[0] == 0, 0,
                              run_results[0] / np.abs(run_results[0]))
            # action = np.squeeze(np.where(greedy_action_mech.value==0,0,
            #                              greedy_action_mech.value[0]/np.abs(greedy_action_mech.value[0])))
            observation, reward, done, _ = env.step(action)
            print('OCM ControlSignals:')
            print(
                '\n\tOutcome: {}\n\tPlayer OBS: {}\n\tPredator OBS: {}\n\tPrey OBS: {}'
                .format(ocm._objective_mechanism.value,
                        ocm.control_signals[0].value,
                        ocm.control_signals[1].value,
                        ocm.control_signals[2].value))
            for sample, value in zip(ocm.saved_samples, ocm.saved_values):
                print('\n\t\tSample: {} Value: {}'.format(sample, value))
            steps += 1
            if done:
                break
    stop_time = timeit.default_timer()
    print(
        f'{steps / (stop_time - start_time):.1f} steps/second, {steps} total steps in '
        f'{stop_time - start_time:.2f} seconds')
    if RENDER:
        env.render()  # If visualization is desired
Example #3
    def __init__(self):

        self.seed = int.from_bytes(os.urandom(4), byteorder="big")

        from psyneulink.core.globals.utilities import set_global_seed
        set_global_seed(self.seed)
        np.random.seed(self.seed+1)

        # Setup a Gym Forager environment for the game
        self.gym_forager_env = ForagerEnv(obs_type='egocentric', incl_values=False, frameskip=2)
        self.gym_forager_env.seed(self.seed+2)

        # Setup an instance of the double DQN agent for determining optimal actions
        self.ddqn_agent = DoubleDQNAgent(model_load_path=MODEL_PATH,
                                         eval_mode=True,
                                         save_frames=False,
                                         render=RENDER,
                                         env=self.gym_forager_env)

        # Setup the PsyNeuLink composition
        self._setup_composition()
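
The seeding pattern above draws a single base seed from the OS entropy pool and gives each random source (PsyNeuLink's global RNG, NumPy, and the Gym environment) its own fixed offset, so an entire run can be reproduced from one logged integer. A minimal sketch of the same idea, using only the standard library and NumPy (the print is only there so the seed can be recovered later):

import os
import numpy as np

base_seed = int.from_bytes(os.urandom(4), byteorder="big")
print(f"base seed: {base_seed}")   # log this value to reproduce the run
np.random.seed(base_seed + 1)      # NumPy RNG gets its own offset
env_seed = base_seed + 2           # would be passed to the environment's seed() method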
Example #4
def main():
    env = ForagerEnv()
    reward = 0
    done = False
    if RENDER:
        env.render()  # If visualization is desired
    else:
        print("Running simulation...")
    steps = 0
    start_time = timeit.default_timer()
    for _ in range(num_trials):
        observation = env.reset()
        while True:
            if PNL:
                if PNL_COMPILE:
                    BIN_EXECUTE = 'LLVMRun'
                else:
                    BIN_EXECUTE = 'Python'
                run_results = agent_comp.run(inputs={
                    player: [observation[player_coord_idx]],
                    prey: [observation[prey_coord_idx]],
                },
                                             bin_execute=BIN_EXECUTE)
                run_results[0] = np.array(run_results[0])
                action = np.where(run_results[0] == 0, 0,
                                  run_results[0] / np.abs(run_results[0]))
            else:
                run_results = observation[prey_coord_idx] - observation[
                    player_coord_idx]
                action = np.where(run_results == 0, 0,
                                  run_results / np.abs(run_results))

            observation, reward, done, _ = env.step(action)
            steps += 1
            if done:
                break
    stop_time = timeit.default_timer()
    print(
        f'{steps / (stop_time - start_time):.1f} steps/second, {steps} total steps in '
        f'{stop_time - start_time:.2f} seconds')
    if RENDER:
        env.render()  # If visualization is desired
Example #5
class PredatorPreySimulator:

    def __init__(self):

        self.seed = int.from_bytes(os.urandom(4), byteorder="big")

        from psyneulink.core.globals.utilities import set_global_seed
        set_global_seed(self.seed)
        np.random.seed(self.seed+1)

        # Setup a Gym Forager environment for the game
        self.gym_forager_env = ForagerEnv(obs_type='egocentric', incl_values=False, frameskip=2)
        self.gym_forager_env.seed(self.seed+2)

        # Setup an instance of the double DQN agent for determining optimal actions
        self.ddqn_agent = DoubleDQNAgent(model_load_path=MODEL_PATH,
                                         eval_mode=True,
                                         save_frames=False,
                                         render=RENDER,
                                         env=self.gym_forager_env)

        # Setup the PsyNeuLink composition
        self._setup_composition()

    # Helper function for getting the optimal action from the double DQN
    def _get_optimal_action(self, observation):
        # Get new state based on observation:
        veridical_state = self.ddqn_agent.buffer.next(np.array(observation))
        optimal_action = np.array(self.ddqn_agent._io_map(self.ddqn_agent._select_action(veridical_state).item()))
        if VERBOSE >= ACTION_REPORTING:
            print(f'\n\nOPTIMAL OBSERVATION: {observation}'
                  f'\nVERIDICAL STATE: {veridical_state.reshape(12, )}'
                  f'\nOPTIMAL ACTION: {optimal_action}')
        return optimal_action

    def _setup_composition(self):

        def get_new_episode_flag():
            return self.new_episode_flag

        # Condition for executing controller, execute on a new episode.
        self.new_episode_flag = True
        self.CONTROLLER_CONDITION = Condition(func=get_new_episode_flag)  # tells schedule when to run OCM

        # **************************************  PROCESSING MECHANISMS ********************************************************

        # Perceptual Mechanisms
        self.player_percept = ProcessingMechanism(size=prey_len, function=GaussianDistort(), name="PLAYER PERCEPT")
        self.predator_percept = ProcessingMechanism(size=predator_len, function=GaussianDistort(), name="PREDATOR PERCEPT")
        self.prey_percept = ProcessingMechanism(size=prey_len, function=GaussianDistort(), name="PREY PERCEPT")

        # Mechanisms used to encode the trial type from the environment
        self.prey_pred_trial_input_mech = ProcessingMechanism(name="PREY PREDATOR TRIAL")
        self.single_prey_trial_input_mech = ProcessingMechanism(name="SINGLE PREY TRIAL")
        self.double_prey_trial_input_mech = ProcessingMechanism(name="DOUBLE PREY TRIAL")

        # Mechanism used to encode the reward from the environment
        self.reward_input_mech = ProcessingMechanism(name="REWARD INPUT")

        # Function used by action_mech to generate action from trained DQN
        def get_action(variable=[[0, 0], [0, 0], [0, 0]]):
            # Convert variable to observation:
            observation = variable.reshape(6, )

            # Get new state
            # - first cache initial state of buffer
            buffer_cache = self.ddqn_agent.buffer.buffer.copy()

            # - then get new state based on current observation
            perceptual_state = self.ddqn_agent.buffer.next(observation)

            # - finally, restore frame buffer to initial state for use by next simulation or actual action
            self.ddqn_agent.buffer.buffer = buffer_cache

            # Get and return action
            action = np.array(self.ddqn_agent._io_map(self.ddqn_agent._select_action(perceptual_state).item()))
            if VERBOSE >= ACTION_REPORTING:
                print(f'\n\nACTUAL OBSERVATION: {observation}'
                      f'\nACTUAL PERCEPTUAL STATE: {perceptual_state.reshape(12, )}'
                      f'\nACTUAL ACTION FROM FUNCTION: {action}')
            return action

        # Action Mechanism
        #    Use ddqn's eval function to compute action for a given observation
        #    note: unitization is done in the main loop, to allow compilation of the LinearCombination function in the ObjectiveMechanism (TBI)
        self.action_mech = ProcessingMechanism(default_variable=[[0,0],[0,0],[0,0]],
                                          function=get_action, name='ACTION',
                                          output_ports='agent action')

        # ************************************** BASIC COMPOSITION *************************************************************

        self.agent_comp = Composition(name='PREDATOR-PREY COMPOSITION')

        self.agent_comp.add_nodes([self.player_percept, self.predator_percept,
                              self.prey_percept, self.prey_pred_trial_input_mech,
                              self.single_prey_trial_input_mech, self.double_prey_trial_input_mech,
                              self.reward_input_mech])
        self.agent_comp.add_node(self.action_mech, required_roles=[NodeRole.OUTPUT])

        a = MappingProjection(sender=self.player_percept, receiver=self.action_mech.input_ports[0])
        b = MappingProjection(sender=self.predator_percept, receiver=self.action_mech.input_ports[1])
        c = MappingProjection(sender=self.prey_percept, receiver=self.action_mech.input_ports[2])
        self.agent_comp.add_projections([a,b,c])


        # **************************************  CONTROL APPARATUS ***********************************************************
        self.ocm = OptimizationControlMechanism(
            name='EVC',
            state_features=[self.prey_pred_trial_input_mech,
                            self.single_prey_trial_input_mech,
                            self.double_prey_trial_input_mech],
            # state_feature_function=FEATURE_FUNCTION,
            agent_rep=RegressionCFA(
                update_weights=BayesGLM(mu_0=-0.0, sigma_0=0.0001),
                prediction_terms=[PV.F, PV.C, PV.COST]
            ),
            function=GridSearch(direction=MAXIMIZE, save_values=True),
            objective_mechanism=ObjectiveMechanism(name='OBJECTIVE MECHANISM',
                                                   monitor=[self.reward_input_mech]),
            control_signals=[
                ControlSignal(projections=(VARIANCE, self.player_percept),
                              allocation_samples=ALLOCATION_SAMPLES_PLAYER,
                              intensity_cost_function=Exponential(rate=COST_RATE,
                                                                  bias=COST_BIAS)),
                ControlSignal(projections=(VARIANCE, self.predator_percept),
                              allocation_samples=ALLOCATION_SAMPLES_PREDATOR,
                              intensity_cost_function=Exponential(rate=COST_RATE,
                                                                  bias=COST_BIAS)),
                ControlSignal(projections=(VARIANCE, self.prey_percept),
                              allocation_samples=ALLOCATION_SAMPLES_PREY,
                              intensity_cost_function=Exponential(rate=COST_RATE,
                                                                  bias=COST_BIAS))])
        # Add controller to Composition
        # agent_comp.add_node(ocm)
        self.agent_comp.add_controller(self.ocm)
        self.agent_comp.enable_controller = True
        self.agent_comp.controller_mode = BEFORE
        self.agent_comp.controller_condition = self.CONTROLLER_CONDITION  # can also be specified on the node if the ocm is added as a node:
                                                                          # agent_comp.scheduler_processing.add_condition((ocm, CONTROLLER_CONDITION))

        if SHOW_GRAPH:
            self.agent_comp.show_graph(show_controller=True, show_cim=True)

        # Wrap the entire composition inside another composition so we can perform
        # parameter optimization.
        self.opt_comp = Composition(name='outer_opt_comp')
        self.opt_comp.add_node(self.agent_comp)

    def make_input_generator(self, num_episodes=100):

        self.outcome_log = []
        self.reward_log = []
        self.predator_control_log = []
        self.prey_control_log = []

        # The context/execution id to use for all the runs
        self.context = Context()

        # Helper function to print controller details
        def print_controller():
            print(f'\nOCM:'
                  f'\n\tControlSignals:'
                  f'\n\t\tPlayer:\t\t{self.ocm.control_signals[0].parameters.value.get(self.context)}'
                  f'\n\t\tPredator\t{self.ocm.control_signals[1].parameters.value.get(self.context)}'
                  f'\n\t\tPrey:\t\t{self.ocm.control_signals[2].parameters.value.get(self.context)}'
                  f'\n\n\tControlSignal Costs:'
                  f'\n\t\tPlayer:\t\t{self.ocm.control_signals[0].parameters.cost.get(self.context)}'
                  f'\n\t\tPredator:\t{self.ocm.control_signals[1].parameters.cost.get(self.context)}'
                  f'\n\t\tPrey:\t\t{self.ocm.control_signals[2].parameters.cost.get(self.context)}')

        # The input generator function
        def input_generator():

            if RENDER:
                self.ddqn_agent.env.render()  # If visualization is desired
            else:
                print('\nRunning simulation... ')

            reward = 0
            steps = 0
            start_time = timeit.default_timer()
            for episode_i in range(num_episodes):
                trialType = 2
                prey_pred_trialType = 0
                single_prey_trialType = 0
                double_prey_trialType = 0
                print(f'EPISODE {episode_i}')

                self.ddqn_agent.env.trialType = trialType  # 0 is single prey, 1 is two prey, 2 is prey & predator

                # Start a new episode by resetting the environment
                observation = self.ddqn_agent.env.reset()

                # Set the new episode flag, controller condition depends on this.
                self.new_episode_flag = True

                while True:

                    if VERBOSE >= STANDARD_REPORTING:
                        print(f'\nEPISODE {episode_i}, STEP: {steps} ************************************************')

                    # Cache frame buffer
                    trial_start_buffer = self.ddqn_agent.buffer.buffer.copy()
                    # Get optimal action based on observation
                    optimal_action = self._get_optimal_action(observation)
                    # Save frame buffer after optimal action
                    optimal_agent_frame_buffer = self.ddqn_agent.buffer.buffer
                    # Restore initial state of frame buffer (for use by Composition)
                    self.ddqn_agent.buffer.buffer = trial_start_buffer

                    if VERBOSE >= ACTION_REPORTING:
                        print(f'\nOUTER LOOP OPTIMAL ACTION:{optimal_action}')

                    # Yield the next input to the agent composition. Since this generator is
                    # passed to the outer optimization composition, it must generate
                    # an input dictionary keyed by the inner agent composition node.
                    yield {
                            self.agent_comp: {
                                self.player_percept:[observation[player_coord_slice]],
                                self.predator_percept:[observation[predator_coord_slice]],
                                self.prey_percept:[observation[prey_coord_slice]],
                                self.prey_pred_trial_input_mech:[prey_pred_trialType],
                                self.single_prey_trial_input_mech: [single_prey_trialType],
                                self.double_prey_trial_input_mech: [double_prey_trialType],
                                self.reward_input_mech: [reward]
                                }
                            }

                    # Get agent's action based on perceptual distortion of observation (and application of control)
                    run_results = self.opt_comp.results[-1]
                    agent_action = np.where(run_results[0]==0,0,run_results[0]/np.abs(run_results[0]))

                    if VERBOSE >= ACTION_REPORTING:
                        print(f'OUTER LOOP RUN RESULTS:{run_results}')
                        print(f'OUTER LOOP AGENT ACTION:{agent_action}')

                    if VERBOSE >= STANDARD_REPORTING:
                        if self.agent_comp.controller_mode is BEFORE:
                            print_controller()
                        print(f'\nObservations:'
                              f'\n\tPlayer:\n\t\tveridical: {self.player_percept.parameters.variable.get(self.context)}'
                              f'\n\t\tperceived: {self.player_percept.parameters.value.get(self.context)}'
                              f'\n\tPredator:\n\t\tveridical: {self.predator_percept.parameters.variable.get(self.context)}'
                              f'\n\t\tperceived: {self.predator_percept.parameters.value.get(self.context)}'
                              f'\n\tPrey:\n\t\tveridical: {self.prey_percept.parameters.variable.get(self.context)}'
                              f'\n\t\tperceived: {self.prey_percept.parameters.value.get(self.context)}'
                              f'\n\nActions:\n\tAgent: {agent_action}\n\tOptimal: {optimal_action}'
                              f'\n\nOutcome:\n\t{self.ocm.objective_mechanism.parameters.value.get(self.context)}'
                              )
                        if self.agent_comp.controller_mode is AFTER:
                            print_controller()

                    self.outcome_log.append(self.ocm.objective_mechanism.parameters.value.get(self.context))

                    # Restore frame buffer to state after optimal action taken (at beginning of trial)
                    # This is so that the agent's actions can be compared to optimal ones on a trial-by-trial basis
                    self.ddqn_agent.buffer.buffer = optimal_agent_frame_buffer

                    # if ACTION is OPTIMAL_ACTION:
                    #     action = optimal_action
                    # elif ACTION is AGENT_ACTION:
                    #     action = agent_action
                    # else:
                    #     assert False, "Must choose either OPTIMAL_ACTION or AGENT_ACTION"
                    action = agent_action

                    # Get observation for next iteration based on the action taken in this one
                    observation, reward, done, _ = self.ddqn_agent.env.step(action)

                    if VERBOSE >= STANDARD_REPORTING:
                        print(f'\nAction Taken (using {ACTION}): {action}')

                    self.new_episode_flag = False
                    steps += 1
                    if done:
                        break

                self.predator_control_log.append(self.ocm.control_signals[1].parameters.value.get(self.context))
                self.prey_control_log.append(self.ocm.control_signals[2].parameters.value.get(self.context))
                self.reward_log.append(reward)

            stop_time = timeit.default_timer()
            print(f'{steps / (stop_time - start_time):.1f} steps/second, {steps} total steps in '
                  f'{stop_time - start_time:.2f} seconds')

            outcome_mean = np.mean(np.asarray(self.outcome_log))
            reward_mean = np.mean(np.asarray(self.reward_log))
            print(f'\nTotal Outcome: {outcome_mean}')
            print(f'\nTotal Reward: {reward_mean}')
            print('predator control log')
            print(self.predator_control_log)
            print('prey control log')
            print(self.prey_control_log)
            predator_control_mean = np.mean(np.asarray(self.predator_control_log))
            print(f'\npredator control MEAN: {predator_control_mean}')
            prey_control_mean = np.mean(np.asarray(self.prey_control_log))
            print(f'\nprey control MEAN: {prey_control_mean}')

            if RENDER:
                self.ddqn_agent.env.render(close=True)  # If visualization is desired

        # Return the generator instantiation function.
        return input_generator

    def run_games(self, cost_rate):

        # Setup data generator.
        input_gen = self.make_input_generator(NUM_EPISODES)

        # Set the cost rate on the intensity cost function of each of the three control signals
        self.ocm.control_signals[0].parameters.intensity_cost_function.get(self.context).parameters.rate.set(cost_rate, self.context)
        self.ocm.control_signals[1].parameters.intensity_cost_function.get(self.context).parameters.rate.set(cost_rate, self.context)
        self.ocm.control_signals[2].parameters.intensity_cost_function.get(self.context).parameters.rate.set(cost_rate, self.context)

        # Run num_episodes games to completion.
        self.opt_comp.run(inputs=input_gen,
                       bin_execute='LLVM' if PNL_COMPILE else 'Python',
                       context=self.context)

        loss = np.abs(np.mean(np.asarray(self.predator_control_log[-20:])) - 500) + np.mean(np.asarray(self.prey_control_log[-20:]))
        print(f"Loss = {loss}")

        return loss
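
Because run_games returns a scalar loss for a given cost_rate, the simulator can be driven by a simple outer sweep. A minimal sketch, assuming the module-level constants used above (MODEL_PATH, RENDER, NUM_EPISODES, COST_RATE, PNL_COMPILE, etc.) are defined; the candidate rates below are purely illustrative:

if __name__ == '__main__':
    best_rate, best_loss = None, float('inf')
    for cost_rate in (-0.8, -0.4, -0.2):   # hypothetical candidate values
        sim = PredatorPreySimulator()      # fresh seeds, environment, and composition per setting
        loss = sim.run_games(cost_rate)
        if loss < best_loss:
            best_rate, best_loss = cost_rate, loss
    print(f'Best cost_rate: {best_rate} (loss: {best_loss})')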
Example #6
# For future use:
values = TransferMechanism(size=3, name="AGENT VALUES")
reward = TransferMechanism(name="REWARD")

# Use ComparatorMechanism to compute direction of action as difference of coordinates between player and prey:
# note: unitization is done in the main loop, to allow compilation of the LinearCombination function (TBI)
greedy_action_mech = ComparatorMechanism(name='MOTOR OUTPUT',
                                         sample=player,
                                         target=prey)

agent_comp = Composition(name='PREDATOR-PREY COMPOSITION')
agent_comp.add_node(player)
agent_comp.add_node(prey)
agent_comp.add_node(greedy_action_mech)

agent_comp.env = ForagerEnv()  # NEW: ForagerEnv must be stored in an attribute on the Composition


def main():

    # NEW: get_next_input interactively returns a new input from the ForagerEnv
    #      (rather than specifying a pre-determined list of input values)
    def get_next_input(env, result):
        action = np.where(result[0] == 0, 0, result[0] / np.abs(result[0]))
        env_step = env.step(action)
        observation = env_step[0]
        done = env_step[2]
        if not done:
            # NEW: This function MUST return a dictionary of input values for a single trial for each INPUT node
            return {
                player: [observation[player_coord_idx]],
                prey: [observation[prey_coord_idx]],
            }
Example #7
from psyneulink import *
from gym_forager.envs.forager_env import ForagerEnv
from gym_forager.envs.forager.randomexp import RandomAgent


num_trials = 4
env = ForagerEnv()
reward = 0
done = False


# Function used by PsyNeuLink Mechanism
r = RandomAgent(env.action_space)
def random_action(variable):
    return r.act(variable, None, None)

# Create PsyNeuLink Composition as agent
agent_mech = ProcessingMechanism(function=random_action)
agent_comp = Composition()

agent_comp.add_node(agent_mech)

def main():
    for _ in range(num_trials):
        observation = env.reset()
        while True:
            run_results = agent_comp.run(inputs={agent_mech: observation})
            action = run_results[0]
            observation, reward, done, _ = env.step(action)
            if done:
                break
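
Like the other examples here, this snippet defines main() but never calls it. If you want to run it directly, a standard entry-point guard is all that is missing (a minimal sketch):

if __name__ == '__main__':
    main()
    print(f'Completed {num_trials} trials with the random agent')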