Example #1
from Arena.Environment import Environment
from Arena.Entity import Entity
# WinEnum is assumed to live in Arena.constants alongside the other game constants
from Arena.constants import WinEnum


def can_escape_by_one_step(point1, point2):
    # Place red at point1 and blue at point2, then check whether
    # this configuration is already a terminal blue win.
    env = Environment()

    env.red_player = Entity()
    env.red_player.x = point1[0]
    env.red_player.y = point1[1]

    env.blue_player = Entity()
    env.blue_player.x = point2[0]
    env.blue_player.y = point2[1]

    win_stat = env.compute_terminal()

    return win_stat == WinEnum.Blue
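
A minimal usage sketch, assuming the points are (x, y) grid tuples as in the assignments above; the coordinates are illustrative:

# illustrative coordinates only
if can_escape_by_one_step((3, 4), (3, 5)):
    print('blue already wins in this configuration')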
Example #2
import pickle

from Arena.Environment import Environment
from Arena.Entity import Entity
from Arena.CState import State
# set_env_state and is_terminal_state are in misharon_utils (see Example #3);
# state_generator and update_pol_cnts are assumed to live there too
from misharon_utils import state_generator, set_env_state, is_terminal_state, update_pol_cnts


def learn_decision_maker(decision_maker, n_samples=20, save_to_file=False):

    env = Environment()
    blue_decision_maker = decision_maker   # the agent whose behaviour policy we want to learn
    red_decision_maker = decision_maker    # an imaginary opponent, only needed to build a valid environment
    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)
    policy_counts = {}

    # go over every state = (blue position, red position)
    for state in state_generator():

        if is_terminal_state(state):
            continue

        # set the positions of the players in the environment
        set_env_state(env, state)

        # get the observation
        observation_for_blue: State = env.get_observation_for_blue()

        # the agent is not deterministic, so sample it repeatedly to estimate the distribution p(a|s)
        for _ in range(n_samples):
            # get the action chosen by the player
            action_blue = blue_decision_maker.get_action(observation_for_blue)
            a = action_blue - 1   # change to a 0-based index
            update_pol_cnts(state, a, policy_counts)

    print('Finished learning the enemy')
    if save_to_file:
        agent_name = type(decision_maker).__name__  # tag the file with the agent's class name
        with open(f'learned_{agent_name}_enemy', 'wb') as myfile:
            pickle.dump([agent_name, policy_counts, n_samples], myfile)

    return policy_counts
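
The helper update_pol_cnts is not shown on this page. A minimal sketch of what it plausibly does, together with the normalization that turns the raw counts into the distribution p(a|s) mentioned in the comments (the dict-of-count-vectors layout and the placeholder n_actions value are assumptions, not the project's actual implementation):

import numpy as np

n_actions = 9  # placeholder; the real value is exported by misharon_learn_the_enemy

def update_pol_cnts(state, action, policy_counts):
    # count how often each 0-based action was sampled in this state;
    # assumes `state` is hashable, e.g. a (blue position, red position) tuple
    if state not in policy_counts:
        policy_counts[state] = np.zeros(n_actions)
    policy_counts[state][action] += 1

def counts_to_policy(policy_counts, n_samples):
    # normalize the per-state counts into an empirical p(a|s)
    return {state: counts / n_samples for state, counts in policy_counts.items()}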
Example #3
import json

from Arena.Environment import Environment
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.constants import WIN_REWARD, MOVE_PENALTY, MAX_STEPS_PER_EPISODE, HARD_AGENT

from misharon_utils import state_action_generator, get_Q_vals, set_env_state, is_terminal_state, derive_greedy_policy
from misharon_learn_the_enemy import n_actions

#------------------------------------------------------------------------------------------------------------~

# define dummy players, just so we can use the class functions
dummy_blue = Entity(RafaelDecisionMaker(HARD_AGENT))
dummy_red = Entity(RafaelDecisionMaker(HARD_AGENT))
env = Environment()
env.blue_player = dummy_blue
env.red_player = dummy_red

#------------------------------------------------------------------------------------------------------------~

def get_reward(env, state):
    # place the players according to `state` and return blue's immediate reward
    set_env_state(env, state)
    reward_blue, _ = env.handle_reward()
    return reward_blue


# ------------------------------------------------------------------------------------------------------------~
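
A short sketch of how get_reward could be combined with the same state sweep used in Example #2 to tabulate blue's immediate reward over every non-terminal state (state_generator is assumed to be importable from misharon_utils alongside the helpers above):

from misharon_utils import state_generator

def build_reward_table(env):
    # map every non-terminal state to blue's immediate reward in it
    reward_table = {}
    for state in state_generator():
        if is_terminal_state(state):
            continue
        reward_table[state] = get_reward(env, state)
    return reward_table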
Example #4
        EVALUATE = False
    return EVALUATE

def print_states(observation_for_blue_s0, observation_for_blue_s1):
    import matplotlib.pyplot as plt
    plt.matshow(observation_for_blue_s0.img)
    plt.show()

    plt.matshow(observation_for_blue_s1.img)
    plt.show()


if __name__ == '__main__':


    env = Environment(IS_TRAINING)

    print("Starting Blue player")

    blue_decision_maker = DQNAgent_keras.DQNAgent_keras()
    #blue_decision_maker = DQNAgent_keras.DQNAgent_keras(UPDATE_CONTEXT=True, path_model_to_load='conv1(6_6_1_256)_conv2(4_4_256_128)_conv3(3_3_128_128)_flatten_fc__blue_202001_   0.95max_  -0.04avg_  -3.10min__1620558885.model')

    print("Starting red player")
    ### Red Decision Maker
    red_decision_maker = Greedy_player.Greedy_player()


    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    print_start_of_game_info(blue_decision_maker, red_decision_maker)
Example #5
from copy import copy

from AbsDecisionMaker import AbsDecisionMaker
from constants import SIZE_Y, SIZE_X
from Arena.CState import State
from Arena.constants import AgentAction, HARD_AGENT
from Arena.Environment import Environment
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker

from misharon_utils import derive_greedy_policy, update_Q_matrix, init_Q_matrix, is_terminal_state
from misharon_learn_the_enemy import learn_decision_maker
from plan_anti_policy import plan_anti_policy

#------------------------------------------------------------------------------------------------------------~

# define a dummy environment, just so we can use its class functions
env = Environment()

#------------------------------------------------------------------------------------------------------------~


class misharonHillDecisionMaker(AbsDecisionMaker):
    def update_context(self, new_state: State, reward, is_terminal):

        prev_state = copy(self.state)
        self.state = new_state

        red_prev_state = prev_state  # the enemy's state before its last move

        update_Q_matrix(self._Q_matrix, new_state)  # needed for main.py to run

        # TODO: learn the enemy - infer the last move the enemy made from the state change and update enemy_policy
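
One way to fill in this TODO, sketched under the assumption that a State exposes the red player's grid position as an (x, y) pair through a hypothetical red_pos attribute (the real Arena.CState.State API may differ) and that actions are one-cell moves:

def infer_red_action(red_prev_state, new_state):
    # compare red's position before and after its last move
    dx = new_state.red_pos[0] - red_prev_state.red_pos[0]
    dy = new_state.red_pos[1] - red_prev_state.red_pos[1]
    # illustrative displacement -> 0-based action mapping; the project's
    # AgentAction enum defines the real one; returns None for an
    # unrecognized displacement
    deltas = {(0, 0): 0, (0, 1): 1, (0, -1): 2, (1, 0): 3, (-1, 0): 4}
    return deltas.get((dx, dy))

The inferred index could then be fed into the same policy_counts structure that learn_decision_maker builds in Example #2.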
Example #6
import timeit

from matplotlib import style

from Arena.CState import State
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.Environment import Environment, Episode
from Arena.constants import *
from misharonDecisionMaker import misharonDecisionMaker

style.use("ggplot")

# MAIN:
if __name__ == '__main__':

    start_time = timeit.default_timer()

    env = Environment()

    blue_decision_maker = misharonDecisionMaker()  # use our agent
    # blue_decision_maker = misharonHillDecisionMaker()  # use our agent for the King of the Hill

    # red_decision_maker = RafaelDecisionMaker(EASY_AGENT)
    # red_decision_maker = RafaelDecisionMaker(MEDIUM_AGENT)
    red_decision_maker = RafaelDecisionMaker(HARD_AGENT)

    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    for episode in range(1, NUM_OF_EPISODES + 1):

        current_episode = Episode(episode)