def can_escape_by_one_step(point1, point2):
    # set up a throwaway environment with red at point1 and blue at point2
    env = Environment()
    env.red_player = Entity()
    env.red_player.x = point1[0]
    env.red_player.y = point1[1]
    env.blue_player = Entity()
    env.blue_player.x = point2[0]
    env.blue_player.y = point2[1]
    # blue wins iff the terminal check says so
    win_stat = env.compute_terminal()
    return win_stat == WinEnum.Blue
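# Usage sketch for can_escape_by_one_step (assumptions: 0-based board
# coordinates and SIZE_X/SIZE_Y from Arena.constants; the helper name
# blue_win_positions_around is hypothetical, not part of the project):
# list all cells adjacent to red from which blue would immediately win.
def blue_win_positions_around(red_pos):
    wins = []
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            if dx == 0 and dy == 0:
                continue
            blue_pos = (red_pos[0] + dx, red_pos[1] + dy)
            if 0 <= blue_pos[0] < SIZE_X and 0 <= blue_pos[1] < SIZE_Y:
                if can_escape_by_one_step(red_pos, blue_pos):
                    wins.append(blue_pos)
    return wins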
def learn_decision_maker(decision_maker, n_samples=20, save_to_file=False):
    env = Environment()
    blue_decision_maker = decision_maker  # the agent whose behaviour policy we want to learn
    red_decision_maker = decision_maker   # placeholder opponent, needed only to build a valid environment
    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)
    agent_name = type(decision_maker).__name__  # label used for the saved policy file
    policy_counts = {}
    # iterate over all states = (blue position, red position)
    for state in state_generator():
        if is_terminal_state(state):
            continue
        # place the players in the environment according to the state
        set_env_state(env, state)
        # get the observation blue would see in this state
        observation_for_blue: State = env.get_observation_for_blue()
        # the agent is not deterministic, so estimate the distribution p(a|s) by sampling
        for i_samp in range(n_samples):
            # get the action chosen by the player
            action_blue = blue_decision_maker.get_action(observation_for_blue)
            a = action_blue - 1  # change to 0-based index
            update_pol_cnts(state, a, policy_counts)
    print('Finished learning the enemy')
    if save_to_file:
        with open(f'learned_{agent_name}_enemy', 'wb') as myfile:
            pickle.dump([agent_name, policy_counts, n_samples], myfile)
    return policy_counts
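# Follow-up sketch (assumptions: update_pol_cnts stores, per state, a
# length-n_actions array of visit counts, and each non-terminal state is
# sampled exactly n_samples times; counts_to_policy is a hypothetical
# helper, not part of the project): turn the sampled counts into an
# empirical action distribution p(a|s).
import numpy as np

def counts_to_policy(policy_counts, n_samples):
    return {state: np.asarray(counts) / n_samples
            for state, counts in policy_counts.items()}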
import json

from Arena.Environment import Environment
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.constants import WIN_REWARD, MOVE_PENALTY, MAX_STEPS_PER_EPISODE, HARD_AGENT
from misharon_utils import state_action_generator, get_Q_vals, set_env_state, is_terminal_state, derive_greedy_policy
from misharon_learn_the_enemy import n_actions

#------------------------------------------------------------------------------------------------------------~
# define dummy players, just so we can use the class functions
dummy_blue = Entity(RafaelDecisionMaker(HARD_AGENT))
dummy_red = Entity(RafaelDecisionMaker(HARD_AGENT))
env = Environment()
env.blue_player = dummy_blue
env.red_player = dummy_red
#------------------------------------------------------------------------------------------------------------~


# ------------------------------------------------------------------------------------------------------------~
def get_reward(env, state):
    # place the players according to the state, then query the environment's reward
    set_env_state(env, state)
    reward_blue, reward_red = env.handle_reward()
    return reward_blue
# ------------------------------------------------------------------------------------------------------------~
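# ------------------------------------------------------------------------------------------------------------~
# Minimal sketch of what set_env_state is assumed to do (the real
# implementation lives in misharon_utils; the state layout
# (blue position, red position) follows learn_decision_maker above, and
# the _sketch suffix marks this as illustrative, not the project's code).
def set_env_state_sketch(env, state):
    (blue_x, blue_y), (red_x, red_y) = state
    env.blue_player.x, env.blue_player.y = blue_x, blue_y
    env.red_player.x, env.red_player.y = red_x, red_y
# ------------------------------------------------------------------------------------------------------------~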
def evaluate():  # header restored; the original name/signature is not shown in this excerpt
    EVALUATE = False
    return EVALUATE


def print_states(observation_for_blue_s0, observation_for_blue_s1):
    # side-by-side visual check of two consecutive blue observations
    import matplotlib.pyplot as plt
    plt.matshow(observation_for_blue_s0.img)
    plt.show()
    plt.matshow(observation_for_blue_s1.img)
    plt.show()


if __name__ == '__main__':

    env = Environment(IS_TRAINING)

    print("Starting Blue player")
    blue_decision_maker = DQNAgent_keras.DQNAgent_keras()
    # blue_decision_maker = DQNAgent_keras.DQNAgent_keras(UPDATE_CONTEXT=True, path_model_to_load='conv1(6_6_1_256)_conv2(4_4_256_128)_conv3(3_3_128_128)_flatten_fc__blue_202001_ 0.95max_ -0.04avg_ -3.10min__1620558885.model')

    print("Starting red player")
    ### Red Decision Maker
    red_decision_maker = Greedy_player.Greedy_player()

    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    print_start_of_game_info(blue_decision_maker, red_decision_maker)
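# Usage sketch for print_states (assumption: observations expose the .img
# array used above; the stepping call is illustrative only, since the exact
# game-advance API is not shown in this excerpt):
#
#   obs_before = env.get_observation_for_blue()
#   ...advance the game by one move...
#   obs_after = env.get_observation_for_blue()
#   print_states(obs_before, obs_after)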
from copy import copy

from AbsDecisionMaker import AbsDecisionMaker
from constants import SIZE_Y, SIZE_X
from Arena.CState import State
from Arena.constants import AgentAction, HARD_AGENT
from Arena.Environment import Environment
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from misharon_utils import derive_greedy_policy, update_Q_matrix, init_Q_matrix, is_terminal_state
from misharon_learn_the_enemy import learn_decision_maker
from plan_anti_policy import plan_anti_policy

#------------------------------------------------------------------------------------------------------------~
# define dummy players, just so we can use the class functions
env = Environment()
#------------------------------------------------------------------------------------------------------------~


class misharonHillDecisionMaker(AbsDecisionMaker):

    def update_context(self, new_state: State, reward, is_terminal):
        prev_state = copy(self.state)
        self.state = new_state
        red_prev_state = prev_state  # kept for the enemy-learning step below
        update_Q_matrix(self._Q_matrix, new_state)  # needed for main.py to run
        # TODO: learn the enemy - see the last move the enemy made, and update enemy_policy
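# Hypothetical helper toward the TODO above (assumptions: positions are
# (x, y) tuples and the caller supplies a mapping from movement deltas to
# AgentAction values, since the real enumeration order is not shown here):
# infer which action red took between two consecutive states.
def infer_red_action(red_prev_pos, red_new_pos, delta_to_action):
    dx = red_new_pos[0] - red_prev_pos[0]
    dy = red_new_pos[1] - red_prev_pos[1]
    return delta_to_action.get((dx, dy))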
import timeit

from matplotlib import style

from Arena.CState import State
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.Environment import Environment, Episode
from Arena.constants import *
from misharonDecisionMaker import misharonDecisionMaker

style.use("ggplot")


# MAIN:
if __name__ == '__main__':

    start_time = timeit.default_timer()

    env = Environment()

    blue_decision_maker = misharonDecisionMaker()  # use our agent
    # blue_decision_maker = misharonHillDecisionMaker()  # use our agent for the King of the Hill variant

    # red_decision_maker = RafaelDecisionMaker(EASY_AGENT)
    # red_decision_maker = RafaelDecisionMaker(MEDIUM_AGENT)
    red_decision_maker = RafaelDecisionMaker(HARD_AGENT)

    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    for episode in range(1, NUM_OF_EPISODES + 1):
        current_episode = Episode(episode)
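        # (episode body elided in this excerpt; Episode's API is not shown here)

    # Sketch of a per-run timing report after the episode loop, using the
    # start_time taken above (timeit.default_timer() is the standard-library
    # wall-clock timer; NUM_OF_EPISODES comes from Arena.constants).
    total_runtime = timeit.default_timer() - start_time
    print(f"Finished {NUM_OF_EPISODES} episodes in {total_runtime:.1f} seconds")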