def final(self, state):
    """Called at the end of each game.

    Delegates end-of-episode bookkeeping to the superclass and, once the
    configured number of training episodes has completed, prints the
    learned feature weights for debugging.
    """
    # Let the superclass update its episode counters / accumulators.
    ReinforcementGhostAgent.final(self, state)
    # Did we just finish the last training episode?
    if self.episodesSoFar == self.numTraining:
        # Dump learned weights so they can be inspected after training.
        # (Removed the leftover "*** YOUR CODE HERE ***" placeholder
        # string, which was a dead no-op statement.)
        print(self.weights)
def final(self, state):
    """Called at the end of each game.

    Runs superclass bookkeeping; after the last training episode,
    persists the trained network to disk.
    """
    # Superclass handles end-of-episode bookkeeping.
    ReinforcementGhostAgent.final(self, state)
    training_done = self.episodesSoFar == self.numTraining
    # NOTE(review): only agent index 1 saves its model — confirm that the
    # other agents are intentionally left unsaved.
    if training_done and self.agentIndex == 1:
        self.save("agent1_dqn.h5")
def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0,
             agentIndex=1, extractor='GhostIdentityExtractor', **args):
    """Set up learning hyperparameters, Q-values, and the feature extractor."""
    # Forward every hyperparameter to the superclass through **args.
    args.update(epsilon=epsilon, gamma=gamma, alpha=alpha,
                numTraining=numTraining, agentIndex=agentIndex)
    self.index = agentIndex
    self.q_values = util.Counter()
    # Resolve the extractor class by name and instantiate it.
    self.featExtractor = util.lookup(extractor, globals())()
    self.weights = util.Counter()
    ReinforcementGhostAgent.__init__(self, **args)
def final(self, state):
    """Called at the end of each game.

    Performs superclass bookkeeping; after the final training episode,
    prints the learned feature weights for debugging.
    (Removed a large block of commented-out matplotlib plotting code.)
    """
    # Superclass bookkeeping first.
    ReinforcementGhostAgent.final(self, state)
    # After the last training episode, show what was learned.
    if self.episodesSoFar == self.numTraining:
        print(self.weights)
def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0,
             agentIndex=1, extractor='GhostAdvancedExtractor', **args):
    """Set up hyperparameters, the feature extractor, and tracking dicts."""
    # Forward the hyperparameters to the superclass through **args.
    for key, value in (('epsilon', epsilon), ('gamma', gamma),
                       ('alpha', alpha), ('numTraining', numTraining),
                       ('agentIndex', agentIndex)):
        args[key] = value
    self.index = agentIndex
    self.q_values = util.Counter()
    # Resolve the extractor class by name and instantiate it.
    self.featExtractor = util.lookup(extractor, globals())()
    self.weights = util.Counter()
    # Per-feature history containers (filled in elsewhere).
    self.weights_dict = {}
    self.features_dict = {}
    ReinforcementGhostAgent.__init__(self, **args)
def __init__(self, epsilon=1, gamma=0.95, alpha=0.001, numTraining=0,
             agentIndex=1, extractor='GhostIdentityExtractor', **args):
    """Initialize a DQN-based ghost agent: hyperparameters, replay memory,
    a pre-trained Keras model, and hand-built maze topology tables."""
    # Forward hyperparameters to the superclass via **args.
    args['epsilon'] = epsilon
    args['gamma'] = gamma
    args['alpha'] = alpha
    args['numTraining'] = numTraining
    args['agentIndex'] = agentIndex
    self.index = agentIndex
    # Experience-replay buffer, capped at the 2000 most recent samples.
    self.memory = deque(maxlen=2000)
    self.q_values = util.Counter()
    # Maps the network's output index to a game action string.
    self.act_dict = {0: 'North', 1: 'South', 2: 'West', 3: 'East'}
    self.action_size = 4  # used for output layer in dqn
    self.state_size = 12  # used for input layer in dqn
    self.batch_size = 32  # used for model training
    self.featExtractor = util.lookup(extractor, globals())()
    self.weights = util.Counter()
    self.learning_rate = alpha
    self.gamma = gamma  # discount rate
    self.epsilon = epsilon  # exploration rate (default 1 = fully random)
    self.epsilon_min = 0.01
    self.epsilon_decay = 0.995
    # self.path='D:/SMU/Courses/Introduction to AI/MiniProject_v2/MiniProject/Pacman_MiniProject'
    # Load a previously trained network.
    # NOTE(review): the filename is hard-coded to agent 1's model even when
    # agentIndex != 1 — confirm that this is intended.
    self.model = load_model('agent1_dqn.h5')
    # self.model=self._build_model()
    self.safety_distance = 12  # safety distance (capsule to pacman + ghost to pacman)
    self.last_node_pacman_was_heading_towards = None
    self.last_node_pacman_was_moving_away_from = None
    self.last_action = None  # store action for Qvalues since action is defined differently
    #self.q_values = util.Counter()
    # self.q_values = self.load_obj("agent"+str(agentIndex)+"Q")  #load previous q values
    # Maze topology: segment id -> list of (x, y) cells on that corridor
    # segment; ids 31-50 are single-cell junction nodes (see '#node' below).
    self.segment_dic = {
        1: [(1, 6), (1, 7), (1, 8)],
        2: [(1, 4), (1, 3), (1, 2)],
        3: [(2, 9), (3, 9), (4, 9), (4, 8)],
        4: [(2, 5)],
        5: [(2, 1), (3, 1), (4, 1), (4, 2)],
        6: [(3, 6), (3, 7)],
        7: [(3, 3), (3, 4)],
        8: [(4, 5), (5, 5)],
        9: [(5, 7)],
        10: [(5, 3)],
        11: [(6, 8), (6, 9), (7, 9), (8, 9), (9, 9), (10, 9), (11, 9),
             (12, 9), (13, 9), (13, 8)],
        12: [(6, 6)],
        13: [(6, 4)],
        14: [(6, 2), (6, 1), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1),
             (12, 1), (13, 1), (13, 2)],
        15: [(7, 7), (8, 7)],
        16: [(7, 3), (8, 3), (9, 3), (10, 3), (11, 3), (12, 3)],
        17: [(8, 5), (9, 5), (10, 5), (11, 5), (9, 6), (10, 6)],
        18: [(11, 7), (12, 7)],
        19: [(13, 6)],
        20: [(13, 4)],
        21: [(14, 7)],
        22: [(14, 5), (15, 5)],
        23: [(14, 3)],
        24: [(15, 8), (15, 9), (16, 9), (17, 9)],
        25: [(15, 2), (15, 1), (16, 1), (17, 1)],
        26: [(16, 6), (16, 7)],
        27: [(16, 3), (16, 4)],
        28: [(17, 5)],
        29: [(18, 8), (18, 7), (18, 6)],
        30: [(18, 2), (18, 3), (18, 4)],
        # node
        31: [(1, 9)], 32: [(1, 5)], 33: [(1, 1)], 34: [(3, 5)],
        35: [(4, 7)], 36: [(4, 3)], 37: [(6, 7)], 38: [(6, 5)],
        39: [(6, 3)], 40: [(9, 7)], 41: [(10, 7)], 42: [(13, 7)],
        43: [(13, 5)], 44: [(13, 3)], 45: [(15, 7)], 46: [(15, 3)],
        47: [(16, 5)], 48: [(18, 9)], 49: [(18, 5)], 50: [(18, 1)]
    }
    # Adjacency table: for corridor segments (1-30) the two endpoint node
    # ids; for junction nodes (31-50) the tuple of neighboring node ids.
    # NOTE(review): entries 14 and 16 both map to (39, 44) — possible
    # copy/paste duplicate; verify against the maze layout.
    self.choice_dic = {
        1: (31, 32), 2: (32, 33), 3: (31, 35), 4: (32, 34),
        5: (33, 36), 6: (34, 35), 7: (34, 36), 8: (34, 38),
        9: (35, 37), 10: (36, 39), 11: (37, 42), 12: (37, 38),
        13: (38, 39), 14: (39, 44), 15: (37, 40), 16: (39, 44),
        17: (40, 41), 18: (41, 42), 19: (42, 43), 20: (43, 44),
        21: (42, 45), 22: (43, 47), 23: (44, 46), 24: (45, 48),
        25: (46, 50), 26: (45, 47), 27: (46, 47), 28: (47, 49),
        29: (48, 49), 30: (49, 50),
        31: (32, 35), 32: (31, 33, 34), 33: (32, 36),
        34: (32, 35, 36, 38), 35: (31, 34, 37), 36: (33, 34, 39),
        37: (35, 38, 40, 42), 38: (34, 37, 39), 39: (36, 38, 44),
        40: (37, 41), 41: (40, 42), 42: (37, 41, 43, 45),
        43: (42, 44, 47), 44: (39, 43, 46), 45: (42, 47, 48),
        46: (44, 47, 50), 47: (43, 45, 46, 49), 48: (45, 49),
        49: (47, 48, 50), 50: (46, 49)
    }
    ReinforcementGhostAgent.__init__(self, **args)