def game_events_occurred(self, old_game_state, self_action, new_game_state, events):
    """Record one (state, action, events) transition during a round.

    Skips the very first callback of a round, where old_game_state is None.
    """
    # `is not None` instead of `== None`: identity test is the Pythonic
    # check and avoids any __eq__ overload on the game-state dict.
    if old_game_state is not None:
        self.oldfields.append(transformfield(old_game_state)[0])
        self.newfields.append(transformfield(new_game_state)[0])
        self.actions.append(strtoint(self_action))
        self.allevents.append(events)
def end_of_round(self, last_game_state, last_action, events):
    """Finish a round: record the final transition, then append every
    (state, reward-vector) pair of the episode to "data.pickle".

    The reward vector is all zeros except at the taken action's index,
    which holds that step's reward.
    """
    # NOTE(review): dist=5/nearest=True here, but the paired
    # game_events_occurred calls transformfield with defaults — confirm
    # the resulting feature shapes actually match before training.
    self.oldfields.append(transformfield(last_game_state, dist=5, nearest=True)[0])
    self.actions.append(strtoint(last_action))
    self.allevents.append(events)
    states = np.asarray(self.oldfields)
    rewards = getrewards(self.allevents)
    # `with` already closes the file; the original's explicit close()
    # inside the block was redundant.
    with open("data.pickle", "ab") as file:
        for state, act, reward in zip(states, self.actions, rewards):
            target = np.zeros(6)  # one slot per possible action
            target[act] = reward
            pickle.dump((state, target), file)
    # Reset the episode buffers for the next round.
    self.oldfields = []
    self.actions = []
    self.allevents = []
def act(self, game_state):
    """Choose an action: epsilon-greedy while training, greedy otherwise."""
    features = transformfield(game_state, dist=5, nearest=True)
    q_values = self.model.predict(features)[0]
    # Exploration: with probability epsilon pick a random action, biased
    # toward movement over WAIT/BOMB.  Short-circuit keeps the random
    # draw training-only, exactly as before.
    exploring = self.train and np.random.rand() < self.epsilon
    if exploring:
        return np.random.choice(ACTIONS, p=[0.2, 0.2, 0.2, 0.2, 0.1, 0.1])
    # Exploitation: highest predicted value wins.
    return ACTIONS[np.argmax(q_values)]
def end_of_round(self, last_game_state, last_action, events):
    """Finish a round: record the final transition, fit the model on the
    whole episode with per-sample weights, and save it to "model".
    """
    self.oldfields.append(transformfield(last_game_state, dist=5, nearest=True))
    self.actions.append(strtohot(last_action))
    self.allevents.append(events)
    # Bug fix: the original wrote `weights = weights(self.allevents)`.
    # Assigning to `weights` makes it local to this function, so the call
    # on the right-hand side raised UnboundLocalError every time; use a
    # distinct local name so the module-level weights() helper is reached.
    sample_weights = weights(self.allevents)
    # NOTE(review): the original called the bare global `model`, while
    # every sibling method uses self.model — this looked like a typo.
    # Confirm no module-level `model` object was intended here.
    self.model.fit(self.oldfields, self.actions, sample_weights)
    self.model.save("model")
def game_events_occurred(self, old_game_state, self_action, new_game_state, events):
    """Record one transition; print debug context when an opponent dies."""
    # `is not None` identity test instead of `== None`.
    if old_game_state is not None:
        self.oldfields.append(transformfield(old_game_state, dist=5, nearest=True))
        self.actions.append(strtohot(self_action))
        self.allevents.append(events)
        if e.KILLED_OPPONENT in events:
            print(events)
            # Guard: the original indexed [-5] unconditionally, which
            # raises IndexError during the first four steps of a round.
            if len(self.allevents) >= 5:
                print(self.allevents[-5])
                print(self.actions[-5])
def end_of_round(self, last_game_state, last_action, events):
    """Finish a round; persist the episode to "data.pickle" only when the
    agent scored at least as well as every opponent (imitation-style
    filtering of winning episodes).
    """
    self.oldfields.append(transformfield(last_game_state))
    self.actions.append(strtohot(last_action))
    own_score = last_game_state["self"][1]
    other_scores = [other[1] for other in last_game_state["others"]]
    if not other_scores:
        # No surviving opponents: compare against 0 so wins still record.
        other_scores = [0, 0]
    if own_score >= max(other_scores):
        # Open the file once instead of re-opening it for every sample,
        # as the original did inside the loop.
        with open("data.pickle", "ab") as file:
            for state, action in zip(self.oldfields, self.actions):
                pickle.dump((state, action), file)
    # Reset the episode buffers for the next round.
    self.oldfields = []
    self.actions = []
def end_of_round(self, last_game_state, last_action, events):
    """Q-learning update at the end of a round.

    Builds TD targets from the target network, fits the online model on a
    random minibatch, periodically logs the mean reward, saves the model,
    syncs the target network, and decays epsilon.
    """
    self.oldfields.append(transformfield(last_game_state, dist=5, nearest=True)[0])
    self.actions.append(strtoint(last_action))
    self.allevents.append(events)
    states = np.asarray(self.oldfields)
    # Renamed from `next`, which shadows the builtin.
    next_states = np.asarray(self.newfields)
    y = self.model.predict(states)
    target = self.target.predict(next_states)
    rewards = getrewards(self.allevents)
    # TD update for all non-terminal transitions.
    for i in range(len(states) - 1):
        act = self.actions[i]
        y[i, act] += self.alpha * (rewards[i] + self.gamma * max(target[i]) - y[i, act])
    # Terminal transition: no bootstrap term.  Bug fix: the original read
    # y[i, ...] (the stale loop index) instead of y[-1, ...] here, and
    # would raise NameError for a one-step episode where the loop never ran.
    last_act = self.actions[-1]
    y[-1, last_act] += self.alpha * (rewards[-1] - y[-1, last_act])
    mini = min(self.mini, len(states))
    minibatch = np.random.permutation(len(states))[:mini]
    self.model.fit(states[minibatch], y[minibatch], batch_size=self.batch_size)
    if last_game_state["round"] % 20 == 0:
        # `with` closes the file; the explicit close() was redundant.
        with open("rewards", "ab") as file:
            pickle.dump(np.mean(rewards), file)
    if last_game_state["round"] % 200 == 0:
        self.model.save("model")
        self.target.set_weights(self.model.get_weights())
    # Decay exploration (floored at 0.01) and persist it so training
    # can resume where it left off.
    self.epsilon = max(0.01, self.epsilon * 0.998)
    with open("epsilon", "wb") as file:
        pickle.dump(self.epsilon, file)
    # Reset the episode buffers for the next round.
    self.oldfields = []
    self.newfields = []
    self.actions = []
    self.allevents = []
def act(self, game_state):
    """Greedy policy: return the action with the highest predicted score."""
    # transformfield(..., 11) returns a flat feature array; reshape to a
    # single-row batch for the model.
    features = transformfield(game_state, 11).reshape(1, -1)
    scores = self.model.predict(features)[0]
    best = int(np.argmax(scores))
    return ACTIONS[best]
def act(self, game_state):
    """Pick the highest-scoring action for the current state (greedy)."""
    features = transformfield(game_state, 5, nearest=True)
    predictions = self.model.predict(features)
    # argmax flattens the (1, k) prediction array, so the index maps
    # directly into ACTIONS.
    best = np.argmax(predictions)
    return ACTIONS[best]
def game_events_occurred(self, old_game_state, self_action, new_game_state, events):
    """Record the (state, one-hot action) pair for each non-initial step."""
    # `is not None` identity test instead of `== None`.
    if old_game_state is not None:
        self.oldfields.append(transformfield(old_game_state, dist=5, nearest=True))
        self.actions.append(strtohot(self_action))