def move(self, environment):
    """Pick the snake's next move: epsilon-greedy over the trained DDQN.

    With probability 0.01 a random action is explored; otherwise the
    network's Q-values for the current environment state select the
    greedy action. The chosen action is normalized relative to the
    snake's current heading before being returned.

    :param environment: game environment exposing state() and snake_action.
    :return: the normalized action for the snake to take.
    """
    BaseDDQNGameModel.move(self, environment)
    if np.random.rand() < 0.01:
        # Exploration. FIX: the original passed the raw integer index to
        # Action.normalized_action here, while the greedy branch below
        # passes an Action obtained via action_from_vector — convert the
        # random index the same way so both branches agree.
        action_vector = Action.action_from_vector(
            random.randrange(self.action_space))
    else:
        # Exploitation: run a single-sample forward pass and take the
        # action with the highest predicted Q-value.
        state = environment.state()
        q_values = self.ddqn.predict(
            np.expand_dims(np.asarray(state).astype(np.float64), axis=0),
            batch_size=1)
        action_vector = Action.action_from_vector(np.argmax(q_values[0]))
    return Action.normalized_action(environment.snake_action, action_vector)
def _ddqn(self, total_step_limit=10000000, total_run_limit=None, clip=True):
    """Train the DDQN agent with experience replay.

    Runs training episodes until ``total_step_limit`` environment steps
    (or, if given, ``total_run_limit`` episodes) have been consumed,
    remembering every (state, action, reward, next_state, terminal)
    transition and logging the mean score every
    SCORE_LOGGING_FREQUENCY finished episodes.

    :param total_step_limit: hard cap on total environment steps.
    :param total_run_limit: optional cap on the number of episodes;
        ``None`` means unlimited.
    :param clip: when True, clip each reward to its sign (-1, 0, or 1).
    """
    run = 0
    total_step = 0
    scores = []
    while True:
        if total_run_limit is not None and run >= total_run_limit:
            print("Reached total run limit of: " + str(total_run_limit))
            exit(0)

        run += 1
        env = self.prepare_training_environment()
        current_state = env.state()
        step = 0
        score = env.reward()
        while True:
            if total_step >= total_step_limit:
                print("Reached total step limit of: " + str(total_step_limit))
                exit(0)
            total_step += 1
            step += 1

            action = self._predict_move(current_state)
            action_vector = Action.action_from_vector(action)
            normalized_action = Action.normalized_action(
                env.snake_action, action_vector)
            next_state, reward, terminal = env.full_step(normalized_action)
            if clip:
                # FIX: the original called np.sign(reward) and discarded
                # the result, so rewards were never actually clipped.
                reward = np.sign(reward)
            score += reward
            self._remember(current_state, action, reward, next_state, terminal)
            current_state = next_state

            self._step_update(total_step)

            if terminal:
                scores.append(score)
                # Log and reset the score window every
                # SCORE_LOGGING_FREQUENCY completed episodes.
                if len(scores) % SCORE_LOGGING_FREQUENCY == 0:
                    self.log_score(mean(scores))
                    print('{{"metric": "score", "value": {}}}'.format(
                        mean(scores)))
                    print('{{"metric": "run", "value": {}}}'.format(run))
                    scores = []
                break