if __name__ == "__main__": # maze game env = Env() RL = DeepQNetwork(env.n_actions, env.n_features, learning_rate=0.0001, reward_decay=0.9, e_greedy=0.75, replace_target_iter=2000, memory_size=MEMORYCAPACITY, batch_size=64 # output_graph=True ) RL.restore_model() for episode in range(EPS): env.build_map() value = 0 for step in range(STEP): state = env.state.copy() action = RL.choose_action(state) env.step(action_space[action]) state_ = env.state.copy() reward, dist = compute_reward(state, state_) RL.store_transition(state, action, reward, state_) value += reward if dist < DIST: break if RL.memory_counter > MEMORYCAPACITY: