# make env env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, max_step=1000, render=RENDER) # get map info size_x, size_y = env.get_map_size() red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num( ) # set map info to blue agent blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num) red_detector_action = [] fighter_model = dqn.RLFighter(ACTION_NUM) # execution for x in range(MAX_EPOCH): step_cnt = 0 env.reset() episodic_reward = 0 while True: obs_list = [] action_list = [] red_fighter_action = [] # get obs if step_cnt == 0: red_obs_dict, blue_obs_dict = env.get_obs() # get action
# NOTE(review): fragment collapsed onto one physical line in the source;
# reformatted here with tokens unchanged. It begins mid-way through an
# Environment(...) call whose opening parenthesis is outside this view,
# and it ends on a dangling "if step_cnt == 0:" whose body follows
# outside this view. Do not treat this chunk as standalone-runnable.
                  blue_agent_obs_ind, render=RENDER, max_step=MAX_STEP)
# get map info
size_x, size_y = env.get_map_size()
red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
# set map info to blue agent
blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num)
red_detector_action = []  # presumably stays empty: no detector actions are built in the visible code — verify against env.step caller
# full hyperparameter set for the DQN fighter agent (cf. the earlier variant
# that passed only ACTION_NUM)
fighter_model = dqn.RLFighter(n_actions=ACTION_NUM, learning_rate=LR,
                              reward_decay=GAMMA, e_greedy=EPSILON,
                              e_greedy_increment=EPSILON_INCREMENT,
                              capacity=CAPACITY, batch_size=BATCH_SIZE,
                              replace_target_iter=TARGET_REPLACE_ITER)
# execution
for i_episode in range(MAX_EPOCH):  # one training episode per iteration
    step_cnt = 0
    total_reward = 0.0  # overall reward of all agents for this episode
    env.reset()
    while True:  # step loop; the break/termination condition is outside this view
        obs_list = [0 for _ in range(red_fighter_num)]  # per-fighter slots, pre-filled with 0
        action_list = [0 for _ in range(red_fighter_num)]  # per-fighter action slots, pre-filled with 0
        red_fighter_action = []
        # get obs
        if step_cnt == 0:  # body of this branch continues outside this view