Example #1
import torch

# Project-local modules (import paths are assumptions; adjust to your checkout):
import ppo                               # PPO fighter model
from agent.fix_rule.agent import Agent   # rule-based blue agent
from interface import Environment        # combat environment

# Referenced below but undefined in the original snippet; values are placeholders.
MAP_PATH = 'maps/1000_1000_fighter10v10.map'  # assumed map file
RENDER = True               # render the environment while running
LR = 0.002                  # learning rate (assumed)
MAX_EPOCH = 5000            # training-epoch budget (assumed)

GAMMA = 0.99                # reward discount
TARGET_REPLACE_ITER = 999   # target network update frequency
DETECTOR_NUM = 0
FIGHTER_NUM = 10
COURSE_NUM = 16
# long-missile attack + short-missile attack per enemy unit, plus no-attack
ATTACK_IND_NUM = (DETECTOR_NUM + FIGHTER_NUM) * 2 + 1
ACTION_NUM = COURSE_NUM * ATTACK_IND_NUM
LEARN_INTERVAL = TARGET_REPLACE_ITER
BETAS = (0.9, 0.999)        # Adam optimizer betas
EPS_clip = 0.2              # PPO clipping range
K_epochs = 4                # update epochs per PPO step
max_timesteps = 300         # max steps per episode
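
# Illustrative only (not in the original script): how a flat action index
# factors back into a (course, attack) pair under the encoding implied by
# ACTION_NUM = COURSE_NUM * ATTACK_IND_NUM. The real decoding lives inside
# the PPO fighter model; the helper name below is hypothetical.
def decode_action(action_id):
    course, attack = divmod(action_id, ATTACK_IND_NUM)
    return course, attack  # course in [0, 16), attack in [0, 21)

assert ACTION_NUM == 336  # 16 courses x (10 fighters x 2 missile types + 1 no-attack)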

if __name__ == "__main__":
    # create blue agent
    blue_agent = Agent()
    # get agent obs type
    red_agent_obs_ind = 'ppo'
    blue_agent_obs_ind = blue_agent.get_obs_ind()
    # make env
    env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, render=RENDER)
    # get map info
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
    # set map info to blue agent
    blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num)

    red_detector_action = []  # no red detectors on this map (DETECTOR_NUM = 0)
    fighter_model = ppo.PPOFighter(ACTION_NUM, LR, BETAS, GAMMA, EPS_clip, K_epochs, MAX_EPOCH, LEARN_INTERVAL, max_timesteps)
    # load the same checkpoint into both networks so the behavior policy
    # (policy_old) and the trainable policy start in sync
    fighter_model.policy.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
    fighter_model.policy_old.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
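
    # Minimal evaluation-loop sketch, NOT part of the original script. It
    # assumes a MaCA-style interface (env.reset / env.get_obs / env.step /
    # env.get_done) and a rule-based blue_agent.get_action(obs, step); these
    # method names, and choose_action below, are assumptions.
    for episode in range(1):
        env.reset()
        for step in range(max_timesteps):
            red_obs_dict, blue_obs_dict = env.get_obs()
            # blue side: fixed rule-based policy
            blue_detector_action, blue_fighter_action = blue_agent.get_action(blue_obs_dict, step)
            # red side: each fighter picks one of the ACTION_NUM discrete actions
            # with the loaded behavior policy (hypothetical selection helper)
            red_fighter_action = [fighter_model.choose_action(red_obs_dict, y)
                                  for y in range(red_fighter_num)]
            env.step(red_detector_action, red_fighter_action, blue_detector_action, blue_fighter_action)
            if env.get_done():
                break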