# Restore the agent from the checkpoint saved under this experiment, then
# continue training from `starting_update`.
model.download(f"models/{experiment_name}/")
agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt"))
agent.eval()
print(f"resumed at update {starting_update}")

for update in range(starting_update, num_updates + 1):
    with sw.timer('main'):
        # Linearly decay the learning rate over the course of training,
        # if requested on the command line.
        if args.anneal_lr:
            remaining = 1.0 - (update - 1.0) / num_updates
            optimizer.param_groups[0]['lr'] = lr(remaining)

        with sw.timer('rollouts'):
            # Collect one rollout of `args.num_steps` environment steps.
            for step in range(0, args.num_steps):
                envs.render()
                global_step += args.num_envs
                # Store the observation/done flags that produced this step.
                obs[step] = next_obs
                dones[step] = next_done

                # Inference only: no gradients are needed while acting.
                with torch.no_grad():
                    with sw.timer('inference_value'):
                        values[step] = agent.get_value(obs[step]).flatten()
                    with sw.timer('inference_action'):
                        action, logproba, _, invalid_action_masks[step] = agent.get_action(
                            obs[step], envs=envs, sw=sw)
                actions[step] = action.T
import traceback

from gym_microrts import microrts_ai
from gym.envs.registration import register
from gym_microrts import Config

# Build a single MicroRTS environment against the CoacAI scripted opponent.
try:
    env = MicroRTSVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.coacAI],
        map_path="maps/16x16/basesWorkers16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    # BUG FIX: the original called `e.printStackTrace()` — a Java method that
    # does not exist on Python exceptions, so the handler itself crashed with
    # an AttributeError. Print the real traceback instead (best-effort: the
    # script still proceeds, matching the original control flow).
    traceback.print_exc()

# Random-agent demo loop: sample actions, optionally snapping the acting
# unit to the first valid unit location reported by the environment.
env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()
    # Optional: select only valid units (hoisted the duplicated
    # `action_mask.nonzero()[0]` computation into one lookup).
    valid_units = action_mask.nonzero()[0]
    if len(valid_units) != 0:
        action[0] = valid_units[0]
    next_obs, reward, done, info = env.step([action])