示例#1
0
# Rollout storage: every buffer is indexed by (step, env) and is allocated
# directly on `device`, instead of being built on the host and then copied.
rollout_shape = (args.num_steps, args.num_envs)
obs = torch.zeros(rollout_shape + envs.observation_space.shape, device=device)
actions = torch.zeros(rollout_shape + envs.action_space.shape, device=device)
logprobs = torch.zeros(rollout_shape, device=device)
rewards = torch.zeros(rollout_shape, device=device)
dones = torch.zeros(rollout_shape, device=device)
values = torch.zeros(rollout_shape, device=device)
# The trailing dimension of the mask buffer is `envs.action_space.nvec.sum()`
# entries wide for each (step, env) slot.
invalid_action_masks = torch.zeros(
    rollout_shape + (envs.action_space.nvec.sum(), ), device=device)
# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
# Note how `next_obs` and `next_done` are used; their usage is equivalent to
# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
next_obs = envs.reset()
# One done flag per environment, created on `device` directly rather than
# allocated on the host and copied over.
next_done = torch.zeros(args.num_envs, device=device)
# Number of whole batches that fit in the timestep budget (floor division).
num_updates = args.total_timesteps // args.batch_size
sw = stopwatch.StopWatch()
## CRASH AND RESUME LOGIC:
# A fresh run starts at update 1. A resumed wandb run continues after the last
# update recorded in its summary and reloads the saved agent weights.
starting_update = 1

if args.prod_mode and wandb.run.resumed:
    from urllib.parse import urlparse

    print("previous run.summary", run.summary)
    starting_update = run.summary['charts/update'] + 1
    # Update k begins after k - 1 completed batches, mirroring the fresh-start
    # pairing of update 1 with global_step 0.
    global_step = (starting_update - 1) * args.batch_size
    api = wandb.Api()
    # Read the run path from the URL itself instead of slicing off a fixed
    # "https://app.wandb.ai/" prefix, so URLs served from a different wandb
    # host still yield the correct "entity/project/runs/id" path.
    run = api.run(urlparse(run.get_url()).path.strip("/"))
    model = run.file('agent.pt')
    model.download(f"models/{experiment_name}/")
    # map_location lets a checkpoint saved on another device (e.g. CUDA) be
    # loaded onto the device this process is actually using.
    agent.load_state_dict(
        torch.load(f"models/{experiment_name}/agent.pt",
                   map_location=device))
示例#2
0
from gym_microrts.envs.vec_env import MicroRTSVecEnv
from gym_microrts import microrts_ai
from gym.envs.registration import register
from gym_microrts import Config

# Build a single-environment MicroRTS vector env playing against coacAI on the
# 16x16 bases/workers map, then show the initial frame.
try:
    env = MicroRTSVecEnv(num_envs=1,
                         render_theme=2,
                         ai2s=[microrts_ai.coacAI],
                         map_path="maps/16x16/basesWorkers16x16.xml",
                         reward_weight=np.array(
                             [10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    # Python exceptions have no `printStackTrace()`; calling it raised an
    # AttributeError that hid the real failure. Print the traceback and
    # re-raise, because `env` would be undefined for the code below anyway.
    import traceback
    traceback.print_exc()
    raise
# Seed the action-space sampler, then reset the environment again.
env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    # Flatten the unit-location masks reported by the vec client into 1-D.
    unit_location_masks = env.vec_client.getUnitLocationMasks()
    action_mask = np.array(unit_location_masks).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()

    # optional: selecting only valid units.
    # Overwrite the first action component with the first nonzero mask index,
    # when there is one.
    valid_units = action_mask.nonzero()[0]
    if valid_units.size > 0:
        action[0] = valid_units[0]