Example #1
                        # roll back to the pre-update weights kept in target_agent and stop the epoch loop
                        agent.load_state_dict(target_agent.state_dict())
                        break

            ## CRASH AND RESUME LOGIC:
            if args.prod_mode:
                if not os.path.exists(f"models/{experiment_name}"):
                    os.makedirs(f"models/{experiment_name}")
                torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
                wandb.save(f"agent.pt")

            # TRY NOT TO MODIFY: record rewards for plotting purposes
            writer.add_scalar("charts/learning_rate",
                              optimizer.param_groups[0]['lr'], global_step)
            writer.add_scalar("charts/update", update, global_step)
            writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
            writer.add_scalar("losses/policy_loss", pg_loss.item(),
                              global_step)
            writer.add_scalar("losses/entropy",
                              entropy.mean().item(), global_step)
            writer.add_scalar("losses/approx_kl", approx_kl.item(),
                              global_step)
            if args.kle_stop or args.kle_rollback:
                writer.add_scalar("debug/pg_stop_iter", i_epoch_pi,
                                  global_step)
            print("SPS:", int(global_step / (time.time() - start_time)))

    # timing breakdown collected by the stopwatch profiler (sw) during the run
    print(stopwatch.format_report(sw.get_last_aggregated_report()))

envs.close()
writer.close()
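
The save block above only covers the crash side. A minimal sketch of the matching resume path, assuming the same args.prod_mode flag and that agent and device are defined earlier in the script (the checkpoint local name is illustrative; wandb.restore pulls the file back from the resumed W&B run):

if args.prod_mode and wandb.run.resumed:
    # download the previously saved checkpoint and restore the agent weights
    checkpoint = wandb.restore("agent.pt")
    agent.load_state_dict(torch.load(checkpoint.name, map_location=device))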
Example #2
# imports assumed for this snippet; the MicroRTSVecEnv import path may differ
# across gym-microrts versions
import time
import traceback

import numpy as np

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSVecEnv

try:
    env = MicroRTSVecEnv(num_envs=1,
                         render_theme=2,
                         ai2s=[microrts_ai.coacAI],
                         map_path="maps/16x16/basesWorkers16x16.xml",
                         reward_weight=np.array(
                             [10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    traceback.print_exc()
env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()

    # optional: selecting only valid units.
    if len(action_mask.nonzero()[0]) != 0:
        action[0] = action_mask.nonzero()[0][0]

    next_obs, reward, done, info = env.step([action])
    if done:
        env.reset()
env.close()
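
As a variant of the unit-selection step above, one could sample a random valid unit instead of always taking the first one. A minimal sketch reusing the same action_mask and action variables from the loop (valid_units is just an illustrative helper name):

valid_units = action_mask.nonzero()[0]
if len(valid_units) != 0:
    # overwrite the unit-location component with a randomly chosen valid unit
    action[0] = np.random.choice(valid_units)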