Exemplo n.º 1
0
# Periodic-task settings on the shared config object `c`; both values are
# counted in episodes ("int" presumably abbreviates "interval" -- confirm).
c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes


def policy_noise(action):
    """Return ``action`` with clipped normal noise added, bounded by -1 and 1.

    The noise is added by ``add_clipped_normal_noise_to_action`` using the
    parameters stored in ``c.policy_noise_params``; the noisy result is then
    passed through ``t.clamp`` with bounds -1 and 1.
    """
    noisy_action = add_clipped_normal_noise_to_action(
        action, c.policy_noise_params)
    return t.clamp(noisy_action, -1, 1)


if __name__ == "__main__":
    # Script entry point: set up the trial directories and logging, then
    # build the actor/critic networks.
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    # NOTE(review): prep_args is defined elsewhere; presumably it finalizes
    # the config `c` against the save environment -- confirm.
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    # Point the global board at this trial's training-log directory.
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")

    # Every network is moved to c.device and then wrapped in MW, which also
    # receives c.device twice (presumably input/output device -- confirm).
    # The `_t` suffix presumably marks target copies, and the two critic
    # pairs look like a twin-critic (TD3-style) setup -- confirm.
    actor = MW(
        Actor(observe_dim, action_dim, 1).to(c.device), c.device, c.device)
    actor_t = MW(
        Actor(observe_dim, action_dim, 1).to(c.device), c.device, c.device)
    critic = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
    critic_t = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
    critic2 = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
    critic2_t = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
Exemplo n.º 2
0
    if is_warm_up:
        # generate random actions
        act_dim = env.get_action_space(group_handle)[0]
        actions = np.random.randint(0, act_dim, agent_num, dtype=np.int32)

    env.set_action(group_handle, actions)


if __name__ == "__main__":
    # Script entry point: prepare logging, create the combat grid world and
    # read the action/view/feature dimensions of the first agent group.

    # Product of the epoch, episode and step limits.
    total_steps = max_epochs * max_episodes * max_steps

    # preparations
    prep_dirs_default(root_dir)
    logger.info("Directories prepared.")
    global_board.init(log_dir + "train_log")
    writer = global_board.writer

    env = magent.GridWorld(generate_combat_config(map_size), map_size=map_size)
    # int(sqrt(x))**2 rounds map_size^2 * agent_ratio down to a perfect
    # square (presumably so agents fit a square formation -- confirm).
    agent_num = int(np.sqrt(map_size * map_size * agent_ratio))**2
    group1_handle, group2_handle = env.get_handles()

    # shape: (act,)
    action_dim = env.get_action_space(group1_handle)[0]
    # shape: (view_width, view_height, n_channel)
    view_space = env.get_view_space(group1_handle)
    # Total number of elements in one view (product of the view-space dims).
    view_dim = np.prod(view_space)
    # shape: (ID embedding + last action + last reward + relative pos)
    feature_dim = env.get_feature_space(group1_handle)[0]

    base_actor = SwarmActor(view_dim, action_dim, history_depth, neighbor_num,
Exemplo n.º 3
0
    logger.info("Framework 1 initialized")

    c2.restart_from_trial = load_trial2
    framework2 = create_models2()
    logger.info("Framework 2 initialized")

    operators = [(framework1, run_agents1, load_framework1),
                 (framework2, run_agents2, load_framework2)]

    # testing
    # preparations
    config = generate_combat_config(map_size)
    env = magent.GridWorld(config, map_size=map_size)
    env.reset()

    global_board.init(test_root_dir)
    writer = global_board.writer
    logger.info("Directories prepared.")

    # begin testing
    episode = Counter()
    episode_finished = False
    wins = [0, 0]

    while episode < max_episodes:
        episode.count()
        logger.info("Begin episode {} at {}".format(episode, dt.now().strftime("%m/%d-%H:%M:%S")))

        # environment initialization
        env.reset()
        env.set_render_dir(test_root_dir)