Example #1
0
from magent.model import BaseModel
from numpy import ndarray

# Side length of the square grid world.
MAP_SIZE: int = 64

# NOTE(review): this excerpt references `magent`, `List`, `Tuple`, and
# `RandomActor` without visible imports — presumably imported in a part of
# the original file not shown here; confirm before running.
if __name__ == "__main__":
    # Build the built-in "forest" scenario (deer vs. tiger) on a
    # MAP_SIZE x MAP_SIZE grid.
    environment: magent.GridWorld = magent.GridWorld("forest",
                                                     map_size=MAP_SIZE)
    # Frames produced during rendering are written to this directory.
    environment.set_render_dir("render")

    # get_handles() returns one group handle per agent type, in the order
    # the scenario declares them (deer first, then tiger).
    deer_handle: int
    tiger_handle: int
    deer_handle, tiger_handle = environment.get_handles()

    # One random-policy actor per group; both groups act randomly here.
    models: List[BaseModel] = [
        RandomActor(environment, deer_handle),
        RandomActor(environment, tiger_handle)
    ]

    # Populate the map: walls cover ~4% of the cells, then a handful of
    # agents of each type are scattered at random positions.
    environment.reset()
    environment.add_walls(method="random", n=MAP_SIZE * MAP_SIZE * 0.04)
    environment.add_agents(deer_handle, method="random", n=5)
    environment.add_agents(tiger_handle, method="random", n=2)

    # Inspect the observation layout of each group: the spatial "view"
    # tensor shape and the flat "feature" vector shape.
    tiger_view_space: Tuple = environment.get_view_space(tiger_handle)
    tiger_feature_space: Tuple = environment.get_feature_space(tiger_handle)
    print(
        f"Tiger view space: {tiger_view_space} features: {tiger_feature_space}"
    )

    # (excerpt truncated here in the original source)
    deer_view_space: Tuple = environment.get_view_space(deer_handle)
Example #2
0
def sample_observation(env, handles, n_obs=-1, step=-1):
    """Sample observations by random actors.
    These samples can be used for evaluation

    Parameters
    ----------
    env : environment
    handles: list of handle
    n_obs : int
        number of observation (-1 keeps every recorded step)
    step : int
        maximum step (-1 runs until the episode reports done)

    Returns
    -------
    ret : list of raw observation
        raw observation for every group
        the format of raw observation is tuple(view, feature)
    """
    # One random-policy actor per group drives the environment.
    models = [RandomActor(env, handle) for handle in handles]

    n = len(handles)
    views = [[] for _ in range(n)]
    features = [[] for _ in range(n)]

    done = False
    step_ct = 0
    while not done:
        # obs[i] is a (view, feature) pair for group i.
        obs = [env.get_observation(handle) for handle in handles]
        ids = [env.get_agent_id(handle) for handle in handles]

        for i in range(n):
            act = models[i].infer_action(obs[i], ids[i])
            env.set_action(handles[i], act)

        done = env.step()
        env.clear_dead()

        # Record this step's observations for every group.
        for i in range(n):
            views[i].append(obs[i][0])
            # BUG FIX: the original appended features[i][1] — an element of
            # the accumulator list itself (IndexError on the first step,
            # self-referential garbage afterwards). The feature half of the
            # observation is obs[i][1].
            features[i].append(obs[i][1])

        if step != -1 and step_ct > step:
            break

        if step_ct % 100 == 0:
            print("sample step %d" % step_ct)

        step_ct += 1

    # Flatten the per-step batches into one big array per group, shaped
    # (total_agents_seen,) + per-agent observation shape.
    for i in range(n):
        views[i] = np.array(views[i], dtype=np.float32).reshape((-1,) +
                            env.get_view_space(handles[i]))
        features[i] = np.array(features[i], dtype=np.float32).reshape((-1,) +
                               env.get_feature_space(handles[i]))

    # Optionally subsample n_obs rows (with replacement) from each group.
    if n_obs != -1:
        for i in range(n):
            views[i] = views[i][np.random.choice(np.arange(views[i].shape[0]), n_obs)]
            features[i] = features[i][np.random.choice(np.arange(features[i].shape[0]), n_obs)]

    ret = [(v, f) for v, f in zip(views, features)]
    return ret
Example #3
0
    # Command-line options (parser is created above this excerpt).
    parser.add_argument("--greedy", action="store_true")
    parser.add_argument("--map_size", type=int, default=500)
    parser.add_argument("--name", type=str, default="tiger")
    parser.add_argument('--alg', default='dqn', choices=['dqn', 'drqn', 'a2c'])
    args = parser.parse_args()

    # init the game
    env = magent.GridWorld("double_attack", map_size=args.map_size)
    env.set_render_dir("build/render")

    # two groups of animal
    deer_handle, tiger_handle = env.get_handles()

    # init two models
    # NOTE(review): RandomActor is passed both handles here, unlike the
    # one-handle-per-actor usage elsewhere — confirm this signature against
    # the RandomActor definition.
    models = [
        RandomActor(env, deer_handle, tiger_handle),
    ]

    batch_size = 512
    unroll = 8

    # The tiger group gets a learned model when DQN is selected; other
    # algorithm branches presumably follow below this excerpt.
    if args.alg == 'dqn':
        from magent.builtin.tf_model import DeepQNetwork
        models.append(
            DeepQNetwork(env,
                         tiger_handle,
                         args.name,
                         batch_size=batch_size,
                         memory_size=2**20,
                         learning_rate=4e-4))
        step_batch_size = None
Example #4
0
    # (n_step, skip, agent_number, and args come from code above this excerpt.)
    n_step += skip

    # init the game "forest" (or "battle" here)
    # Map side length scales with sqrt(agent_number) so density stays roughly
    # constant as the agent count grows.
    env = magent.GridWorld(load_forest(int(math.sqrt(agent_number * 20))))
    env.reset()

    # add two groups of animals
    deer_handle, tiger_handle = env.get_handles()

    # NOTE(review): `/` yields a float in Python 3; if add_walls/add_agents
    # require an integer `n`, these should be `//` — confirm against the
    # MAgent API.
    env.add_walls(method="random", n=agent_number / 10)
    env.add_agents(deer_handle,  method="random", n=agent_number / 2)
    env.add_agents(tiger_handle, method="random", n=agent_number / 2)

    # init two models
    # CPU-only runs use random policies; with GPUs, a DQN (TensorFlow or
    # MXNet backend, chosen by args.frame) controls each group.
    if args.num_gpu == 0:
        model1 = RandomActor(env, deer_handle, "deer")
        model2 = RandomActor(env, tiger_handle, "tiger")
    else:
        if args.frame == 'tf':
            from magent.builtin.tf_model import DeepQNetwork
        else:
            from magent.builtin.mx_model import DeepQNetwork
        model1 = DeepQNetwork(env, deer_handle, "deer", num_gpu=args.num_gpu, infer_batch_size=100000)
        model2 = DeepQNetwork(env, tiger_handle, "tiger", num_gpu=args.num_gpu, infer_batch_size=100000)

    total_reward = 0

    print(env.get_view_space(deer_handle))
    print(env.get_view_space(tiger_handle))

    total_time = 0