Example #1

import time

import numpy as np

import magent


def play_a_round(env,
                 map_size,
                 handles,
                 models,
                 print_every,
                 train=True,
                 render=False,
                 eps=None):
    env.reset()

    env.add_walls(method="random", n=map_size * map_size * 0.03)
    env.add_agents(handles[0], method="random", n=map_size * map_size * 0.0125)
    env.add_agents(handles[1], method="random", n=map_size * map_size * 0.025)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i],
                                   ids[i],
                                   'e_greedy',
                                   eps,
                                   block=False)
        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            if train:
                alives = env.get_alive(handles[i])
                # store samples in replay buffer (non-blocking)
                models[i].sample_step(rewards, alives, block=False)
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # clear dead agents
        env.clear_dead()

        # check the return status of the earlier non-blocking sample_step() calls
        if train:
            for model in models:
                model.check_done()

        if step_ct % print_every == 0:
            print("step %3d,  reward: %s,  total_reward: %s" %
                  (step_ct, np.around(step_reward, 2), np.around(total_reward, 2)))
        step_ct += 1
        if step_ct > 250:
            break

    sample_time = time.time() - start_time
    print("steps: %d,  total time: %.2f,  step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    total_loss, value = [0 for _ in range(n)], [0 for _ in range(n)]
    if train:
        print("===== train =====")
        start_time = time.time()

        # train models in parallel
        for i in range(n):
            models[i].train(print_every=2000, block=False)
        for i in range(n):
            total_loss[i], value[i] = models[i].fetch_train()

        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    print(total_loss)
    print(total_reward)
    print(value)
    return magent.round(total_loss), magent.round(total_reward), magent.round(value)
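
A minimal driver sketch for the play_a_round() above. env and handles come from MAgent's built-in "battle" config; the map size, number of rounds and epsilon schedule are illustrative assumptions, and build_models() is a hypothetical helper standing in for whatever constructs models with the non-blocking interface (infer_action(block=False), fetch_action(), sample_step(), train()) used above.

import magent

map_size = 125                            # assumed map size
env = magent.GridWorld("battle", map_size=map_size)
handles = env.get_handles()               # one handle per agent group

# hypothetical helper: must return models exposing the non-blocking interface,
# e.g. MAgent's ProcessingModel wrapper around a DQN; construction omitted here
models = build_models(env, handles)

for k in range(2000):                     # assumed number of training rounds
    eps = max(0.05, 0.6 * 0.996 ** k)     # assumed epsilon-greedy decay schedule
    loss, reward, value = play_a_round(env, map_size, handles, models,
                                       print_every=50, train=True,
                                       render=False, eps=eps)
    print("round %d  loss: %s  reward: %s  value: %s" % (k, loss, reward, value))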
Example #2
max_gpu = args.max_gpu
framework = args.frame

number = [1000, 10000, 100000, 1000000]
gpus   = range(max_gpu+1)

ret = []

for n in number:
    row = []
    for g in gpus:
        n_step = 30000000 // n  # keep the total number of agent-steps fixed at 30M
        cmd = ("python scripts/test/test_1m.py --n_step %d --agent_number %d --num_gpu %d --frame %s > /dev/shm/aha "
               "&& cat /dev/shm/aha | grep FPS > %s" % (n_step, n, g, framework, tmp_name))
        if n < 1000000:
            cmd = 'OMP_NUM_THREADS=8  ' + cmd
        else:
            cmd = 'OMP_NUM_THREADS=16 ' + cmd
        print(cmd)
        os.system(cmd)
        with open(tmp_name) as fin:
            line = fin.readline()
            x = eval(line)[1]
        row.append(x)
        print(x)

    ret.append(row)

for row in ret:
    print(magent.round(row))
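
The benchmark loop above relies on os, magent, args and tmp_name being defined earlier in the script. A minimal sketch of that setup; the argument defaults and the tmp_name path are assumptions for illustration:

import argparse
import os

import magent

parser = argparse.ArgumentParser()
parser.add_argument("--max_gpu", type=int, default=0)   # largest GPU count to benchmark
parser.add_argument("--frame", type=str, default="tf")  # framework passed through to test_1m.py
args = parser.parse_args()

tmp_name = "/dev/shm/benchmark_fps.log"  # assumed scratch file for the grepped FPS line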
Example #3
def play_a_round(env,
                 map_size,
                 handles,
                 models,
                 print_every,
                 eps,
                 step_batch_size=None,
                 train=True,
                 train_id=1,
                 render=False):
    """play a round of game"""
    env.reset()
    generate_map(env, map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]
    n_transition = 0
    pos_reward_num = 0
    total_loss, value = 0, 0

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i],
                                   ids[i],
                                   'e_greedy',
                                   eps[i],
                                   block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            if train and i == train_id:
                alives = env.get_alive(handles[train_id])
                # store samples in replay buffer (non-blocking)
                models[train_id].sample_step(rewards, alives, block=False)
                pos_reward_num += len(rewards[rewards > 0])
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]
        n_transition += nums[train_id]

        # clear dead agents
        env.clear_dead()

        # check the return status of the previously called non-blocking sample_step()
        if train:
            models[train_id].check_done()

        if step_ct % print_every == 0:
            print("step %3d,  nums: %s,  reward: %s,  total_reward: %s,  pos_rewards: %d" %
                  (step_ct, nums, np.around(step_reward, 2),
                   np.around(total_reward, 2), pos_reward_num))
        step_ct += 1
        if step_ct > args.n_step:
            break

        if step_batch_size and n_transition > step_batch_size and train:
            total_loss, value = models[train_id].train(500)
            n_transition = 0

    sample_time = time.time() - start_time
    print("steps: %d,  total time: %.2f,  step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    if train:
        print("===== train =====")
        start_time = time.time()
        total_loss, value = models[train_id].train(500)
        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    return magent.round(total_loss), nums, magent.round(total_reward), magent.round(value)
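
play_a_round() here delegates agent placement to a generate_map() helper that is not shown. A minimal sketch of such a helper, reusing the add_walls/add_agents calls from Example #1; the densities and the random placement method are assumptions, not the original values:

def generate_map(env, map_size, handles):
    # assumed wall/agent densities, mirroring Example #1
    env.add_walls(method="random", n=map_size * map_size * 0.03)
    env.add_agents(handles[0], method="random", n=map_size * map_size * 0.0125)
    env.add_agents(handles[1], method="random", n=map_size * map_size * 0.0125)

Note that eps is indexed per group here (eps[i]), so the caller passes one exploration rate per handle, e.g. [0, 0.2] if only the trained group (train_id=1) should explore.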