示例#1
0
# Random-policy rollout: step the environment with sampled actions, render
# every step, and print per-step diagnostics plus a summary whenever an
# episode ends (step budget exceeded or all agents report done).
max_path_length = args.mpl
path_length = 0
done = np.array([False] * num_agent)
c_r = np.zeros(num_agent)  # running sum of the per-step rewards r_n
max_o = 0.  # largest absolute observation value seen so far
while True:
    path_length += 1
    # One sampled action per agent. (For manual control, replace the sample
    # with space-separated floats read from input() for each agent.)
    a_n = [env.action_space.sample() for _ in range(num_agent)]
    o_n, r_n, done, _ = env.step(a_n)
    c_r += r_n
    env.render()
    print("step: ", path_length)
    print("a: ", a_n)
    # Peak observation magnitude for this step, computed once and reused.
    peak_o = np.max(np.abs(o_n))
    print("o: ", peak_o)
    max_o = max(max_o, peak_o)
    print('r: ', r_n)
    print(done)
    # Slow the loop down so the rendered frames can be followed by eye.
    time.sleep(0.1)
    timed_out = path_length > max_path_length
    if timed_out or done.all():
        print('c_r: ', c_r)
        print('max_o: ', max_o)
        # NOTE(review): only the step counter is reset in the visible code;
        # c_r and max_o keep accumulating across episodes — confirm intended.
        path_length = 0
示例#2
0
                    # Play `sample_num` episodes for this pairing and record the
                    # per-episode return of each side.
                    Cr1 = []
                    Cr2 = []
                    for _episode in range(sample_num):
                        o_n = env.reset()
                        cr1 = cr2 = 0
                        for _t in range(max_path_length):
                            # Actions are collected group 0 first, then group 1;
                            # each agent acts on its own observation.
                            actions = []
                            for policies, members in ((player1, groups[0]),
                                                      (player2, groups[1])):
                                for pid in members:
                                    act, _ = policies[pid].get_action(o_n[pid])
                                    actions.append(act)
                            o_n, r_n, done, _ = env.step(actions)
                            # Each side is scored with the reward of the first
                            # agent listed in its group.
                            cr1 += r_n[groups[0][0]]
                            cr2 += r_n[groups[1][0]]
                            if done.all():
                                break
                        Cr1.append(cr1)
                        Cr2.append(cr2)
                    mean_r1 = np.mean(Cr1)
                    mean_r2 = np.mean(Cr2)
                    print('{}: r1: {:.2f}; r2: {:.2f}'.format(
                        pair_name, mean_r1, mean_r2))
                    # Keep the raw per-episode returns, not just the means.
                    results[seed][pair_name]['r1'] = Cr1
                    results[seed][pair_name]['r2'] = Cr2

import pickle
# Open the log file for binary writing (pickle is imported just above).
# NOTE(review): the handle is opened without a `with` block and no close() is
# visible in this excerpt — confirm it is closed after writing.
f = open(log_file, "wb")
                labels[node] = 'target {}'.format(node-gb.num_agents)
        else:
            if i < gb.num_agents:
                pos[node] = x[node,2:4].numpy()
                labels[node] = 'agent {}'.format(node)
            else:
                pos[node] = x[node,4:6].numpy()
                labels[node] = 'target {}'.format(node-gb.num_agents)
        color_map.append('C'+str(node))

    plt.figure()
    networkx.draw(ng,pos,node_color=color_map,labels=labels)
    plt.show()

from simple_spread_graph import SimpleSpreadGraphBuilder
# Build the graph helper from the scenario's agent and landmark counts.
gb = SimpleSpreadGraphBuilder(
    num_agents=env.scenario.num_agents,
    num_landmarks=env.scenario.num_landmarks,
    batch_size=5,
    single_observe=args.single_observe,
    contain_self_loop=True,
)

# Render the initial state and draw the graph for the first observation.
obs = env.reset()
env.render()
check_graph(gb, obs)

# Drive the environment indefinitely with random actions, re-rendering and
# re-drawing the graph after every step. There is no exit condition, so the
# loop runs until the process is interrupted.
while True:
    # One 2-component action per agent, each component drawn from [-1, 1).
    random_actions = [np.random.rand(2) * 2 - 1 for _ in range(num_agent)]
    obs, r, done, info = env.step(random_actions)
    env.render()
    check_graph(gb, obs)