# Random-policy rollout used to eyeball the environment: every agent takes an
# action sampled from env.action_space, the env is stepped and rendered, and
# per-step diagnostics are printed.
max_path_length = args.mpl
path_length = 0
done = np.array([False] * num_agent)
c_r = np.zeros(num_agent)  # cumulative reward, one slot per agent
max_o = 0.0                # largest |observation| entry seen so far

while True:
    path_length += 1

    # One sampled action per agent. (An earlier, disabled variant read each
    # agent's action from stdin as space-separated floats instead.)
    a_n = [env.action_space.sample() for _ in range(num_agent)]

    o_n, r_n, done, _ = env.step(a_n)
    c_r += r_n
    env.render()

    # Magnitude of the largest observation entry for this step; computed once
    # and reused for both the printout and the running maximum.
    peak_o = np.max(np.abs(o_n))

    print("step: ", path_length)
    print("a: ", a_n)
    print("o: ", peak_o)
    max_o = max(max_o, peak_o)
    print('r: ', r_n)
    print(done)

    # Slow the loop down so the rendered frames can be followed by eye.
    time.sleep(0.1)

    episode_over = path_length > max_path_length or done.all()
    if episode_over:
        print('c_r: ', c_r)
        print('max_o: ', max_o)
        # NOTE(review): only the step counter is reset in the code visible
        # here; c_r and max_o keep accumulating unless they are reset further
        # down -- confirm against the rest of the file.
        path_length = 0
# Roll out `sample_num` episodes for the current player pair and record, per
# episode, the summed reward of the first agent listed in each group.
Cr1, Cr2 = [], []
for i in range(sample_num):
    o_n = env.reset()
    cr1 = cr2 = 0
    for _ in range(max_path_length):
        # Collect actions group by group: every agent in groups[0] is driven
        # by player1, then every agent in groups[1] by player2.
        actions = []
        for policies, members in ((player1, groups[0]), (player2, groups[1])):
            for sub_pid in members:
                # get_action returns a pair; only its first item is used.
                act, _ = policies[sub_pid].get_action(o_n[sub_pid])
                actions.append(act)

        o_n, r_n, done, _ = env.step(actions)

        # Only the reward of each group's first agent is accumulated.
        cr1 += r_n[groups[0][0]]
        cr2 += r_n[groups[1][0]]

        if done.all():
            break
    Cr1.append(cr1)
    Cr2.append(cr2)

mean_r1 = np.mean(Cr1)
mean_r2 = np.mean(Cr2)
print('{}: r1: {:.2f}; r2: {:.2f}'.format(pair_name, mean_r1, mean_r2))

pair_results = results[seed][pair_name]
pair_results['r1'] = Cr1
pair_results['r2'] = Cr2

# NOTE(review): the nesting of the two statements below relative to the
# enclosing loops (whose headers are outside this view) could not be
# recovered; the handle opened here is presumably written to and closed by the
# code that follows -- confirm.
import pickle
f = open(log_file, "wb")
labels[node] = 'target {}'.format(node-gb.num_agents) else: if i < gb.num_agents: pos[node] = x[node,2:4].numpy() labels[node] = 'agent {}'.format(node) else: pos[node] = x[node,4:6].numpy() labels[node] = 'target {}'.format(node-gb.num_agents) color_map.append('C'+str(node)) plt.figure() networkx.draw(ng,pos,node_color=color_map,labels=labels) plt.show() from simple_spread_graph import SimpleSpreadGraphBuilder gb = SimpleSpreadGraphBuilder( num_agents=env.scenario.num_agents, num_landmarks=env.scenario.num_landmarks, batch_size=5, single_observe=args.single_observe, contain_self_loop=True, ) obs = env.reset() env.render() check_graph(gb, obs) while True: obs,r,done,info = env.step([np.random.rand(2)*2-1 for i in range(num_agent)]) env.render() check_graph(gb, obs)