Example #1
    cgca = data['trainer/cgca']
    cactor_n = data['trainer/cactor_n']
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    cactor_n = [MakeDeterministic(cactor) for cactor in cactor_n]

for a2 in a2s:
    o_n = env.reset()
    cg_inputs = torch.cat([
        torch.tensor(o_n),
        torch.tensor([[np.random.rand() * 2. - 1.], [a2]]),
    ], dim=-1).float()[None, :]
    # cg_inputs = torch.cat([torch.tensor(o_n), torch.tensor([[0.5],[a2]])],dim=-1).float()[None,:]
    if context_graph:
        contexts = context_graph(cg_inputs)
        c1, _ = cactor.get_action(contexts[:, 0, :])
        c1s.append(c1[0])
    else:
        if cactor:
            cactions, _ = cactor.get_action(cg_inputs[0])
            c1s.append(cactions[0, :])
        else:
            contexts = cgca(cg_inputs)
            c1, _ = cactor_n[0].get_action(contexts[:, 0, :])
            c1s.append(c1[0])

for a1 in a1s:
    o_n = env.reset()
    cg_inputs = torch.cat([
        torch.tensor(o_n),
        torch.tensor([[a1], [np.random.rand() * 2. - 1.]])
Example #2
            eval_policy = MakeDeterministic(policy)

            returns = []
            success_num = 0
            collision_num = 0
            inference_correct = 0
            inference_total = 0
            for _ in range(args.epoch):
                o = env.reset()
                policy.reset()
                path_length = 0
                done = False
                c_r = 0.
                while True:
                    path_length += 1
                    a, agent_info = eval_policy.get_action(o)
                    o, r, done, env_info = env.step(a)

                    if 'intentions' in agent_info.keys():
                        intention_probs = agent_info['intentions']
                        inferred_intentions = np.argmax(intention_probs,
                                                        axis=-1)
                        true_intentions = env.get_sup_labels()
                        valid_mask = ~np.isnan(true_intentions)
                        true_intentions = true_intentions[valid_mask]
                        inferred_intentions = inferred_intentions[valid_mask]
                        inference_correct += np.sum(
                            inferred_intentions == true_intentions)
                        inference_total += np.sum(valid_mask)
                    else:
                        inference_total += 1
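
The excerpt stops while the counters are still being accumulated. A plausible way to turn them into summary metrics once the evaluation loop finishes is sketched below; the handling of returns, success_num and collision_num is an assumption, since the code that fills them is not shown above.

import numpy as np

def summarize_eval(returns, success_num, collision_num,
                   inference_correct, inference_total, num_episodes):
    # Hypothetical aggregation of the counters used in the excerpt above.
    return {
        'average_return': float(np.mean(returns)) if len(returns) else 0.0,
        'success_rate': success_num / num_episodes,
        'collision_rate': collision_num / num_episodes,
        'inference_accuracy': (inference_correct / inference_total
                               if inference_total > 0 else float('nan')),
    }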
Example #3
if args.eval:
    if not args.log_dir.startswith('MADDPG'):
        from rlkit.torch.policies.make_deterministic import MakeDeterministic
        p1 = MakeDeterministic(data['trainer/trained_policy_n'][0])
        p2 = MakeDeterministic(data['trainer/trained_policy_n'][1])

plt.figure()
with torch.no_grad():
    for _ in range(args.num):
        o_n = env.reset()
        xs = [o_n[0][0]]
        ys = [o_n[0][1]]
        obs1 = torch.tensor(o_n[0]).float()
        obs2 = torch.tensor(o_n[1]).float()
        for i in range(5):
            a1, _ = p1.get_action(obs1)
            a2, _ = p2.get_action(obs2)
            o_n, r_n, done, info = env.step([a1, a2])
            xs.append(o_n[0][0])
            ys.append(o_n[0][1])
            obs1 = torch.tensor(o_n[0]).float()
            obs2 = torch.tensor(o_n[1]).float()
        plt.plot(xs, ys, 'o-')
        plt.plot(xs[0], ys[0], 'o', color='green')
        plt.plot(xs[-1], ys[-1], 'o', color='black')

plt.gca().set_aspect('equal', 'box')
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.xlabel('x')
plt.ylabel('y')
Example #4
with torch.no_grad():
    for agent in range(args.num_ag):
        cs[agent] = []
        if agent < int(args.num_ag/2):
            for a2 in opponent_as:
                o_n = env.reset()
                actions = torch.zeros(args.num_ag)
                actions[:int(args.num_ag/2)] = 0.
                actions[int(args.num_ag/2):] = a2
                actions = actions[:, None]
                cg_inputs = torch.cat([torch.tensor(o_n), actions], dim=-1).float()[None, :]
                if cactor:
                    if cgca:
                        contexts = cgca(cg_inputs)
                        cactions, _ = cactor.get_action(contexts[0])
                    else:
                        cactions, _ = cactor.get_action(cg_inputs[0])
                    cs[agent].append(cactions[agent, 0])
                else:
                    if cgca:
                        contexts = cgca(cg_inputs)
                    else:
                        contexts = cgca_n[agent](cg_inputs)
                    ca, _ = cactor_n[agent].get_action(contexts[:, agent, :][0])
                    cs[agent].append(ca[0])
        else:
            for a1 in opponent_as:
                o_n = env.reset()
                actions = torch.zeros(args.num_ag)
                actions[:int(args.num_ag/2)] = a1
Example #5
c2s = []

d_path = pre_path + '/' + 'seed' + str(args.seed) + '/params.pkl'
data = torch.load(d_path, map_location='cpu')

c1net = data['trainer/cactor_n'][0]
c2net = data['trainer/cactor_n'][1]
from rlkit.torch.policies.make_deterministic import MakeDeterministic

c1net = MakeDeterministic(c1net)
c2net = MakeDeterministic(c2net)

for a2 in a2s:
    o_n = env.reset()
    c1_input = torch.tensor([o_n[0][0], a2]).float()
    c1, _ = c1net.get_action(c1_input)
    c1s.append(c1[0])

for a1 in a1s:
    o_n = env.reset()
    c2_input = torch.tensor([o_n[1][0], a1]).float()
    c2, _ = c2net.get_action(c2_input)
    c2s.append(c2[0])

# plt.figure()
# plt.subplot(1,2,1)
# plt.plot(a2s,c1s)
# plt.gca().set_aspect('equal', 'box')
# plt.gca().axes.get_xaxis().set_ticks([-1.,0.,1.])
# plt.gca().axes.get_yaxis().set_ticks([-1.,0.,1.])
# plt.xlim(-1,1)
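
All five examples follow the same pattern: load a params.pkl snapshot onto the CPU with torch.load, wrap the stochastic rlkit policies stored under the trainer/... keys with MakeDeterministic, and query get_action() inside torch.no_grad(). Below is a minimal standalone sketch of that pattern, assuming a single-agent snapshot with a 'trainer/policy' key; the path, key name and environment handling are placeholders, not taken from any one example above.

import torch
from rlkit.torch.policies.make_deterministic import MakeDeterministic

def load_deterministic_policy(checkpoint_path, key='trainer/policy'):
    # Load the training snapshot on CPU and wrap the stochastic policy so that
    # get_action() returns its deterministic action.
    data = torch.load(checkpoint_path, map_location='cpu')
    return MakeDeterministic(data[key])

def evaluate(env, policy, max_steps=100):
    # Roll the deterministic policy out for one episode and return the
    # undiscounted sum of rewards.
    total_reward = 0.
    o = env.reset()
    with torch.no_grad():
        for _ in range(max_steps):
            a, _ = policy.get_action(o)
            o, r, done, _ = env.step(a)
            total_reward += r
            if done:
                break
    return total_reward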