Example #1
            break
        env.render()
        cnt = cnt + 1
        action = RL.choose_action(observation)  # choose an action
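        # one-hot encode the discrete action (env.step expects one action vector per agent)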
        action_onehot = np.zeros(env.action_space[0].n)
        action_onehot[action] += 1.0
        observation_, reward, done, info = env.step([action_onehot])  # get the next state
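        # env.step returns per-agent lists; unwrap the single agent's entries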
        observation_ = observation_[0]
        reward = reward[0]
        done = done[0]

        # store this transition
        RL.store_transition(observation, action, reward, observation_)

        # learn from stored transitions
        RL.learn()

        observation = observation_
        # display rewards
        for agent in env.world.agents:
            if agent.name not in reward_dic:
                reward_dic[agent.name] = []
            reward_dic[agent.name].append(env._get_reward(agent))
            print(agent.name + " reward: %0.3f" % env._get_reward(agent))
    for agent in env.world.agents:
        y = reward_dic[agent.name]
        x = np.linspace(1, len(y), len(y))
        plt.plot(x, y)
        plt.show()
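Example #1 starts mid-episode, so `env`, `RL`, `cnt`, `reward_dic`, and the enclosing loop are defined earlier in the original script. A minimal sketch of the setup it implies, using the standard multiagent-particle-envs construction; the `DeepQNetwork` agent and its constructor arguments are hypothetical stand-ins for whatever `RL` really is:

import numpy as np
import matplotlib.pyplot as plt
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios

# build a particle-env world from a scenario script (scenario name assumed)
scenario = scenarios.load('simple.py').Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                    scenario.observation)

# hypothetical single-agent DQN; the excerpt only requires choose_action,
# store_transition, and learn methods
RL = DeepQNetwork(n_actions=env.action_space[0].n,
                  n_features=env.observation_space[0].shape[0])

reward_dic = {}
cnt = 0
observation = env.reset()[0]  # list of per-agent observations; take the first
# ... the excerpt's step loop runs from here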
Example #2
    # create interactive policies for each agent
    policies = [InteractivePolicy(env, i) for i in range(env.n)]
    # execution loop
    obs_n = env.reset()
    while True:
        # query for action from each agent's policy
        act_n = []
        observation = obs_n[0]
        length = observation.shape[1]
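        # the first length-1 policies each act on one row of the stacked observation; the last acts on the full array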
        for i, policy in enumerate(policies):
            if i < length - 1:
                obs = observation[i, :]
                act_n.append(policy.action(obs))
            elif i == length - 1:
                act_n.append(policy.action(observation))
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)

        # render all agent views
        env.render()

        # print the actions
        print(act_n)
        # get the observation/state
        env_obs = obs_n[0]
        print(env_obs)
        # get each agent's reward
        env_reward = []
        for agent in env.world.agents:
            env_reward.append(env._get_reward(agent))
        print(env_reward)
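InteractivePolicy reads keyboard input from the viewer, so this loop needs a human driving each agent. For headless testing, a random policy with the same `action(obs)` interface can be dropped in; a minimal sketch, assuming the particle envs' usual 5-way discrete movement space plus a `world.dim_c`-dimensional communication channel (the `RandomPolicy` class is hypothetical, not part of the repo):

import numpy as np

class RandomPolicy(object):
    """Hypothetical drop-in for InteractivePolicy: samples a random
    one-hot movement action instead of reading keyboard input."""
    def __init__(self, env, agent_index):
        self.env = env
        self.agent_index = agent_index

    def action(self, obs):
        u = np.zeros(5)                  # [no-op, right, left, up, down]
        u[np.random.randint(5)] = 1.0
        # pad with a silent communication vector, as InteractivePolicy does
        return np.concatenate([u, np.zeros(self.env.world.dim_c)])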
Example #3
    # load scenario from script
    scenario = scenarios.load(args.scenario).Scenario()
    # create world
    world = scenario.make_world()  # world is an instance of the scenario's World class
    # create multiagent environment
    env = MultiAgentEnv(world,
                        scenario.reset_world,
                        scenario.reward,
                        scenario.observation,
                        info_callback=None,
                        shared_viewer=False)
    # render call to create viewer window (necessary only for interactive policies)
    env.render()
    # create interactive policies for each agent
    policies = [InteractivePolicy(env, i) for i in range(env.n)]  # one interactive policy per agent in the simulated world
    # execution loop
    obs_n = env.reset()
    while True:
        # query for action from each agent's policy
        act_n = []
        for i, policy in enumerate(policies):
            act_n.append(policy.action(obs_n[i]))
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)
        # render all agent views
        env.render()
        # display rewards
        for agent in env.world.agents:
            print(agent.name + " reward: %0.3f" % env._get_reward(agent))
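Example #3 mirrors the repo's interactive script; the imports and argument parsing it depends on look roughly like this (the default scenario file is an assumption):

import argparse

from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy
import multiagent.scenarios as scenarios

# parse which scenario script to load
parser = argparse.ArgumentParser(description=None)
parser.add_argument('-s', '--scenario', default='simple.py',
                    help='Path of the scenario Python script.')
args = parser.parse_args()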
Example #4
    env.render()
    # create interactive policies for each agent
    policies = [InteractivePolicy(env, i) for i in range(env.n)]
    # execution loop
    obs_n = env.reset()

    rewards = np.zeros(len(env.world.agents))

    print('env.discrete_action_space:', env.discrete_action_space)

    while True:
        # query for action from each agent's policy
        act_n = []
        for i, policy in enumerate(policies):
            act_n.append(policy.action(obs_n[i]))
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)
        # render all agent views
        env.render()
        # display rewards

        new_rewards = np.zeros(len(env.world.agents))
        for i, agent in enumerate(env.world.agents):
            new_rewards[i] = env._get_reward(agent)
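        # print only when some agent's reward moved by more than 0.001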
        if (np.abs(rewards - new_rewards) > .001).any():
            print(rewards - new_rewards)
            rewards = new_rewards
            for i, r in enumerate(rewards):
                print('agent {} reward: {:.3f}'.format(i, r))
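All four loops run forever. If episodes should terminate, the `done_n` flags returned by `env.step` can be checked inside the loop; a sketch, where resetting once every agent reports done is an assumption (scenarios differ in how they use `done`):

        # hypothetical episode boundary: reset when all agents are done
        if all(done_n):
            obs_n = env.reset()
            rewards = np.zeros(len(env.world.agents))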