Example #1
class DeepLogisticBase(MultiAgentEnv):
    def __init__(self,
                 height,
                 width,
                 ai_count,
                 agent_count,
                 agent,
                 ups,
                 delivery_points,
                 state,
                 render_screen=False):
        self.render_screen = render_screen
        os.environ["MKL_NUM_THREADS"] = "1"
        self.env = Environment(
            height=height,
            width=width,
            depth=3,
            agents=ai_count,
            agent_class=agent,
            draw_screen=self.render_screen,
            tile_height=32,
            tile_width=32,
            #scheduler=RandomScheduler,
            ups=ups,
            ticks_per_second=1,
            spawn_interval=1,  # In steps
            task_generate_interval=1,  # In steps
            task_assign_interval=1,  # In steps
            delivery_points=delivery_points)

        self.statistics = Statistics()

        assert ai_count < agent_count

        self.state_representation = state(self.env)
        # Wrap the state shape in a Box space, as the sibling envs do; the
        # raw generated array is not itself a gym space.
        self.observation_space = Box(
            low=-1,
            high=1,
            shape=self.state_representation.generate(self.env.agents[0]).shape,
            dtype="float32")
        self.action_space = Discrete(self.env.action_space.N_ACTIONS)

        self.grouping = {'group_1': ["agent_%s" % x for x in range(ai_count)]}
        self.agents = {
            k: self.env.agents[i]
            for i, k in enumerate(self.grouping["group_1"])
        }
        obs_space = Tuple([self.observation_space for _ in range(ai_count)])
        act_space = Tuple([self.action_space for _ in range(ai_count)])
        """self.with_agent_groups(
            groups=self.grouping,
            obs_space=obs_space,
            act_space=act_space
        )"""
        """Spawn all agents etc.."""
        self.env.deploy_agents()
        self.env.task_assignment()

        self.episode = 0

    def get_agents(self):
        return self.env.agents
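A hedged sketch, not from the original source: RLlib's with_agent_groups (the call left disabled above) returns a wrapped environment instead of modifying the env in place, so the grouping only takes effect if its return value is used. AIAgent, State0, and all constructor arguments are borrowed from the other snippets purely for illustration.

from gym.spaces import Tuple

# Sketch only: build the base env, then apply the grouping the class
# leaves commented out. Keep the return value of with_agent_groups --
# it is a new, wrapped MultiAgentEnv, not an in-place change.
base_env = DeepLogisticBase(height=10,
                            width=10,
                            ai_count=2,
                            agent_count=4,
                            agent=AIAgent,
                            ups=None,
                            delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)],
                            state=State0)
grouped_env = base_env.with_agent_groups(
    groups=base_env.grouping,
    obs_space=Tuple([base_env.observation_space] * 2),
    act_space=Tuple([base_env.action_space] * 2))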
Example #2
class DeepLogisticsNormal(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):

        self.env = Environment(height=5,
                               width=5,
                               depth=3,
                               ups=None,
                               ticks_per_second=1,
                               taxi_n=1,
                               taxi_agent=None,
                               taxi_respawn=False,
                               taxi_control="constant",
                               scheduler=OnDemandScheduler,
                               delivery_locations=None,
                               spawn_strategy=LocationSpawnStrategy,
                               graphics_render=True,
                               graphics_tile_height=16,
                               graphics_tile_width=16)
        self.frame_skip = 4
        self.agent = self.env.get_agent(0)
        self.sgen = StateFull(self.env)
        self._seed = 0

        self.action_space = gym.spaces.Discrete(self.env.action_space.n)
        self.observation_space = gym.spaces.Box(
            low=-1,
            high=1,  # bounds mirror the Box spaces in the sibling envs
            shape=self.sgen.generate(self.agent).shape,
            dtype="float32")

    def step(self, action):

        self.agent.do_action(action)

        # for _ in range(self.frame_skip):  # TODO: frame skipping would distort the per-step reward
        self.env.update()
        self.env.render()

        state1 = self.sgen.generate(self.agent)
        reward, terminal = Reward0(self.agent)
        if terminal:
            info = dict(deliveries=self.agent.total_deliveries,
                        pickups=self.agent.total_pickups)
        else:
            info = {}  # gym expects an info dict, even when empty

        return state1, reward, terminal, info

    def reset(self):
        self.env.reset()
        return self.sgen.generate(self.agent)

    def render(self, mode='human', close=False):
        return self.sgen.generate(self.agent)
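A minimal rollout sketch (an addition, using only the gym-style methods defined above):

env = DeepLogisticsNormal()
state = env.reset()
terminal = False
while not terminal:
    action = env.action_space.sample()  # placeholder random policy
    state, reward, terminal, info = env.step(action)
print(info)  # holds the deliveries/pickups counts on the terminal step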
Example #3
class BaseDeepLogisticsMultiEnv(MultiAgentEnv):
    def __init__(self,
                 state,
                 reward,
                 width,
                 height,
                 depth,
                 taxi_n,
                 group_type="individual",
                 graphics_render=False,
                 delivery_locations=None):
        os.environ["MKL_NUM_THREADS"] = "1"

        self.env = Environment(
            width=width,
            height=height,
            depth=depth,
            taxi_n=taxi_n,
            taxi_agent=Agent,
            ups=None,
            graphics_render=graphics_render,
            delivery_locations=delivery_locations,
            spawn_strategy=spawn_strategy.RandomSpawnStrategy)

        self.state_representation = state(self.env)
        self.reward_function = reward
        self._render = graphics_render

        self.observation_space = Box(low=-1,
                                     high=1,
                                     shape=self.state_representation.generate(
                                         self.env.agents[0]).shape,
                                     dtype=np.float32)
        self.action_space = Discrete(self.env.action_space.N_ACTIONS)

        self.agents = {
            "agent_%s" % i: self.env.agents[i]
            for i in range(taxi_n)
        }

        self.total_steps = 0
        """Set up grouping for the environments."""
        if group_type == "individual":
            self.grouping = {
                "group_%s" % x: ["agent_%s" % x]
                for x in range(taxi_n)
            }
        elif group_type == "grouped":
            self.grouping = {
                'group_1': ["agent_%s" % x for x in range(taxi_n)]
            }
        else:
            raise NotImplementedError("The group type %s is not implemented." %
                                      group_type)

        # NOTE: RLlib's with_agent_groups returns a wrapped environment
        # rather than modifying this one in place, so the grouping only
        # takes effect if the returned env is kept and used.
        self.with_agent_groups(
            groups=self.grouping,
            obs_space=Tuple([self.observation_space for _ in range(taxi_n)]),
            act_space=Tuple([self.action_space for _ in range(taxi_n)]))

    def step(self, action_dict):
        self.total_steps += 1

        # TODO: this loop does not make sense with multiple policies. As
        #  written, a single policy supplies one action for every taxi
        #  instead of one action per policy.
        # Cluster: https://ray.readthedocs.io/en/latest/install-on-docker.html#launch-ray-in-docker
        info_dict = {}
        reward_dict = {}
        terminal_dict = {"__all__": False}
        state_dict = {}
        """Perform actions in environment."""
        for agent_name, action in action_dict.items():
            self.agents[agent_name].do_action(action=action)
        """Update the environment"""
        self.env.update()
        if self._render:
            self.env.render()
        """Evaluate score"""
        t__all__ = False
        for agent_name in action_dict:
            reward, terminal = self.reward_function(self.agents[agent_name])

            reward_dict[agent_name] = reward
            terminal_dict[agent_name] = terminal

            if terminal:
                t__all__ = terminal

            state_dict[agent_name] = self.state_representation.generate(
                self.agents[agent_name])
        """Update terminal dict"""
        terminal_dict["__all__"] = t__all__

        return state_dict, reward_dict, terminal_dict, info_dict

    def reset(self):
        self.env.reset()
        self.total_steps = 0

        return {
            agent_name: self.state_representation.generate(agent)
            for agent_name, agent in self.agents.items()
        }

    @staticmethod
    def on_episode_end(info):
        episode = info["episode"]
        env = info["env"].envs[0]

        deliveries = 0
        pickups = 0
        for agent in env.env.agents:
            deliveries += agent.total_deliveries
            pickups += agent.total_pickups

        deliveries = deliveries / len(env.env.agents)
        pickups = pickups / len(env.env.agents)

        episode.custom_metrics["deliveries"] = deliveries
        episode.custom_metrics["pickups"] = pickups
Example #4
class Env:
    def __init__(self, state_representation, fps=60, ups=None):
        self.env = Environment(
            height=10,
            width=10,
            depth=3,
            agents=1,
            agent_class=AIAgent,
            draw_screen=True,
            tile_height=32,
            tile_width=32,
            # scheduler=RandomScheduler,
            ups=ups,
            ticks_per_second=1,
            spawn_interval=1,  # In seconds
            task_generate_interval=1,  # In seconds
            task_assign_interval=1,  # In seconds
            delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)])

        self.state_representation = state_representation(self.env)

        # Assumes that all agents have spawned already and that all tasks are assigned.
        self.env.deploy_agents()
        self.env.task_assignment()
        self.last_time = time.time()
        self.pickup_count = 0
        self.delivery_count = 0
        self.stat_deliveries = []
        self.episode = 0

        # env.daemon = True
        # env.start()

        self.player = self.env.agents[0]

    def step(self, action):
        state = self.player.state
        self.player.do_action(action=action)
        self.env.update()
        new_state = self.player.state
        # print("%s => %s" % (state, new_state))
        """Fast-forward the game until the player is respawned."""
        while self.player.state == Agent.INACTIVE:
            self.env.update()

        state = self.state_representation.generate(self.env.agents[0])

        if self.player.state in [Agent.IDLE, Agent.MOVING]:
            reward = -0.01
            terminal = False
        elif self.player.state in [Agent.PICKUP]:
            self.pickup_count += 1
            reward = 1
            terminal = False
            # print("Pickup", state, self.player.task.c_1)
        elif self.player.state in [Agent.DELIVERY]:
            self.delivery_count += 1
            reward = 10
            terminal = False
            # print("Delivery", state)
        elif self.player.state in [Agent.DESTROYED]:
            reward = -1
            terminal = True

        else:
            raise NotImplementedError(
                "Should never happen. all states should be handled somehow")

        return state, reward, terminal, {}

    def reset(self):
        print(
            "[%s] Environment was reset, took: %s seconds. Pickups: %s, Deliveries: %s"
            % (self.episode, time.time() - self.last_time, self.pickup_count,
               self.delivery_count))
        self.last_time = time.time()
        self.stat_deliveries.append(self.delivery_count)
        if self.episode % 50 == 0:
            self.graph()

        self.pickup_count = 0
        self.delivery_count = 0
        self.episode += 1
        self.env.reset()

    def render(self):
        self.env.render()
        return self.state_representation.generate(self.env.agents[0])

    def graph(self):
        plt.plot([x for x in range(len(self.stat_deliveries))],
                 self.stat_deliveries,
                 color='blue')
        plt.xlabel('Episode')
        plt.ylabel('Number of Successful Deliveries')
        plt.title('Deep Logistics - PPO - Experiment A')
        plt.savefig("./ppo-experiment.png")
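A driver sketch for the class above (an addition; it borrows the State0 representation imported in the next snippet):

env = Env(state_representation=State0, ups=None)
state = env.render()
terminal = False
while not terminal:
    state, reward, terminal, _ = env.step(action=0)  # placeholder fixed action
env.reset()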
Example #5
sys.path.append("/home/per/GIT/code/deep_logistics")
sys.path.append("/root")
from deep_logistics.environment import Environment
from deep_logistics.agent import InputAgent
from state_representations import State0

if __name__ == "__main__":
    env = Environment(
        height=10,
        width=10,
        depth=3,
        agents=2,
        agent_class=InputAgent,
        draw_screen=True,
        tile_height=32,
        tile_width=32,
        #scheduler=RandomScheduler,
        ups=60,
        ticks_per_second=1,
        spawn_interval=1,  # In steps
        task_generate_interval=1,  # In steps
        task_assign_interval=1,  # In steps
        delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)],
    )

    env.deploy_agents()
    env.task_assignment()
    state = State0(env)
    agent = env.agents[0]

    def on_event():
Example #6
sys.path.append("/home/per/IdeaProjects/deep_logistics")
sys.path.append("/home/per/GIT/code/deep_logistics")
sys.path.append("/root")
from deep_logistics.environment import Environment
from deep_logistics.agent import InputAgent

if __name__ == "__main__":
    env = Environment(
        height=5,
        width=5,
        depth=3,
        ups=None,
        ticks_per_second=1,
        taxi_n=1,
        taxi_agent=InputAgent,
        taxi_respawn=False,
        taxi_control="constant",
        scheduler=OnDemandScheduler,
        delivery_locations=None,
        spawn_strategy=LocationSpawnStrategy,
        graphics_render=True,
        graphics_tile_height=64,
        graphics_tile_width=64
    )

    env.deploy_agents()
    env.task_assignment()
    state = State0(env)
    agent = env.agents[0]

    def on_event():