Example #1
def zero_play(**args):
    env = Env(**args)
    _, pa, is_done = env.step(None)
    while not is_done:
        action = Action(pa.vessel_idx, pa.port_idx, 0)
        r, pa, is_done = env.step(action)
    return env.snapshot_list
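
The snapshot list returned by zero_play can be queried offline once the rollout finishes. A minimal sketch, assuming the CIM scenario and reusing the "ports" node, its attributes, and the ticks : port index : attributes slicing shown in Examples #17 and #24 below (the topology and tick choices here are illustrative only):

# Hypothetical usage of zero_play(); the scenario arguments and queried ticks are illustrative.
snapshots = zero_play(scenario="cim", topology="toy.5p_ssddd_l0.0", durations=100)
last_week_ticks = list(range(93, 100))
port0_stats = snapshots["ports"][last_week_ticks:0:["booking", "empty", "shortage"]]
print(port0_stats)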
Example #2
    def _init_env(self, backend_name: str) -> None:
        os.environ["DEFAULT_BACKEND_NAME"] = backend_name
        self._env = Env(
            scenario="cim",
            topology=self._reload_topology,
            start_tick=0,
            durations=200,
            options={"enable-dump-snapshot": tempfile.gettempdir()})
        self._business_engine = self._env.business_engine
Example #3
    def setUp(self):
        env = Env(scenario="vm_scheduling",
                  topology="tests/data/vm_scheduling/azure.2019.toy",
                  start_tick=0,
                  durations=5,
                  snapshot_resolution=1)
        metrics, decision_event, is_done = env.step(None)

        while not is_done:
            action = AllocateAction(vm_id=decision_event.vm_id,
                                    pm_id=decision_event.valid_pms[0])
            self.metrics, decision_event, is_done = env.step(action)
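
The loop above indexes decision_event.valid_pms[0] and would fail if no physical machine is valid for a request. A hedged variant is sketched below; the PostponeAction class, its postpone_step parameter, and the import path are assumptions, not taken from this test:

# Sketch only: fall back to postponing the request when no valid PM is returned.
# PostponeAction, postpone_step and the import path are assumed, not shown in the snippet above.
from maro.simulator import Env
from maro.simulator.scenarios.vm_scheduling import AllocateAction, PostponeAction

env = Env(scenario="vm_scheduling",
          topology="tests/data/vm_scheduling/azure.2019.toy",
          start_tick=0,
          durations=5,
          snapshot_resolution=1)
metrics, decision_event, is_done = env.step(None)

while not is_done:
    if len(decision_event.valid_pms) > 0:
        action = AllocateAction(vm_id=decision_event.vm_id, pm_id=decision_event.valid_pms[0])
    else:
        action = PostponeAction(vm_id=decision_event.vm_id, postpone_step=1)
    metrics, decision_event, is_done = env.step(action)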
Example #4
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions)))
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_actor",
        mode=AgentManagerMode.INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )
    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {"learner": 1},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    actor_worker = ActorWorker(
        local_actor=SimpleActor(env=env, agent_manager=agent_manager),
        proxy_params=proxy_params
    )
    actor_worker.launch()
Example #5
def launch(config):
    config = convert_dottable(config)
    env = Env(config.env.scenario,
              config.env.topology,
              durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["input_dim"] = CIMStateShaper(
        **config.env.state_shaping).dim
    agent_manager = POAgentManager(name="cim_learner",
                                   mode=AgentManagerMode.TRAIN,
                                   agent_dict=create_po_agents(
                                       agent_id_list, config.agents))

    proxy_params = {
        "group_name": os.environ["GROUP"],
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"])
        },
        "redis_address": ("localhost", 6379)
    }

    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(proxy_params=proxy_params,
                         experience_collecting_func=merge_experiences_with_trajectory_boundaries),
        scheduler=Scheduler(config.main_loop.max_episode),
        logger=Logger("cim_learner", auto_timestamp=False))
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
Example #6
def launch(config):
    config = convert_dottable(config)
    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. We also need to create an explorer here due to the
    # greedy nature of the DQN algorithm.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration)
    actor = SimpleActor(env, agent_manager)
    learner = SimpleLearner(
        agent_manager, actor, scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
Example #7
def cim_dqn_actor():
    env = Env(**training_config["env"])
    agent = MultiAgentWrapper(
        {name: get_dqn_agent()
         for name in env.agent_idx_list})
    actor = Actor(env,
                  agent,
                  CIMTrajectoryForDQN,
                  trajectory_kwargs=common_config)
    actor.as_worker(training_config["group"], log_dir=log_dir)
Example #8
    def run(self):
        """Initialize environment and process commands."""
        metrics = None
        decision_event = None
        is_done = False

        env = Env(*self._args, **self._kwargs)

        while True:
            cmd, content = self._pipe.recv()

            if cmd == "step":
                if is_done:
                    # Skip if the current environment is already completed.
                    self._pipe.send((None, None, True, env.frame_index))
                else:
                    metrics, decision_event, is_done = env.step(content)

                    self._pipe.send((metrics, decision_event))
            elif cmd == "reset":
                env.reset()

                metrics = None
                decision_event = None
                is_done = False

                self._pipe.send(None)
            elif cmd == "query":
                node_name, args = content

                states = env.snapshot_list[node_name][args]

                self._pipe.send(states)
            elif cmd == "tick":
                self._pipe.send(env.tick)
            elif cmd == "frame_index":
                self._pipe.send(env.frame_index)
            elif cmd == "is_done":
                self._pipe.send(is_done)
            elif cmd == "stop":
                self._pipe.send(None)
                break
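
Only the child side of the pipe is shown above. Below is a hypothetical parent-process driver; "EnvWorker" stands in for the unshown class that owns run() and whose __init__ stores the pipe as self._pipe and the Env arguments as self._args / self._kwargs. Only the command protocol ("step", "reset", "query", "tick", "frame_index", "is_done", "stop") comes from run() itself:

# Hypothetical driver sketch; EnvWorker and its constructor are illustrative stand-ins.
import multiprocessing as mp

if __name__ == "__main__":
    parent_conn, child_conn = mp.Pipe()
    worker = EnvWorker(child_conn, scenario="cim", topology="toy.5p_ssddd_l0.0", durations=100)
    proc = mp.Process(target=worker.run)
    proc.start()

    parent_conn.send(("step", None))    # run until the first decision event
    print(parent_conn.recv())           # (metrics, decision_event)

    parent_conn.send(("frame_index", None))
    print(parent_conn.recv())           # current frame index of the remote environment

    parent_conn.send(("stop", None))
    parent_conn.recv()                  # worker acknowledges and leaves its command loop
    proc.join()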
Example #9
def cim_dqn_learner():
    env = Env(**training_config["env"])
    agent = MultiAgentWrapper(
        {name: get_dqn_agent()
         for name in env.agent_idx_list})
    scheduler = TwoPhaseLinearParameterScheduler(
        training_config["max_episode"], **training_config["exploration"])
    actor = ActorProxy(
        training_config["group"],
        training_config["num_actors"],
        update_trigger=training_config["learner_update_trigger"])
    learner = OffPolicyLearner(actor, scheduler, agent,
                               **training_config["training"])
    learner.run()
Example #10
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)
    env = Env(config.env.scenario,
              config.env.topology,
              durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]

    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(
        **config.env.state_shaping).dim
    agent_manager = DQNAgentManager(name="cim_learner",
                                    mode=AgentManagerMode.TRAIN,
                                    agent_dict=create_dqn_agents(
                                        agent_id_list, config.agents))

    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"] if "NUM_ACTORS" in os.environ else distributed_config.num_actors)
        },
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }

    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=concat_experiences_by_agent),
        scheduler=TwoPhaseLinearParameterScheduler(
            config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False))
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
Example #11
def test_cim():
    eps = 4

    env = Env("cim", "toy.5p_ssddd_l0.0", durations=MAX_TICK)

    start_time = time()

    for _ in range(eps):
        _, _, is_done = env.step(None)

        while not is_done:
            _, _, is_done = env.step(None)

        env.reset()

    end_time = time()

    print(f"cim 5p toplogy with {MAX_TICK} total time cost: {(end_time - start_time)/eps}")
Example #12
                           format_=LogFormat.none,
                           dump_folder=LOG_PATH,
                           dump_mode="w",
                           auto_timestamp=False)
ilp_logger = Logger(tag="ilp",
                    format_=LogFormat.none,
                    dump_folder=LOG_PATH,
                    dump_mode="w",
                    auto_timestamp=False)

if __name__ == "__main__":
    start_time = timeit.default_timer()

    env = Env(scenario=config.env.scenario,
              topology=config.env.topology,
              start_tick=config.env.start_tick,
              durations=config.env.durations,
              snapshot_resolution=config.env.resolution)
    shutil.copy(os.path.join(env._business_engine._config_path, "config.yml"),
                os.path.join(LOG_PATH, "BEconfig.yml"))
    shutil.copy(CONFIG_PATH, os.path.join(LOG_PATH, "config.yml"))

    if config.env.seed is not None:
        env.set_seed(config.env.seed)

    metrics: object = None
    decision_event: DecisionPayload = None
    is_done: bool = False
    action: Action = None

    metrics, decision_event, is_done = env.step(None)
Example #13
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import numpy as np

from maro.simulator import Env
from maro.rl import AgentMode, SimpleActor, ActorWorker, KStepExperienceShaper, TwoPhaseLinearExplorer
from config import config
from state_shaper import CIMStateShaper
from action_shaper import CIMActionShaper
from experience_shaper import TruncatedExperienceShaper
from agent_manager import DQNAgentManager

if __name__ == "__main__":
    env = Env(config.env.scenario,
              config.env.topology,
              durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.state_shaping)
    action_shaper = CIMActionShaper(action_space=list(
        np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions)))
    if config.experience_shaping.type == "truncated":
        experience_shaper = TruncatedExperienceShaper(
            **config.experience_shaping.truncated)
    else:
        experience_shaper = KStepExperienceShaper(
            reward_func=lambda mt: 1 - mt["container_shortage"] / mt["order_requirements"],
            **config.experience_shaping.k_step)

    exploration_config = {
Example #14
auto_event_mode = False
start_tick = 0
durations = 100
max_ep = 2

opts = dict()
"""
enable-dump-snapshot parameter means business_engine needs dump snapshot data before reset.
If you leave value to empty string, it will dump to current folder.
For getting dump data, please uncomment below line and specify dump destination folder.
"""
# opts['enable-dump-snapshot'] = ''

env = Env(scenario="citi_bike",
          topology="toy.4s_4t",
          start_tick=start_tick,
          durations=durations,
          snapshot_resolution=60,
          options=opts)

print(env.summary)

for ep in range(max_ep):
    metrics = None
    decision_evt: DecisionEvent = None
    is_done = False
    action = None

    while not is_done:
        metrics, decision_evt, is_done = env.step(action)

        # decision_evt will be None at the end of the episode.
Example #15
from maro.simulator import Env

env = Env(scenario="cim",
          topology="toy.5p_ssddd_l0.0",
          start_tick=0,
          durations=100)

metrics, decision_event, is_done = env.step(None)

while not is_done:
    metrics, decision_event, is_done = env.step(None)

print(f"environment metrics: {env.metrics}")
Example #16
class TestCimScenarios(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestCimScenarios, self).__init__(*args, **kwargs)

        with open(os.path.join(TOPOLOGY_PATH_CONFIG, "config.yml"), "r") as input_stream:
            self._raw_topology = yaml.safe_load(input_stream)

        self._env: Optional[Env] = None
        self._reload_topology: str = TOPOLOGY_PATH_CONFIG
        self._business_engine: Optional[CimBusinessEngine] = None

        random.clear()

    def _init_env(self, backend_name: str) -> None:
        os.environ["DEFAULT_BACKEND_NAME"] = backend_name
        self._env = Env(
            scenario="cim",
            topology=self._reload_topology,
            start_tick=0,
            durations=200,
            options={"enable-dump-snapshot": tempfile.gettempdir()})
        self._business_engine = self._env.business_engine

    def test_load_from_config(self) -> None:
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            #########################################################
            # Env will not have `configs` if loaded from dump/real.
            if len(self._business_engine.configs) > 0:
                self.assertTrue(
                    compare_dictionary(self._business_engine.configs, self._raw_topology))

            self.assertEqual(
                len(getattr(self._business_engine.frame, "ports")), 22)
            self.assertEqual(self._business_engine._data_cntr.port_number, 22)
            self.assertEqual(
                len(getattr(self._business_engine.frame, "vessels")), 46)
            self.assertEqual(self._business_engine._data_cntr.vessel_number,
                             46)
            self.assertEqual(len(self._business_engine.snapshots), 0)

            #########################################################
            # Vessel
            vessels: List[VesselSetting] = self._business_engine._data_cntr.vessels
            for i, vessel in enumerate(vessels):
                vessel_config = self._raw_topology["vessels"][vessel.name]
                self.assertEqual(vessel.index, i)
                self.assertEqual(vessel.capacity, vessel_config["capacity"])
                self.assertEqual(vessel.parking_duration,
                                 vessel_config["parking"]["duration"])
                self.assertEqual(vessel.parking_noise,
                                 vessel_config["parking"]["noise"])
                self.assertEqual(vessel.start_port_name,
                                 vessel_config["route"]["initial_port_name"])
                self.assertEqual(vessel.route_name,
                                 vessel_config["route"]["route_name"])
                self.assertEqual(vessel.sailing_noise,
                                 vessel_config["sailing"]["noise"])
                self.assertEqual(vessel.sailing_speed,
                                 vessel_config["sailing"]["speed"])

            for name, idx in self._business_engine.get_node_mapping()["vessels"].items():
                self.assertEqual(vessels[idx].name, name)

            #########################################################
            # Port
            ports: List[PortSetting] = self._business_engine._data_cntr.ports
            port_names = [port.name for port in ports]
            for i, port in enumerate(ports):
                assert isinstance(port, SyntheticPortSetting)
                port_config = self._raw_topology["ports"][port.name]
                self.assertEqual(port.index, i)
                self.assertEqual(port.capacity, port_config["capacity"])
                self.assertEqual(port.empty_return_buffer.noise,
                                 port_config["empty_return"]["noise"])
                self.assertEqual(port.full_return_buffer.noise,
                                 port_config["full_return"]["noise"])
                self.assertEqual(
                    port.source_proportion.noise,
                    port_config["order_distribution"]["source"]["noise"])
                for target in port.target_proportions:
                    self.assertEqual(
                        target.noise,
                        port_config["order_distribution"]["targets"][port_names[target.index]]["noise"])

            for name, idx in self._business_engine.get_node_mapping()["ports"].items():
                self.assertEqual(ports[idx].name, name)

    def test_load_from_real(self) -> None:
        for topology in [TOPOLOGY_PATH_REAL_BIN, TOPOLOGY_PATH_REAL_CSV]:
            self._reload_topology = topology
            for backend_name in backends_to_test:
                self._init_env(backend_name)

                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, 0)
                    self.assertEqual(port.shortage, 0)

                hard_coded_truth = [556, 0, 20751], [1042, 0, 17320], [0, 0, 25000], [0, 0, 25000]

                self._env.step(action=None)
                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, hard_coded_truth[i][0])
                    self.assertEqual(port.shortage, hard_coded_truth[i][1])
                    self.assertEqual(port.empty, hard_coded_truth[i][2])

                self._env.reset(keep_seed=True)
                self._env.step(action=None)
                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, hard_coded_truth[i][0])
                    self.assertEqual(port.shortage, hard_coded_truth[i][1])
                    self.assertEqual(port.empty, hard_coded_truth[i][2])

        self._reload_topology = TOPOLOGY_PATH_CONFIG

    def test_dump_and_load(self) -> None:
        dump_from_config(os.path.join(TOPOLOGY_PATH_CONFIG, "config.yml"),
                         TOPOLOGY_PATH_DUMP, 200)

        self._reload_topology = TOPOLOGY_PATH_DUMP

        # The reloaded Env should have same behaviors
        self.test_load_from_config()
        self.test_vessel_movement()
        self.test_order_state()
        self.test_order_export()
        self.test_early_discharge()

        self._reload_topology = TOPOLOGY_PATH_CONFIG

    def test_vessel_movement(self) -> None:
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            hard_coded_period = [
                67, 75, 84, 67, 53, 58, 51, 58, 61, 49, 164, 182, 146, 164,
                182, 146, 90, 98, 79, 95, 104, 84, 87, 97, 78, 154, 169, 136,
                154, 169, 94, 105, 117, 94, 189, 210, 167, 189, 210, 167, 141,
                158, 125, 141, 158, 125
            ]
            self.assertListEqual(
                self._business_engine._data_cntr.vessel_period,
                hard_coded_period)

            ports: List[PortSetting] = self._business_engine._data_cntr.ports
            port_names: List[str] = [port.name for port in ports]
            vessel_stops: VesselStopsWrapper = self._business_engine._data_cntr.vessel_stops
            vessels: List[VesselSetting] = self._business_engine._data_cntr.vessels

            # Test invalid argument
            self.assertIsNone(vessel_stops[None])

            #########################################################
            for i, vessel in enumerate(vessels):
                start_port_index = port_names.index(vessel.start_port_name)
                self.assertEqual(vessel_stops[i, 0].port_idx, start_port_index)

            #########################################################
            for i, vessel in enumerate(vessels):
                stop_port_indices = [stop.port_idx for stop in vessel_stops[i]]

                raw_route = self._raw_topology["routes"][vessel.route_name]
                route_stop_names = [stop["port_name"] for stop in raw_route]
                route_stop_indices = [
                    port_names.index(name) for name in route_stop_names
                ]
                start_offset = route_stop_indices.index(
                    port_names.index(vessel.start_port_name))

                for j, stop_port_index in enumerate(stop_port_indices):
                    self.assertEqual(
                        stop_port_index,
                        route_stop_indices[(j + start_offset) %
                                           len(route_stop_indices)])

            #########################################################
            # STEP: beginning
            for i, vessel in enumerate(self._business_engine._vessels):
                self.assertEqual(vessel.idx, i)
                self.assertEqual(vessel.next_loc_idx, 0)
                self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            self._env.step(action=None)
            # Vessel 35 will trigger the first arrival event at tick 5.
            self.assertEqual(self._env.tick, 5)
            for i, vessel in enumerate(self._business_engine._vessels):
                if i == 35:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 1)
                else:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            self._env.step(action=None)
            # Vessel 27 will trigger the second arrival event at tick 6.
            self.assertEqual(self._env.tick, 6)
            for i, vessel in enumerate(self._business_engine._vessels):
                if i == 27:  # Vessel 27 just arrives
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 1)
                elif i == 35:  # Vessel 35 has already departed
                    self.assertEqual(vessel.next_loc_idx, 2)
                    self.assertEqual(vessel.last_loc_idx, 1)
                else:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            while self._env.tick < 100:
                self._env.step(action=None)
            self.assertEqual(self._env.tick, 100)
            for i, vessel in enumerate(self._business_engine._vessels):
                expected_next_loc_idx = expected_last_loc_idx = -1
                for j, stop in enumerate(vessel_stops[i]):
                    if stop.arrival_tick == self._env.tick:
                        expected_next_loc_idx = expected_last_loc_idx = j
                        break
                    if stop.arrival_tick > self._env.tick:
                        expected_next_loc_idx = j
                        expected_last_loc_idx = j - 1
                        break

                self.assertEqual(vessel.next_loc_idx, expected_next_loc_idx)
                self.assertEqual(vessel.last_loc_idx, expected_last_loc_idx)

    def test_order_state(self) -> None:
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            for i, port in enumerate(self._business_engine._ports):
                total_containers = self._raw_topology['total_containers']
                initial_container_proportion = (
                    self._raw_topology['ports'][port.name]['initial_container_proportion'])

                self.assertEqual(port.booking, 0)
                self.assertEqual(port.shortage, 0)
                self.assertEqual(
                    port.empty,
                    int(total_containers * initial_container_proportion))

            #########################################################
            self._env.step(action=None)
            self.assertEqual(self._env.tick, 5)

            hard_coded_truth = [  # Should get same results under default random seed
                [223, 0, 14726], [16, 0, 916], [18, 0, 917], [89, 0, 5516],
                [84, 0, 4613], [72, 0, 4603], [26, 0, 1374], [24, 0, 1378],
                [48, 0, 2756], [54, 0, 2760], [26, 0, 1379], [99, 0, 5534],
                [137, 0, 7340], [19, 0, 912], [13, 0, 925], [107, 0, 6429],
                [136, 0, 9164], [64, 0, 3680], [24, 0, 1377], [31, 0, 1840],
                [109, 0, 6454], [131, 0, 7351]
            ]
            for i, port in enumerate(self._business_engine._ports):
                self.assertEqual(port.booking, hard_coded_truth[i][0])
                self.assertEqual(port.shortage, hard_coded_truth[i][1])
                self.assertEqual(port.empty, hard_coded_truth[i][2])

    def test_keep_seed(self) -> None:
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            vessel_stops_1: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_1 = [(port.booking, port.shortage, port.empty)
                           for port in self._business_engine._ports]

            self._env.reset(keep_seed=True)
            vessel_stops_2: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_2 = [(port.booking, port.shortage, port.empty)
                           for port in self._business_engine._ports]

            self._env.reset(keep_seed=False)
            vessel_stops_3: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_3 = [(port.booking, port.shortage, port.empty)
                           for port in self._business_engine._ports]

            # Vessel
            for i in range(self._business_engine._data_cntr.vessel_number):
                # 1 and 2 should be totally equal
                self.assertListEqual(vessel_stops_1[i], vessel_stops_2[i])

                # 1 and 3 should have difference
                flag = True
                for stop1, stop3 in zip(vessel_stops_1[i], vessel_stops_3[i]):
                    self.assertListEqual(
                        [stop1.index, stop1.port_idx, stop1.vessel_idx],
                        [stop3.index, stop3.port_idx, stop3.vessel_idx])
                    if (stop1.arrival_tick, stop1.leave_tick) != (
                            stop3.arrival_tick, stop3.leave_tick):
                        flag = False
                self.assertFalse(flag)

            # Port
            self.assertListEqual(port_info_1, port_info_2)
            self.assertFalse(
                all(port1 == port3
                    for port1, port3 in zip(port_info_1, port_info_3)))

    def test_order_export(self) -> None:
        """order.tick, order.src_port_idx, order.dest_port_idx, order.quantity"""
        Order = namedtuple(
            "Order", ["tick", "src_port_idx", "dest_port_idx", "quantity"])

        #
        for enabled in [False, True]:
            exporter = PortOrderExporter(enabled)

            for i in range(5):
                exporter.add(Order(0, 0, 1, i + 1))

            out_folder = tempfile.gettempdir()
            if os.path.exists(f"{out_folder}/orders.csv"):
                os.remove(f"{out_folder}/orders.csv")

            exporter.dump(out_folder)

            if enabled:
                with open(f"{out_folder}/orders.csv") as fp:
                    reader = csv.DictReader(fp)
                    row = 0
                    for line in reader:
                        self.assertEqual(row + 1, int(line["quantity"]))
                        row += 1
            else:  # Should have done nothing
                self.assertFalse(os.path.exists(f"{out_folder}/orders.csv"))

    def test_early_discharge(self) -> None:
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            metric, decision_event, is_done = self._env.step(None)
            assert isinstance(decision_event, DecisionEvent)

            self.assertEqual(decision_event.action_scope.load, 1240)
            self.assertEqual(decision_event.action_scope.discharge, 0)
            self.assertEqual(decision_event.early_discharge, 0)

            decision_event = pickle.loads(pickle.dumps(decision_event))  # Test serialization

            load_action = Action(vessel_idx=decision_event.vessel_idx,
                                 port_idx=decision_event.port_idx,
                                 quantity=1201,
                                 action_type=ActionType.LOAD)
            discharge_action = Action(vessel_idx=decision_event.vessel_idx,
                                      port_idx=decision_event.port_idx,
                                      quantity=1,
                                      action_type=ActionType.DISCHARGE)
            metric, decision_event, is_done = self._env.step(
                [load_action, discharge_action])

            history = []
            while not is_done:
                metric, decision_event, is_done = self._env.step(None)
                assert decision_event is None or isinstance(decision_event, DecisionEvent)
                if decision_event is not None and decision_event.vessel_idx == 35:
                    v = self._business_engine._vessels[35]
                    history.append((v.full, v.empty, v.early_discharge))

            hard_coded_benchmark = [(465, 838, 362), (756, 547, 291),
                                    (1261, 42, 505), (1303, 0, 42),
                                    (1303, 0, 0), (1303, 0, 0), (803, 0, 0)]
            self.assertListEqual(history, hard_coded_benchmark)

            #
            payload_detail_benchmark = {
                'ORDER': ['tick', 'src_port_idx', 'dest_port_idx', 'quantity'],
                'RETURN_FULL': ['src_port_idx', 'dest_port_idx', 'quantity'],
                'VESSEL_ARRIVAL': ['port_idx', 'vessel_idx'],
                'LOAD_FULL': ['port_idx', 'vessel_idx'],
                'DISCHARGE_FULL': ['vessel_idx', 'port_idx', 'from_port_idx', 'quantity'],
                'PENDING_DECISION': [
                    'tick', 'port_idx', 'vessel_idx', 'snapshot_list', 'action_scope', 'early_discharge'
                ],
                'LOAD_EMPTY': ['port_idx', 'vessel_idx', 'action_type', 'quantity'],
                'DISCHARGE_EMPTY': ['port_idx', 'vessel_idx', 'action_type', 'quantity'],
                'VESSEL_DEPARTURE': ['port_idx', 'vessel_idx'],
                'RETURN_EMPTY': ['port_idx', 'quantity']
            }
            self.assertTrue(
                compare_dictionary(
                    self._business_engine.get_event_payload_detail(),
                    payload_detail_benchmark))
            port_number = self._business_engine._data_cntr.port_number
            self.assertListEqual(self._business_engine.get_agent_idx_list(),
                                 list(range(port_number)))
Example #17
if __name__ == "__main__":
    start_tick = 0
    durations = 100  # 100 days

    opts = dict()
    """
    enable-dump-snapshot parameter means business_engine needs dump snapshot data before reset.
    If you leave value to empty string, it will dump to current folder.
    For getting dump data, please uncomment below line and specify dump destination folder.
    """
    opts['enable-dump-snapshot'] = 'YOUR_FOLDER_NAME'

    # Initialize an environment with a specific scenario and related topology.
    env = Env(scenario="cim",
              topology="global_trade.22p_l0.1",
              start_tick=start_tick,
              durations=durations,
              options=opts)
    # To reset environmental data before starting a new experiment.
    env.reset()
    # Query environment summary, which includes business instances, intra-instance attributes, etc.
    print(env.summary)

    for ep in range(2):
        # Gym-like step function.
        metrics, decision_event, is_done = env.step(None)

        while not is_done:
            past_week_ticks = [
                x for x in range(max(decision_event.tick - 7, 0), decision_event.tick)
Example #18
    subfolder_name = f"{config.env.param.topology}_{time_str}"

    # Log path.
    config.log.path = os.path.join(config.log.path, date_str, subfolder_name)
    if not os.path.exists(config.log.path):
        os.makedirs(config.log.path)

    simulation_logger = Logger(tag="simulation",
                               dump_folder=config.log.path,
                               dump_mode="w",
                               auto_timestamp=False)

    # Create a demo environment to retrieve environment information.
    simulation_logger.info(
        "Approximating the experience quantity of each agent...")
    demo_env = Env(**config.env.param)
    config.env.exp_per_ep = decision_cnt_analysis(demo_env,
                                                  pv=True,
                                                  buffer_size=8)
    simulation_logger.info(config.env.exp_per_ep)

    # Add some buffer to prevent overlapping.
    config.env.return_scaler, tot_order_amount = return_scaler(
        demo_env, tick=config.env.param.durations, gamma=config.training.gamma)
    simulation_logger.info(
        f"Return value will be scaled down by the factor {config.env.return_scaler}"
    )

    save_config(config, os.path.join(config.log.path, "config.yml"))
    save_code("examples/cim/gnn", config.log.path)
Example #19
        raw_config = yaml.safe_load(in_file)
        config = convert_dottable(raw_config)

    # Overwrite the config.
    if args.topology is not None:
        config.env.topology = args.topology
    if args.seed is not None:
        config.env.seed = args.seed
    if args.peep:
        PEEP_AND_USE_REAL_DATA = True

    # Init an environment for Citi Bike.
    env = Env(
        scenario=config.env.scenario,
        topology=config.env.topology,
        start_tick=config.env.start_tick,
        durations=config.env.durations,
        snapshot_resolution=config.env.resolution,
    )

    # For debug only, used to peep the BE to get the real future data.
    if PEEP_AND_USE_REAL_DATA:
        ENV = env
        TRIP_PICKER = BinaryReader(env.configs["trip_data"]).items_tick_picker(
            start_time_offset=config.env.start_tick,
            end_time_offset=(config.env.start_tick + config.env.durations),
            time_unit="m"
        )

    if config.env.seed is not None:
        env.set_seed(config.env.seed)
Example #20
def single_player_worker(index, config, exp_idx_mapping, pipe, action_io,
                         exp_output):
    """The A2C worker function to collect experience.

    Args:
        index (int): The process index counted from 0.
        config (dict): It is a dottable dictionary that stores the configuration of the simulation, state_shaper and
            postprocessing shaper.
        exp_idx_mapping (dict): The key is agent code and the value is the starting index where the experience is stored
            in the experience batch.
        pipe (Pipe): The pipe instance for communication with the main process.
        action_io (SharedStructure): The shared memory to hold the state information that the main process uses to
            generate an action.
        exp_output (SharedStructure): The shared memory to transfer the experience list to the main process.
    """
    env = Env(**config.env.param)
    fix_seed(env, config.env.seed)
    static_code_list, dynamic_code_list = list(env.summary["node_mapping"]["ports"].values()), \
        list(env.summary["node_mapping"]["vessels"].values())
    # Create gnn_state_shaper without consuming any resources.

    gnn_state_shaper = GNNStateShaper(
        static_code_list,
        dynamic_code_list,
        config.env.param.durations,
        config.model.feature,
        tick_buffer=config.model.tick_buffer,
        max_value=env.configs["total_containers"])
    gnn_state_shaper.compute_static_graph_structure(env)

    action_io_np = action_io.structuralize()

    action_shaper = DiscreteActionShaper(config.model.action_dim)
    exp_shaper = ExperienceShaper(static_code_list,
                                  dynamic_code_list,
                                  config.env.param.durations,
                                  gnn_state_shaper,
                                  scale_factor=config.env.return_scaler,
                                  time_slot=config.training.td_steps,
                                  discount_factor=config.training.gamma,
                                  idx=index,
                                  shared_storage=exp_output.structuralize(),
                                  exp_idx_mapping=exp_idx_mapping)

    i = 0
    while pipe.recv() == "reset":
        env.reset()
        r, decision_event, is_done = env.step(None)

        j = 0
        logs = []
        while not is_done:
            model_input = gnn_state_shaper(decision_event, env.snapshot_list)
            action_io_np["v"][:, index] = model_input["v"]
            action_io_np["p"][:, index] = model_input["p"]
            action_io_np["vo"][index] = model_input["vo"]
            action_io_np["po"][index] = model_input["po"]
            action_io_np["vedge"][index] = model_input["vedge"]
            action_io_np["pedge"][index] = model_input["pedge"]
            action_io_np["ppedge"][index] = model_input["ppedge"]
            action_io_np["mask"][index] = model_input["mask"]
            action_io_np["pid"][index] = decision_event.port_idx
            action_io_np["vid"][index] = decision_event.vessel_idx
            pipe.send("features")
            model_action = pipe.recv()
            env_action = action_shaper(decision_event, model_action)
            exp_shaper.record(decision_event=decision_event,
                              model_action=model_action,
                              model_input=model_input)
            logs.append([
                index, decision_event.tick, decision_event.port_idx,
                decision_event.vessel_idx, model_action, env_action,
                decision_event.action_scope.load,
                decision_event.action_scope.discharge
            ])
            action = Action(decision_event.vessel_idx, decision_event.port_idx,
                            env_action)
            r, decision_event, is_done = env.step(action)
            j += 1
        action_io_np["sh"][index] = compute_shortage(
            env.snapshot_list, config.env.param.durations, static_code_list)
        i += 1
        pipe.send("done")
        gnn_state_shaper.end_ep_callback(env.snapshot_list)
        # Organize and synchronize exp to shared memory.
        exp_shaper(env.snapshot_list)
        exp_shaper.reset()
        logs = np.array(logs, dtype=float)  # np.float is removed in newer NumPy; the builtin float keeps the same dtype
        pipe.send(logs)
Example #21
            agent_id = list(state.keys())[0]
            data = training_data.setdefault(agent_id,
                                            {"args": [[] for _ in range(4)]})
            data["args"][0].append(state[agent_id])  # state
            data["args"][1].append(action[agent_id][0])  # action
            data["args"][2].append(action[agent_id][1])  # log_p
            data["args"][3].append(self.get_offline_reward(event))  # reward

        for agent_id in training_data:
            training_data[agent_id]["args"] = [
                np.asarray(vals, dtype=np.float32 if i == 3 else None)
                for i, vals in enumerate(training_data[agent_id]["args"])
            ]

        return training_data


# Single-threaded launcher
if __name__ == "__main__":
    set_seeds(1024)  # for reproducibility
    env = Env(**training_config["env"])
    agent = MultiAgentWrapper(
        {name: get_ac_agent()
         for name in env.agent_idx_list})
    actor = Actor(env,
                  agent,
                  CIMTrajectoryForAC,
                  trajectory_kwargs=common_config)  # local actor
    learner = OnPolicyLearner(actor, training_config["max_episode"])
    learner.run()
Example #22
from examples.ecr.rl_formulations.common.reward_shaper import ECRRewardShaper
from examples.ecr.rl_formulations.common.explorer import TwoPhaseLinearExplorer, exploration_config

with io.open("../config.yml", "r") as in_file:
    raw_config = yaml.safe_load(in_file)
    cf = convert_dottable(raw_config)

if cf.rl.modeling == "dqn":
    from examples.ecr.rl_formulations.dqn_agent_manager import DQNAgentManager, num_actions
    agent_manager_cls = DQNAgentManager
    action_space = list(np.linspace(-1.0, 1.0, num_actions))
else:  # TODO: enc_gat agent_manager class
    raise ValueError(f"Unsupported RL algorithm: {cf.rl.modeling}")

if __name__ == "__main__":
    env = Env(cf.env.scenario, cf.env.topology, durations=cf.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = ECRStateShaper(**cf.state_shaping)
    action_shaper = ECRActionShaper(action_space=action_space)
    if cf.reward_shaping.type == "truncated":
        reward_shaper = ECRRewardShaper(agent_id_list=agent_id_list,
                                        **cf.reward_shaping.truncated)
    else:
        reward_shaper = KStepRewardShaper(reward_func=lambda mt: mt["perf"],
                                          **cf.reward_shaping.k_step)
    explorer = TwoPhaseLinearExplorer(agent_id_list,
                                      cf.rl.total_training_episodes,
                                      **exploration_config)
    agent_manager = agent_manager_cls(name="ecr_learner",
                                      mode=AgentMode.TRAIN_INFERENCE,
                                      agent_id_list=agent_id_list,
Example #23
os.environ["MARO_STREAMIT_EXPERIMENT_NAME"] = "experiment_example"

from random import seed, randint

from maro.simulator import Env
from maro.simulator.scenarios.cim.common import Action, ActionScope, ActionType
from maro.streamit import streamit

if __name__ == "__main__":
    seed(0)
    NUM_EPISODE = 2

    with streamit:
        # Initialize an environment with a specific scenario and related topology.
        env = Env(scenario="cim",
                  topology="global_trade.22p_l0.1",
                  start_tick=0,
                  durations=100)

        # To reset environmental data before starting a new experiment.
        env.reset()

        for ep in range(NUM_EPISODE):
            # Tell streamit we are in a new episode.
            streamit.episode(ep)

            # Gym-like step function.
            metrics, decision_event, is_done = env.step(None)

            while not is_done:
                action_scope = decision_event.action_scope
                to_discharge = action_scope.discharge > 0 and randint(0, 1) > 0
Example #24
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from maro.simulator import Env
from maro.simulator.scenarios.cim.common import Action

start_tick = 0
durations = 100  # 100 days

# Initialize an environment with a specific scenario and related topology.
env = Env(scenario="cim",
          topology="toy.5p_ssddd_l0.0",
          start_tick=start_tick,
          durations=durations)

# Query environment summary, which includes business instances, intra-instance attributes, etc.
print(env.summary)

for ep in range(2):
    # Gym-like step function
    metrics, decision_event, is_done = env.step(None)

    while not is_done:
        past_week_ticks = [
            x for x in range(decision_event.tick - 7, decision_event.tick)
        ]
        decision_port_idx = decision_event.port_idx
        intr_port_infos = ["booking", "empty", "shortage"]

        # Query the decision port's booking, empty container inventory, and shortage information for the past week.
        past_week_info = env.snapshot_list["ports"][