def launch(config, distributed_config):
    """Start a remote actor worker for distributed DQN training on the CIM scenario.

    Builds the environment, the state/action/experience shapers and an
    inference-mode agent manager, then joins the learner's communication
    group through an ``ActorWorker``.

    Args:
        config: Experiment configuration (converted to a dottable object).
        distributed_config: Communication settings (converted to a dottable object).
    """
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # Simulation environment and the ids of the agents acting in it.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]

    # Shapers translate between raw environment observations/actions and agent I/O.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    discrete_actions = np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions)
    action_shaper = CIMActionShaper(action_space=list(discrete_actions))
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # The network input size is dictated by the state shaper.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_actor",
        mode=AgentManagerMode.INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # The GROUP environment variable takes precedence over the config file value.
    proxy_params = {
        "group_name": os.environ.get("GROUP", distributed_config.group),
        "expected_peers": {"learner": 1},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    actor_worker = ActorWorker(
        local_actor=SimpleActor(env=env, agent_manager=agent_manager),
        proxy_params=proxy_params
    )
    actor_worker.launch()
def launch(config):
    """Run the learner process for distributed PO training on the CIM scenario.

    Trains via a remote ``ActorProxy``, evaluates, dumps the trained models
    under ``<cwd>/models`` and shuts the learner down.

    Args:
        config: Experiment configuration (converted to a dottable object).
    """
    config = convert_dottable(config)

    # Simulation environment and the ids of the agents acting in it.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]

    # The network input size is dictated by the state shaper.
    config["agents"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = POAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_po_agents(agent_id_list, config.agents)
    )

    # NOTE(review): GROUP and NUM_ACTORS must be set in the environment and redis
    # is assumed local -- unlike the DQN variants there is no config fallback here.
    proxy_params = {
        "group_name": os.environ["GROUP"],
        "expected_peers": {"actor": int(os.environ["NUM_ACTORS"])},
        "redis_address": ("localhost", 6379)
    }
    remote_actor = ActorProxy(
        proxy_params=proxy_params,
        experience_collecting_func=merge_experiences_with_trajectory_boundaries
    )
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=remote_actor,
        scheduler=Scheduler(config.main_loop.max_episode),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
def launch(config):
    """Train DQN agents on the CIM scenario in a single process.

    Wires together the environment, shapers, agent manager, actor, scheduler
    and learner, runs training followed by evaluation, and dumps the trained
    models under ``<cwd>/models``.

    Args:
        config: Experiment configuration (converted to a dottable object).
    """
    config = convert_dottable(config)

    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. We also need to create an explorer here due to the
    # greedy nature of the DQN algorithm.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager; the network input size is set by the state shaper.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an actor and a learner to start the training process.
    exploration_scheduler = TwoPhaseLinearParameterScheduler(
        config.main_loop.max_episode, **config.main_loop.exploration
    )
    learner = SimpleLearner(
        agent_manager,
        SimpleActor(env, agent_manager),
        exploration_scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
def load_config(config_pth):
    """Load a YAML config file and return it as a dottable config object.

    A negative ``env.seed`` is replaced with a random seed so that every run
    gets a fresh one; the config is then normalized via ``regularize_config``.

    Args:
        config_pth: Path to the YAML configuration file.

    Returns:
        The parsed, regularized, dottable configuration.
    """
    # The builtin open() is preferred over the legacy io.open() alias (they are
    # the same function in Python 3).
    with open(config_pth, "r") as in_file:
        raw_config = yaml.safe_load(in_file)
    config = convert_dottable(raw_config)
    if config.env.seed < 0:
        config.env.seed = random.randint(0, 99999)
    regularize_config(config)
    return config
def launch(config, distributed_config):
    """Run the learner process for distributed DQN training on the CIM scenario.

    Trains via a remote ``ActorProxy``, evaluates, dumps the trained models
    under ``<cwd>/models`` and shuts the learner down.

    Args:
        config: Experiment configuration (converted to a dottable object).
        distributed_config: Communication settings (converted to a dottable object).
    """
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # Simulation environment and the ids of the agents acting in it.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]

    # The network input size is dictated by the state shaper.
    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_dqn_agents(agent_id_list, config.agents)
    )

    # Environment variables take precedence over the distributed config file.
    group_name = os.environ.get("GROUP", distributed_config.group)
    num_actors = os.environ.get("NUM_ACTORS", distributed_config.num_actors)
    proxy_params = {
        "group_name": group_name,
        "expected_peers": {"actor": int(num_actors)},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=concat_experiences_by_agent
        ),
        scheduler=TwoPhaseLinearParameterScheduler(
            config.main_loop.max_episode, **config.main_loop.exploration
        ),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
# Script-level setup for the ILP-based VM scheduling example: load the YAML
# config next to this script and create the simulation/ILP loggers.
# NOTE(review): `os` and `io` are used below but not imported in this chunk --
# presumably imported earlier in the file; verify.
import pprint
import yaml
from maro.simulator import Env
from maro.simulator.scenarios.vm_scheduling import DecisionPayload
from maro.simulator.scenarios.vm_scheduling.common import Action
from maro.utils import convert_dottable, Logger, LogFormat
from ilp_agent import IlpAgent

# Raise MARO's log threshold so only the loggers created below emit output.
os.environ['LOG_LEVEL'] = 'CRITICAL'

# Resolve config.yml relative to this script's own directory, not the cwd.
FILE_PATH = os.path.split(os.path.realpath(__file__))[0]
CONFIG_PATH = os.path.join(FILE_PATH, "config.yml")
with io.open(CONFIG_PATH, "r") as in_file:
    raw_config = yaml.safe_load(in_file)
config = convert_dottable(raw_config)

# Per-experiment log directory; created on first run.
LOG_PATH = os.path.join(FILE_PATH, "log", config.experiment_name)
if not os.path.exists(LOG_PATH):
    os.makedirs(LOG_PATH)
# Two plain-format loggers that overwrite ("w") their dump files each run.
simulation_logger = Logger(tag="simulation", format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False)
ilp_logger = Logger(tag="ilp", format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False)
# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. from maro.utils import convert_dottable proxy = convert_dottable({ "suffix_length": 6, "fault_tolerant": False, "delay_for_slow_joiner": 3, "redis": { "host": "localhost", "port": 6379, "max_retries": 5, "base_retry_interval": 0.1 } }) driver = convert_dottable( {"zmq": { "protocol": "tcp", "send_timeout": -1, "receive_timeout": -1 }})
# Licensed under the MIT license. from maro.utils import convert_dottable proxy = convert_dottable({ "fault_tolerant": False, "delay_for_slow_joiner": 3, "redis": { "host": "localhost", "port": 6379, "max_retries": 10, "base_retry_interval": 0.1 }, "peer_rejoin": { "enable": False, "peers_catch_lifetime": 10, "minimal_peers": 1, # int, minimal request peer number; or dict {"peer_type": int} for each peer type "enable_message_cache": False, "max_length_for_message_cache": 1024, # The maximum number of pending messages for each peer "timeout_for_minimal_peer_number": 300, # second "is_remove_failed_container": False, # Automatically clean the failed container "max_rejoin_times": 5 } }) driver = convert_dottable( {"zmq": { "protocol": "tcp",