def launch(config):
    config = convert_dottable(config)

    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. Exploration is still required because of the
    # greedy nature of the DQN algorithm; its schedule is set up by the parameter scheduler in Step 4.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an exploration scheduler, an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration)
    actor = SimpleActor(env, agent_manager)
    learner = SimpleLearner(
        agent_manager, actor, scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
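For orientation, here is a rough sketch of the config structure this launcher expects. Every key below is inferred from an attribute access in the code, while the concrete values (scenario, topology, episode count, action count) are illustrative placeholders rather than the example's actual settings.

# Hypothetical config skeleton for the single-process launcher above.
# Key names mirror the accesses in launch(); the values are placeholders.
config = {
    "env": {
        "scenario": "cim",                  # assumed scenario name
        "topology": "toy.4p_ssdd_l0.0",     # assumed toy topology
        "durations": 1120,                  # episode length in ticks (illustrative)
        "state_shaping": {},                # kwargs forwarded to CIMStateShaper
        "experience_shaping": {}            # kwargs forwarded to TruncatedExperienceShaper
    },
    "agents": {
        "algorithm": {"num_actions": 21}    # "input_dim" is injected at runtime from the state shaper
    },
    "main_loop": {
        "max_episode": 100,
        "exploration": {}                   # kwargs forwarded to TwoPhaseLinearParameterScheduler
    }
}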
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # Initialize the environment and the shapers needed for roll-outs on the actor side.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions)))
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Create an inference-mode agent manager for the actor process.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_actor",
        mode=AgentManagerMode.INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Proxy parameters for joining the communication group via Redis; the actor expects exactly one learner peer.
    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {"learner": 1},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    actor_worker = ActorWorker(
        local_actor=SimpleActor(env=env, agent_manager=agent_manager),
        proxy_params=proxy_params
    )
    actor_worker.launch()
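Both distributed launchers consume a second distributed_config. A minimal sketch of its expected shape, assuming a plain dict fed through convert_dottable; the key names are taken from the attribute accesses in the actor snippet above and the learner snippet below, while the values are placeholders.

# Hypothetical distributed_config skeleton; values are illustrative only.
distributed_config = {
    "group": "cim_dqn",      # communication group shared by the learner and all actors
    "num_actors": 1,         # read by the learner launcher below to size its expected peers
    "redis": {
        "hostname": "localhost",
        "port": 6379
    }
}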
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # The learner only needs the state dimension to build its networks; no shapers are required here.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim

    # Create a training-mode agent manager for the learner process.
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_dqn_agents(agent_id_list, config.agents)
    )

    # Proxy parameters for joining the communication group via Redis; the learner waits for the
    # configured number of actor peers.
    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"] if "NUM_ACTORS" in os.environ else distributed_config.num_actors)
        },
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }

    # The ActorProxy stands in for the remote actor processes and collects their experiences.
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(proxy_params=proxy_params, experience_collecting_func=concat_experiences_by_agent),
        scheduler=TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
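In distributed mode, the actor and learner launchers run as separate processes that discover each other through the same Redis instance and group name. A hedged sketch of the wiring, assuming hypothetical module names and using the environment-variable overrides that both launchers already check:

import os

# The launchers above fall back to distributed_config only when these variables are absent,
# so a deployment can override them per terminal or container.
os.environ["GROUP"] = "cim_dqn"     # must match across the learner and all actors
os.environ["NUM_ACTORS"] = "1"      # read by the learner to size its expected actor peers

# Each role then runs in its own process against the same Redis instance, e.g.
# (module names are hypothetical):
#   python dist_learner.py   ->  calls the learner launch(config, distributed_config) above
#   python dist_actor.py     ->  calls the actor launch(config, distributed_config) above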
experience_shaper = KStepExperienceShaper(
    reward_func=lambda mt: 1 - mt["container_shortage"] / mt["order_requirements"],
    **config.experience_shaping.k_step
)

exploration_config = {
    "epsilon_range_dict": {"_all_": config.exploration.epsilon_range},
    "split_point_dict": {"_all_": config.exploration.split_point},
    "with_cache": config.exploration.with_cache
}
explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)

agent_manager = DQNAgentManager(
    name="cim_remote_actor",
    agent_id_list=agent_id_list,
    mode=AgentMode.INFERENCE,
    state_shaper=state_shaper,
    action_shaper=action_shaper,
    experience_shaper=experience_shaper,
    explorer=explorer
)

proxy_params = {
    "group_name": config.distributed.group_name,
    "expected_peers": config.distributed.actor.peer,
    "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port),
    "max_retries": 10
}
actor_worker = ActorWorker(
    local_actor=SimpleActor(env=env, inference_agents=agent_manager),
    proxy_params=proxy_params
)
actor_worker.launch()
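The reward_func handed to KStepExperienceShaper above is simply the order fulfilment ratio computed from the environment's business metrics. A standalone illustration with made-up numbers:

def fulfilment_reward(metrics: dict) -> float:
    # Reward = fraction of order requirements that were actually fulfilled.
    return 1 - metrics["container_shortage"] / metrics["order_requirements"]

# Illustrative numbers: 20 containers short out of 100 required orders -> reward 0.8.
print(fulfilment_reward({"container_shortage": 20, "order_requirements": 100}))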
"_all_": config.exploration.epsilon_range }, "split_point_dict": { "_all_": config.exploration.split_point }, "with_cache": config.exploration.with_cache } explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config) # Step 3: create an agent manager. agent_manager = DQNAgentManager(name="cim_learner", mode=AgentMode.TRAIN_INFERENCE, agent_id_list=agent_id_list, state_shaper=state_shaper, action_shaper=action_shaper, experience_shaper=experience_shaper, explorer=explorer) # Step 4: Create an actor and a learner to start the training process. actor = SimpleActor(env=env, inference_agents=agent_manager) learner = SimpleLearner( trainable_agents=agent_manager, actor=actor, logger=Logger(tag="single_host_cim_learner", dump_folder=os.path.join( os.path.split(os.path.realpath(__file__))[0], "log"), auto_timestamp=False)) learner.train(total_episodes=config.general.total_training_episodes)
state_shaper = CIMStateShaper(**config.state_shaping)

exploration_config = {
    "epsilon_range_dict": {
        "_all_": config.exploration.epsilon_range
    },
    "split_point_dict": {
        "_all_": config.exploration.split_point
    },
    "with_cache": config.exploration.with_cache
}
explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)

agent_manager = DQNAgentManager(
    name="cim_remote_learner",
    agent_id_list=agent_id_list,
    mode=AgentMode.TRAIN,
    state_shaper=state_shaper,
    explorer=explorer
)

proxy_params = {
    "group_name": config.distributed.group_name,
    "expected_peers": config.distributed.learner.peer,
    "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port),
    "max_retries": 10
}
learner = SimpleLearner(
    trainable_agents=agent_manager,