def launch(config):
    config = convert_dottable(config)

    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. Exploration is still required because of the
    # greedy nature of the DQN algorithm; its schedule is set up by the parameter scheduler in Step 4.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an exploration scheduler, an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration)
    actor = SimpleActor(env, agent_manager)
    learner = SimpleLearner(
        agent_manager, actor, scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
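For orientation, here is a rough sketch of the config structure this launcher expects. Every key below is inferred from an attribute access in the code, while the concrete values (scenario, topology, episode count, action count) are illustrative placeholders rather than the example's actual settings.

# Hypothetical config skeleton for the single-process launcher above.
# Key names mirror the accesses in launch(); the values are placeholders.
config = {
    "env": {
        "scenario": "cim",                  # assumed scenario name
        "topology": "toy.4p_ssdd_l0.0",     # assumed toy topology
        "durations": 1120,                  # episode length in ticks (illustrative)
        "state_shaping": {},                # kwargs forwarded to CIMStateShaper
        "experience_shaping": {}            # kwargs forwarded to TruncatedExperienceShaper
    },
    "agents": {
        "algorithm": {"num_actions": 21}    # "input_dim" is injected at runtime from the state shaper
    },
    "main_loop": {
        "max_episode": 100,
        "exploration": {}                   # kwargs forwarded to TwoPhaseLinearParameterScheduler
    }
}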
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # Initialize the environment and the shapers needed for roll-outs on the actor side.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions)))
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Create an inference-mode agent manager for the actor process.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_actor",
        mode=AgentManagerMode.INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Proxy parameters for joining the communication group via Redis; the actor expects exactly one learner peer.
    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {"learner": 1},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    actor_worker = ActorWorker(
        local_actor=SimpleActor(env=env, agent_manager=agent_manager),
        proxy_params=proxy_params
    )
    actor_worker.launch()
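Both distributed launchers consume a second distributed_config. A minimal sketch of its expected shape, assuming a plain dict fed through convert_dottable; the key names are taken from the attribute accesses in the actor snippet above and the learner snippet below, while the values are placeholders.

# Hypothetical distributed_config skeleton; values are illustrative only.
distributed_config = {
    "group": "cim_dqn",      # communication group shared by the learner and all actors
    "num_actors": 1,         # read by the learner launcher below to size its expected peers
    "redis": {
        "hostname": "localhost",
        "port": 6379
    }
}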
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    # The learner only needs the state dimension to build its networks; no shapers are required here.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim

    # Create a training-mode agent manager for the learner process.
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_dqn_agents(agent_id_list, config.agents)
    )

    # Proxy parameters for joining the communication group via Redis; the learner waits for the
    # configured number of actor peers.
    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"] if "NUM_ACTORS" in os.environ else distributed_config.num_actors)
        },
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }

    # The ActorProxy stands in for the remote actor processes and collects their experiences.
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(proxy_params=proxy_params, experience_collecting_func=concat_experiences_by_agent),
        scheduler=TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
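In distributed mode, the actor and learner launchers run as separate processes that discover each other through the same Redis instance and group name. A hedged sketch of the wiring, assuming hypothetical module names and using the environment-variable overrides that both launchers already check:

import os

# The launchers above fall back to distributed_config only when these variables are absent,
# so a deployment can override them per terminal or container.
os.environ["GROUP"] = "cim_dqn"     # must match across the learner and all actors
os.environ["NUM_ACTORS"] = "1"      # read by the learner to size its expected actor peers

# Each role then runs in its own process against the same Redis instance, e.g.
# (module names are hypothetical):
#   python dist_learner.py   ->  calls the learner launch(config, distributed_config) above
#   python dist_actor.py     ->  calls the actor launch(config, distributed_config) above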
experience_shaper = KStepExperienceShaper(
    reward_func=lambda mt: 1 - mt["container_shortage"] / mt["order_requirements"],
    **config.experience_shaping.k_step
)

exploration_config = {
    "epsilon_range_dict": {"_all_": config.exploration.epsilon_range},
    "split_point_dict": {"_all_": config.exploration.split_point},
    "with_cache": config.exploration.with_cache
}
explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)

agent_manager = DQNAgentManager(
    name="cim_remote_actor",
    agent_id_list=agent_id_list,
    mode=AgentMode.INFERENCE,
    state_shaper=state_shaper,
    action_shaper=action_shaper,
    experience_shaper=experience_shaper,
    explorer=explorer
)

proxy_params = {
    "group_name": config.distributed.group_name,
    "expected_peers": config.distributed.actor.peer,
    "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port),
    "max_retries": 10
}
actor_worker = ActorWorker(
    local_actor=SimpleActor(env=env, inference_agents=agent_manager),
    proxy_params=proxy_params
)
actor_worker.launch()
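The reward_func handed to KStepExperienceShaper above is simply the order fulfilment ratio computed from the environment's business metrics. A standalone illustration with made-up numbers:

def fulfilment_reward(metrics: dict) -> float:
    # Reward = fraction of order requirements that were actually fulfilled.
    return 1 - metrics["container_shortage"] / metrics["order_requirements"]

# Illustrative numbers: 20 containers short out of 100 required orders -> reward 0.8.
print(fulfilment_reward({"container_shortage": 20, "order_requirements": 100}))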
"_all_": config.exploration.epsilon_range }, "split_point_dict": { "_all_": config.exploration.split_point }, "with_cache": config.exploration.with_cache } explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config) # Step 3: create an agent manager. agent_manager = DQNAgentManager(name="cim_learner", mode=AgentMode.TRAIN_INFERENCE, agent_id_list=agent_id_list, state_shaper=state_shaper, action_shaper=action_shaper, experience_shaper=experience_shaper, explorer=explorer) # Step 4: Create an actor and a learner to start the training process. actor = SimpleActor(env=env, inference_agents=agent_manager) learner = SimpleLearner( trainable_agents=agent_manager, actor=actor, logger=Logger(tag="single_host_cim_learner", dump_folder=os.path.join( os.path.split(os.path.realpath(__file__))[0], "log"), auto_timestamp=False)) learner.train(total_episodes=config.general.total_training_episodes)
state_shaper = CIMStateShaper(**config.state_shaping)

exploration_config = {
    "epsilon_range_dict": {
        "_all_": config.exploration.epsilon_range
    },
    "split_point_dict": {
        "_all_": config.exploration.split_point
    },
    "with_cache": config.exploration.with_cache
}
explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)

agent_manager = DQNAgentManager(
    name="cim_remote_learner",
    agent_id_list=agent_id_list,
    mode=AgentMode.TRAIN,
    state_shaper=state_shaper,
    explorer=explorer
)

proxy_params = {
    "group_name": config.distributed.group_name,
    "expected_peers": config.distributed.learner.peer,
    "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port),
    "max_retries": 10
}
learner = SimpleLearner(
    trainable_agents=agent_manager,