Python build_DQN_Agent示例，regym.rl_algorithms.agents.build_DQN_Agent Python示例

示例#1

0

显示文件

def test_dueling_DQN_learns_to_beat_rock_in_RPS(RPSTask, dqn_config_dict):
    '''
    Learning check for the dueling DQN variant on rock-paper-scissors.

    The agent is trained against an opponent that only plays rock; an agent
    that is actually learning should move from random play towards the best
    response (playing only, or mostly, paper).

    :param RPSTask: Rock-paper-scissors task handed to the agent builder and
                    to the training helper.
    :param dqn_config_dict: DQN configuration dictionary. It is mutated in
                            place: its 'dueling' entry is set to True.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    from torch.utils.tensorboard import SummaryWriter
    import regym

    # The DQN loss module logs through a module-level writer, so one is
    # installed before the agent is built.
    writer = SummaryWriter('test_tensorboard')
    regym.rl_algorithms.DQN.dqn_loss.summary_writer = writer

    dqn_config_dict['dueling'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'Dueling_DQN')

    # The agent must be in training mode and must have picked up the
    # dueling flag from the configuration.
    assert agent.training
    assert agent.algorithm.use_dueling

    learn_against_fix_opponent(agent,
                               fixed_opponent=rockAgent,
                               agent_position=0,  # Doesn't matter in RPS
                               task=RPSTask,
                               total_episodes=250,
                               training_percentage=0.9,
                               reward_tolerance=2.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')

示例#2

0

显示文件

文件： dqn_test.py 项目： Danielhp95/Regym

def test_double_DQN_learns_to_beat_rock_in_RPS(RPSTask, dqn_config_dict):
    '''
    Learning check for the double DQN variant on rock-paper-scissors.

    The agent is trained against an opponent that only plays rock; an agent
    that is actually learning should move from random play towards the best
    response (playing only, or mostly, paper).

    :param RPSTask: Rock-paper-scissors task handed to the agent builder and
                    to the training helper.
    :param dqn_config_dict: DQN configuration dictionary. It is mutated in
                            place: its 'double' entry is set to True.
    '''
    from torch.utils.tensorboard import SummaryWriter
    import regym

    # The DQN loss module logs through a module-level writer, so one is
    # installed before the agent is built.
    writer = SummaryWriter('test_tensorboard')
    regym.rl_algorithms.DQN.dqn_loss.summary_writer = writer

    dqn_config_dict['double'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'Double_DQN')
    agent.state_preprocessing_fn = flatten_and_turn_into_single_element_batch

    # The agent must be in training mode and must have picked up the
    # double-DQN flag from the configuration.
    assert agent.training
    assert agent.algorithm.use_double

    learn_against_fix_opponent(agent,
                               fixed_opponent=rockAgent,
                               agent_position=0,  # Doesn't matter in RPS
                               task=RPSTask,
                               training_episodes=250,
                               benchmark_every_n_episodes=0,
                               test_episodes=50,
                               reward_tolerance=1.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')

示例#3

0

显示文件

文件： agent_hook_test.py 项目： Mark-F10/Regym

def test_can_save_dqn_to_memory(RPSTask, dqn_config_dict):
    '''
    Hook a DQN agent with a save path and verify, through
    ``assess_file_has_been_saved_on_disk_and_not_on_ram``, that the hook
    wrote the agent to that path.

    The saved file is removed afterwards even when hook creation or the
    assertion helper raises, so a failing run does not leave a stale file
    behind that could influence later tests using the same path.

    :param RPSTask: Task handed to the agent builder.
    :param dqn_config_dict: DQN configuration dictionary for the builder.
    '''
    save_path = '/tmp/test_save.agent'
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')
    try:
        hook = AgentHook(agent, save_path)
        assess_file_has_been_saved_on_disk_and_not_on_ram(hook, save_path)
    finally:
        # Guard the removal: if saving itself failed there is nothing to
        # delete, and a FileNotFoundError here would mask the real failure.
        if os.path.exists(save_path):
            os.remove(save_path)

示例#4

0

显示文件

def test_dqn_can_take_actions(RPSTask, dqn_config_dict):
    '''
    Smoke test: a freshly built DQN agent can pick actions from sampled
    observations, and those actions are accepted by the environment.

    :param RPSTask: Task whose ``env`` is stepped and whose action space is
                    used to validate the chosen actions.
    :param dqn_config_dict: DQN configuration dictionary for the builder.
    '''
    env = RPSTask.env
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')
    number_of_actions = 30
    for _ in range(number_of_actions):
        # Assuming that the first sampled observation corresponds to the
        # observation space of this agent.
        sampled = env.observation_space.sample()
        own_observation = sampled[0]
        action = agent.take_action(own_observation, legal_actions=[0, 1, 2])
        # Both players submit the same action; the step results are unused
        # but still unpacked into four values.
        _observation, _rewards, _done, _info = env.step([action, action])
        assert RPSTask.env.action_space.contains([action, action])

示例#5

0

显示文件

文件： agent_hook_test.py 项目： Mark-F10/Regym

def test_can_load_dqn_from_agenthook_with_cuda(RPSTask, dqn_config_dict):
    '''
    Hook a CUDA-enabled DQN agent with a save path, unhook it again and
    check that both of the retrieved agent's models hold CUDA parameters.

    :param RPSTask: Task handed to the agent builder.
    :param dqn_config_dict: DQN configuration dictionary. It is mutated in
                            place: its 'use_cuda' entry is set to True.
    '''
    dqn_config_dict['use_cuda'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')
    hook = AgentHook(agent, save_path='/tmp/test_save.agent')

    restored = AgentHook.unhook(hook)
    restored_algorithm = restored.algorithm
    assert_model_parameters_are_cuda_tensors(
        [restored_algorithm.model, restored_algorithm.target_model])

示例#6

0

显示文件

文件： dqn_test.py 项目： Danielhp95/Regym

def test_dqn_can_take_actions(RPSTask, dqn_config_dict):
    '''
    Smoke test: a freshly built DQN agent can pick actions from sampled
    observations through ``model_free_take_action``, and those actions are
    accepted by the environment.

    :param RPSTask: Task whose ``env`` is stepped and whose action space is
                    used to validate the chosen actions.
    :param dqn_config_dict: DQN configuration dictionary for the builder.
    '''
    env = RPSTask.env
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')
    agent.state_preprocessing_fn = flatten_and_turn_into_single_element_batch
    number_of_actions = 30
    for _ in range(number_of_actions):
        # Assuming that the first sampled observation corresponds to the
        # observation space of this agent.
        sampled = env.observation_space.sample()
        own_observation = sampled[0]
        action = agent.model_free_take_action(own_observation,
                                              legal_actions=[0, 1, 2])
        # Both players submit the same action; the step results are unused
        # but still unpacked into four values.
        _observation, _rewards, _done, _info = env.step([action, action])
        assert RPSTask.env.action_space.contains([action, action])

示例#7

0

显示文件

文件： agent_hook_test.py 项目： Mark-F10/Regym

def test_can_hook_dqn_agent_using_cuda(RPSTask, dqn_config_dict):
    '''
    Build a CUDA-enabled DQN agent, confirm that every parameter of its
    model and target model is on CUDA, then hook the agent and compare the
    hook against the expected DQN agent hook.

    :param RPSTask: Task handed to the agent builder.
    :param dqn_config_dict: DQN configuration dictionary. It is mutated in
                            place: its 'use_cuda' entry is set to True.
    '''
    dqn_config_dict['use_cuda'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')

    # Precondition: both networks already live on the GPU before hooking.
    assert all(param.is_cuda
               for param in agent.algorithm.model.parameters())
    assert all(param.is_cuda
               for param in agent.algorithm.target_model.parameters())

    hook = AgentHook(agent)
    hooked_models = [
        hook.agent.algorithm.model,
        hook.agent.algorithm.target_model,
    ]
    compare_against_expected_agenthook(agent, hook, AgentType.DQN,
                                       hooked_models)

示例#8

0

显示文件

文件： agent_hook_test.py 项目： Mark-F10/Regym

def test_can_unhook_dqn_agenthook_cuda(RPSTask, dqn_config_dict):
    '''
    Build a CUDA-enabled DQN agent, confirm that every parameter of its
    model and target model is on CUDA, then hook and unhook the agent and
    compare the retrieved agent against the original.

    :param RPSTask: Task handed to the agent builder.
    :param dqn_config_dict: DQN configuration dictionary. It is mutated in
                            place: its 'use_cuda' entry is set to True.
    '''
    dqn_config_dict['use_cuda'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'DQN')

    # Precondition: both networks already live on the GPU before hooking.
    assert all(param.is_cuda
               for param in agent.algorithm.model.parameters())
    assert all(param.is_cuda
               for param in agent.algorithm.target_model.parameters())

    hook = AgentHook(agent)
    retrieved_agent = AgentHook.unhook(hook)

    retrieved_models = [
        retrieved_agent.algorithm.model,
        retrieved_agent.algorithm.target_model,
    ]
    compare_against_expected_retrieved_agent(agent, retrieved_agent,
                                             retrieved_models)