Example #1
def create_agent(agent, gamma, beta, num_iters, max_delay, hyperbolic_constant,
                 calibration):
    """Creates the agent specified in config."""
    if agent == 'optimal':
        return fast_agents.FastOptimalAgent(gamma=gamma,
                                            beta=beta,
                                            num_iters=num_iters)
    elif agent == 'naive':
        return fast_agents.FastNaiveTimeDiscountingAgent(max_delay,
                                                         hyperbolic_constant,
                                                         gamma=gamma,
                                                         beta=beta,
                                                         num_iters=num_iters)
    elif agent == 'sophisticated':
        return fast_agents.FastSophisticatedTimeDiscountingAgent(
            max_delay,
            hyperbolic_constant,
            gamma=gamma,
            beta=beta,
            num_iters=num_iters)
    elif agent == 'myopic':
        return fast_agents.FastMyopicAgent(max_delay,
                                           gamma=gamma,
                                           beta=beta,
                                           num_iters=num_iters)
    elif agent in ['overconfident', 'underconfident']:
        # Overconfident agents must have calibration > 1, underconfident < 1.
        assert (calibration > 1) if agent == 'overconfident' else (calibration < 1)
        return fast_agents.FastUncalibratedAgent(
            gamma=gamma,
            beta=beta,
            num_iters=num_iters,
            calibration_factor=calibration)
    raise ValueError('Invalid agent: ' + agent)
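
A minimal usage sketch of create_agent. Hedged: the hyperparameter values are illustrative only, and the snippet assumes create_agent is importable alongside fast_agents:

    # Hypothetical values; max_delay and hyperbolic_constant are simply
    # ignored by agents that do not use them.
    naive = create_agent('naive', gamma=0.95, beta=None, num_iters=20,
                         max_delay=10, hyperbolic_constant=1.0,
                         calibration=None)
    # The assert above requires calibration > 1 for 'overconfident'.
    over = create_agent('overconfident', gamma=0.95, beta=None, num_iters=20,
                        max_delay=10, hyperbolic_constant=1.0, calibration=5)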
Example #2
def infer_with_rational_planner(config, beta=None):
    """Mimics standard IRL: trains the planner on optimal-agent demonstrations,
    then infers rewards from the human trajectories."""
    if config.verbosity >= 2:
        print('Using a rational planner with beta {} to mimic normal IRL'.format(beta))
    architecture = PlannerArchitecture(config)

    agent, other_agents = create_agents_from_config(config)
    num_without_reward = make_evenly_batched(config.num_human_trajectories, config)
    num_simulated, num_validation = config.num_simulated, config.num_validation

    optimal_agent = fast_agents.FastOptimalAgent(
        gamma=config.gamma, beta=beta, num_iters=config.num_iters)
    train_data, validation_data = generate_data_for_planner(
        num_simulated, num_validation, optimal_agent, config, other_agents)
    reward_data = generate_data_for_reward(
        num_without_reward, agent, config, other_agents)
    return run_inference(train_data, validation_data, reward_data,
                         two_phase_algorithm, architecture, config)
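
A hedged invocation sketch: config here is the repo's parsed-flags object, and the SimpleNamespace below is a stand-in that models only the fields this function reads directly; the helpers it calls (create_agents_from_config, the data generators) need more:

    from types import SimpleNamespace

    # Stand-in for the real config object; field values are hypothetical.
    config = SimpleNamespace(verbosity=2, gamma=0.95, num_iters=20,
                             num_simulated=1000, num_validation=100,
                             num_human_trajectories=80)
    # infer_with_rational_planner(config, beta=1.0)  # left commented: the
    # downstream helpers read additional config fields not modeled here.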
Example #3
def infer_with_no_rewards(config, train_jointly, initialize):
    """Trains planner and reward modules when no ground-truth rewards are
    given, either jointly or iteratively, optionally initializing the planner
    on optimal-agent data first."""
    if config.verbosity >= 2:
        s1 = 'jointly' if train_jointly else 'iteratively'
        s2 = 'with' if initialize else 'without'
        print('No rewards given, training planner and reward {} {} initialization'.format(s1, s2))
    architecture = PlannerArchitecture(config)

    agent, other_agents = create_agents_from_config(config)
    num_simulated, num_validation = config.num_simulated, config.num_validation
    num_without_reward = make_evenly_batched(config.num_human_trajectories, config)
    reward_data = generate_data_for_reward(
        num_without_reward, agent, config, other_agents)
    alg = joint_algorithm if train_jointly else iterative_algorithm

    train_data, validation_data = None, None
    if initialize:
        optimal_agent = fast_agents.FastOptimalAgent(
            gamma=config.gamma, beta=config.beta, num_iters=config.num_iters)
        train_data, validation_data = generate_data_for_planner(
            num_simulated, num_validation, optimal_agent, config, other_agents)
    return run_inference(train_data, validation_data, reward_data, alg, architecture, config)
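
The two boolean flags give four training modes; a comment sketch of the dispatch (using the same hypothetical config stand-in as above):

    # train_jointly picks the algorithm, initialize picks warm-starting:
    # infer_with_no_rewards(config, train_jointly=True,  initialize=True)   # joint, warm start
    # infer_with_no_rewards(config, train_jointly=True,  initialize=False)  # joint, cold start
    # infer_with_no_rewards(config, train_jointly=False, initialize=True)   # iterative, warm start
    # infer_with_no_rewards(config, train_jointly=False, initialize=False)  # iterative, cold start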
Example #4
def main():
    # Gridworld layout: 'X' = wall, 'A' = agent start, numbers = cell rewards.
    grid = [['X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X'],
            ['X', ' ', -90, -90, -90, -90, '8', ' ', 'X'],
            ['X', 'A', ' ', ' ', ' ', ' ', ' ', ' ', 'X'],
            ['X', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'X'],
            ['X', ' ', ' ', -99, '2', ' ', ' ', ' ', 'X'],
            ['X', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'X'],
            ['X', ' ', '1', ' ', ' ', ' ', ' ', ' ', 'X'],
            ['X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X']]
    mdp = GridworldMdp(grid, living_reward=-0.01, noise=0.2)
    env = Mdp(mdp)
    opt = fast_agents.FastOptimalAgent(gamma=0.95, num_iters=20)
    naive = NaiveTimeDiscountingAgent(10, 1, gamma=0.95, num_iters=20)
    soph = fast_agents.FastSophisticatedTimeDiscountingAgent(10,
                                                             1,
                                                             gamma=0.95,
                                                             num_iters=20)
    myopic = fast_agents.FastMyopicAgent(6, gamma=0.95, num_iters=20)
    over = fast_agents.FastUncalibratedAgent(gamma=0.95,
                                             num_iters=20,
                                             calibration_factor=5)
    under = fast_agents.FastUncalibratedAgent(gamma=0.95,
                                              num_iters=20,
                                              calibration_factor=0.5)

    agents = [opt, naive, soph, myopic, over, under]
    names = [
        'Optimal', 'Naive', 'Sophisticated', 'Myopic', 'Overconfident',
        'Underconfident'
    ]
    for name, agent in zip(names, agents):
        print('{} agent'.format(name))
        agent.set_mdp(mdp)
        trajectory = run_agent(agent, env, episode_length=50, determinism=True)
        if agent is naive:
            # Print the naive agent's action sequence for closer inspection.
            print([a for _, a, _, _ in trajectory])
        print_training_example(mdp, trajectory)
    print(opt.values.T)
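
A small helper sketch for summarizing any of the trajectories above. The (state, action, next_state, reward) step layout is an assumption inferred from the four-tuple unpacking used for the naive agent:

    def summarize_trajectory(trajectory):
        # Assumed step layout: (state, action, next_state, reward), matching
        # the `for _, a, _, _ in trajectory` unpacking above.
        actions = [a for _, a, _, _ in trajectory]
        total_reward = sum(r for _, _, _, r in trajectory)
        return actions, total_reward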
Example #5
def test_gridworld_optimal_agent(self):
    agent = fast_agents.FastOptimalAgent(gamma=0.95, num_iters=20)
    self.optimal_agent_test(agent)
Example #6
def test_compare_optimal_agents(self):
    agent1 = agents.OptimalAgent(gamma=0.95, num_iters=20)
    agent2 = fast_agents.FastOptimalAgent(gamma=0.95, num_iters=20)
    self.compare_agents('optimal', agent1, agent2, print_mdp=True)