Example #1
def test_TRPO_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    params = dict(ent_coeff=0.0, max_kl=.001, lam=.98, n_epochs_line_search=10,
                  n_epochs_cg=10, cg_damping=1e-2, cg_residual_tol=1e-10,
                  quiet=True)

    agent_save = learn(TRPO, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    # Check that every attribute survives the save/load round trip.
    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        tu.assert_eq(save_attr, load_attr)
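Every example on this page repeats the same attribute-comparison loop. A minimal sketch of how it could be factored into a shared helper (the name assert_agent_roundtrip is hypothetical; tu is the test-utility module these tests already use):

def assert_agent_roundtrip(agent_save, agent_load):
    # Compare every instance attribute of the original and reloaded agents.
    for att in vars(agent_save):
        tu.assert_eq(getattr(agent_save, att), getattr(agent_load, att))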
Example #2
def test_sac_save():
    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn_sac()

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #3
def test_fqi_boosted_save():
    params = dict(n_iterations=10, boosted=True)
    agent_save, _ = learn(FQI, params)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #4
def test_eNAC_save():
    params = dict(learning_rate=AdaptiveParameter(value=.01))

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(eNAC, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #5
def test_PPO_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    params = dict(actor_optimizer={'class': optim.Adam,
                                   'params': {'lr': 3e-4}},
                  n_epochs_policy=4, batch_size=64, eps_ppo=.2, lam=.95,
                  quiet=True)

    agent_save = learn(PPO, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        tu.assert_eq(save_attr, load_attr)
Example #6
def test_categorical_dqn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    params = dict(batch_size=50,
                  initial_replay_size=50,
                  max_replay_size=5000,
                  target_update_frequency=50)
    agent_save = learn(CategoricalDQN, params)

    agent_save.save(agent_path, full_save=True)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #7
def test_q_learning_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    pi, mdp, _ = initialize()
    agent_save = QLearning(mdp.info, pi, Parameter(.5))

    core = Core(agent_save, mdp)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        tu.assert_eq(save_attr, load_attr)
Example #8
def test_prioritized_dqn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    replay_memory = PrioritizedReplayMemory(
        50, 500, alpha=.6,
        beta=LinearParameter(.4, threshold_value=1, n=500 // 5)
    )
    params = dict(batch_size=50, initial_replay_size=50,
                  max_replay_size=500, target_update_frequency=50,
                  replay_memory=replay_memory)
    agent_save = learn(DQN, params)

    agent_save.save(agent_path, full_save=True)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #9
def test_categorical_dqn_save():
    params = dict(batch_size=50,
                  n_approximators=1,
                  initial_replay_size=50,
                  max_replay_size=5000,
                  target_update_frequency=50)
    agent_save = learn(CategoricalDQN, params)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #10
def test_q_learning_save():
    pi, mdp, _ = initialize()
    agent_save = QLearning(mdp.info, pi, Parameter(.5))

    core = Core(agent_save, mdp)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        tu.assert_eq(save_attr, load_attr)
Example #11
def test_sarsa_lambda_continuous_linear_save():
    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)
    n_tilings = 1
    tilings = Tiles.generate(n_tilings, [2, 2],
                             mdp_continuous.info.observation_space.low,
                             mdp_continuous.info.observation_space.high)
    features = Features(tilings=tilings)

    approximator_params = dict(
        input_shape=(features.size, ),
        output_shape=(mdp_continuous.info.action_space.n, ),
        n_actions=mdp_continuous.info.action_space.n)
    agent_save = SARSALambdaContinuous(mdp_continuous.info,
                                       pi,
                                       LinearApproximator,
                                       Parameter(.1),
                                       .9,
                                       features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #12
def test_true_online_sarsa_lambda_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)
    n_tilings = 1
    tilings = Tiles.generate(n_tilings, [2, 2],
                             mdp_continuous.info.observation_space.low,
                             mdp_continuous.info.observation_space.high)
    features = Features(tilings=tilings)

    approximator_params = dict(
        input_shape=(features.size, ),
        output_shape=(mdp_continuous.info.action_space.n, ),
        n_actions=mdp_continuous.info.action_space.n)
    agent_save = TrueOnlineSARSALambda(mdp_continuous.info,
                                       pi,
                                       Parameter(.1),
                                       .9,
                                       features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #13
def test_sarsa_lambda_continuous_nn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)

    features = Features(
        n_outputs=mdp_continuous.info.observation_space.shape[0])

    approximator_params = dict(
        input_shape=(features.size, ),
        output_shape=(mdp_continuous.info.action_space.n, ),
        network=Network,
        n_actions=mdp_continuous.info.action_space.n)
    agent_save = SARSALambdaContinuous(mdp_continuous.info,
                                       pi,
                                       TorchApproximator,
                                       Parameter(.1),
                                       .9,
                                       features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #14
def test_TRPO_save():
    params = dict(ent_coeff=0.0,
                  max_kl=.001,
                  lam=.98,
                  n_epochs_line_search=10,
                  n_epochs_cg=10,
                  cg_damping=1e-2,
                  cg_residual_tol=1e-10,
                  quiet=True)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(TRPO, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        tu.assert_eq(save_attr, load_attr)
Example #15
    def _load_agent(self, path):
        return Agent.load(path)
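This method simply delegates to MushroomRL's Agent.load, which restores an agent previously written with agent.save(path). A minimal round-trip sketch, assuming any trained agent (the path is illustrative):

agent.save('./my_agent/')             # serialize the trained agent
restored = Agent.load('./my_agent/')  # what _load_agent wraps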
Example #16
    # NOTE: this snippet starts mid-function; the beginning of the
    # observation array construction is missing from the source.
    observation = np.array([
        potential_violation_times, env.resource_edges
    ], dtype=object)
    mdp = StrictResourceTargetTopEnvironment("dataset.db",
                                             params['area'],
                                             observation,
                                             gamma=params['gamma'],
                                             allow_wait=True,
                                             start_hour=params['start_hour'],
                                             end_hour=params['end_hour'],
                                             add_time=params['gamma'] < 1,
                                             speed=speed)

    # days = [i for i in range(1, 356) if i % 13 == 1]  # validation
    days = [i for i in range(1, 356) if i % 13 == 0]  # test

    agent = Agent(mdp.info, Greedy(mdp, speed))
    core = Core(agent, mdp)
    value = np.mean(compute_J(core.evaluate(initial_states=days, render=True)))
    print("avg J Greedy", value)
    # mdp.save_rendered("greedy.mp4", 10000)

    t, h = .1, 600.0
    agent = Agent(mdp.info, ACO(mdp, sys.maxsize, t, speed, max_time=h))
    core = Core(agent, mdp)
    value = np.mean(compute_J(core.evaluate(initial_states=days, render=True)))
    print("avg J ACO", t, h, value)
    # mdp.save_rendered("aco_%d_%d.mp4" % (t * 60, h))

    mdp.close()
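The Greedy and ACO evaluations above share one pattern: wrap a policy in an Agent, roll it out with Core, and average the return J over the evaluation episodes. A minimal sketch, assuming mdp and a policy object accepted by Agent already exist:

agent = Agent(mdp.info, policy)
core = Core(agent, mdp)
dataset = core.evaluate(initial_states=days, render=False)
print("avg J", np.mean(compute_J(dataset)))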