Example #1
    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        environment=env,
        random_state=rng)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and 
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # We also want to display, after each training episode (as opposed to after every training step), the average
    # Bellman residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
    # agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
    # controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
    # the indexes of the controllers to disable, each index reflecting the order in which the controllers were attached.
    agent.attach(bc.InterleavedTestEpochController(
        epoch_length=500, 
        controllers_to_disable=[0, 1]))
        
Example #2
        double_Q=True)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        exp_priority=1.)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluate_on='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluate_on='action', 
        periodicity=parameters.update_frequency, 
        show_episode_avg_V_value=True, 
        show_avg_Bellman_residual=True))

    agent.attach(bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))

    agent.attach(bc.DiscountFactorController(
        initial_discount_factor=parameters.discount,
Example #3
        rng,
        DoubleQ=True)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluateOn='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluateOn='action', 
        periodicity=parameters.update_frequency, 
        showEpisodeAvgVValue=False, 
        showAvgBellmanResidual=False))

    agent.attach(bc.LearningRateController(
        initialLearningRate=parameters.learning_rate,
        learningRateDecay=parameters.learning_rate_decay,
        periodicity=1))

    agent.attach(bc.DiscountFactorController(
        initialDiscountFactor=parameters.discount,
Example #4
File: run_ALE.py  Project: VinF/deer
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        test_policy=test_policy,
    )

    # --- Create unique filename for FindBestController ---
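    # Note: 'hash' here is presumably joblib.hash (imported elsewhere in the full script),
    # which accepts a hash_name argument; the Python built-in hash() does not.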
    h = hash(vars(parameters), hash_name="sha1")
    fname = "ALE_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on="epoch", periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
    # We also want to display, after each training episode (as opposed to after every training step), the average
    # Bellman residual and the average of the V values obtained during the last episode, hence the last two arguments.
    agent.attach(
        bc.TrainerController(
            evaluate_on="action",
            periodicity=parameters.update_frequency,
            show_episode_avg_V_value=True,
            show_avg_Bellman_residual=True,
        )
    )

    # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we wish to
    # update the learning rate after every training epoch (periodicity=1), according to the parameters given.
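The snippet is cut off before the corresponding attach call; a minimal sketch of what it would look like, mirroring the LearningRateController calls shown in Examples #2 and #6 and assuming the same `parameters` object:

    agent.attach(bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))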
Example #5
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # As with the discount factor and the learning rate, one can periodically update the parameter of the epsilon-greedy
    # policy implemented by the agent. This controller has a few more capabilities, as it allows one to choose more
    # precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also be reset every
    # episode or epoch (or never, hence reset_every='none').
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # --- Run the experiment ---
    try:
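The example is truncated just as the experiment is launched. A minimal sketch of how such a run block typically continues, using the agent.run call shown in Example #8 (the epoch counts are illustrative, and the except clause is an assumption about why the call is wrapped in a try):

    # --- Run the experiment ---
    try:
        agent.run(n_epochs=100, epoch_length=1000)
    except KeyboardInterrupt:
        pass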
Example #6
    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        qnetwork,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        exp_priority=1.,
                        train_policy=train_policy,
                        test_policy=test_policy)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))

    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))

    agent.attach(
        bc.DiscountFactorController(
Example #7
rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
# agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
# controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
# the indexes of the controllers to disable, each index reflecting the order in which the controllers were attached.
agent.attach(
    bc.InterleavedTestEpochController(epoch_length=500,
                                      controllers_to_disable=[0, 1]))
Example #8
rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
# agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
# controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
# the indexes of the controllers to disable, each index reflecting the order in which the controllers were attached.
agent.attach(bc.InterleavedTestEpochController(epoch_length=500))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
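Examples #7 and #8 are otherwise complete scripts; to run them standalone they only need the usual deer imports. A minimal sketch (exact module paths vary between deer releases, and Toy_env refers to the toy environment shipped with the deer examples):

    import numpy as np
    import deer.experiment.base_controllers as bc
    from deer.agent import NeuralAgent
    # The Q-network and toy environment come from the deer examples; the MyQNetwork
    # path depends on the deer version (e.g. deer.learning_algos.q_net_keras).
    from deer.learning_algos.q_net_keras import MyQNetwork
    from Toy_env import MyEnv as Toy_env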
Example #9
        rng,
        DoubleQ=True)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(bc.VerboseController(
        evaluateOn='epoch', 
        periodicity=1))

    agent.attach(bc.TrainerController(
        evaluateOn='action', 
        periodicity=parameters.update_frequency, 
        showEpisodeAvgVValue=True, 
        showAvgBellmanResidual=True))

    agent.attach(bc.LearningRateController(
        initialLearningRate=parameters.learning_rate,
        learningRateDecay=parameters.learning_rate_decay,
        periodicity=1))

    agent.attach(bc.DiscountFactorController(
        initialDiscountFactor=parameters.discount,