import numpy as np

from deer.agent import NeuralAgent
from deer.q_networks.q_net_theano import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

if __name__ == "__main__":

    rng = np.random.RandomState(123456)

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # Plus, we also want to display after each training episode (!= than after every training) the average bellman
    # residual and the average of the V values obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
示例#2
0
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = MG_two_storages_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.update_rule, rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "MG2S_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))
示例#3
0
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(12345)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = pendulum_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env,
                          parameters.rms_decay,
                          parameters.rms_epsilon,
                          parameters.momentum,
                          parameters.clip_delta,
                          parameters.freeze_interval,
                          parameters.batch_size,
                          parameters.network_type,
                          parameters.update_rule,
                          parameters.batch_accumulator,
                          rng,
                          DoubleQ=True)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py