Пример #1
0
                              testID=MG_two_storages_env.TEST_MODE,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController and alike, nor with its testing (see next controller). Therefore, we
    # disable these controllers for the whole duration of the validation epochs interleaved this way, using the
    # controllers_to_disable argument of the InterleavedTestEpochController. For each validation epoch, we also want
    # to display the sum of all rewards obtained, hence show_score=True. Finally, we never want this controller to
    # call the summarizePerformance method of MG_two_storages_env, hence summarize_every=-1.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4, 7],
            periodicity=2,
            show_score=True,
            summarize_every=-1))

    # Besides inserting a validation epoch (required if one wants to find the best neural network over all training
    # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the
    # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its
    # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved
    # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we
    # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the
    # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.TEST_MODE,
            epoch_length=parameters.steps_per_test,
    # --- Build the environment, the Q-network and the agent (all sharing the same rng) ---
    env = Toy_env(rng)
    qnetwork = MyQNetwork(environment=env, random_state=rng)
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Controllers are identified by the order in which they are attached (see controllers_to_disable below), so the
    # sequence of attach calls matters: index 0 is the VerboseController, index 1 the TrainerController.

    # Index 0: before every training epoch, print a summary of the agent's epsilon, discount and learning rate as
    # well as the training epoch number.
    verbose_controller = bc.VerboseController()
    agent.attach(verbose_controller)

    # Index 1: during training epochs, train the agent after every action it takes, and display after each training
    # episode (which is not the same as after every training step) the average Bellman residual and the average of
    # the V values obtained during the last episode.
    trainer_controller = bc.TrainerController()
    agent.attach(trainer_controller)

    # Interleave a "test epoch" between each training epoch. The two controllers above must not act during these
    # test epochs, so their attach indexes are passed through controllers_to_disable.
    test_epoch_controller = bc.InterleavedTestEpochController(
        epoch_length=500,
        controllers_to_disable=[0, 1],
    )
    agent.attach(test_epoch_controller)

    # --- Run the experiment ---
    agent.run(n_epochs=100, epoch_length=1000)
Пример #3
0
    #    testID=None,
    #    unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController and alike. Therefore, we disable these controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, summarize_every=1 is passed, i.e. the environment's
    # summarizePerformance method is requested at every validation epoch.
    # NOTE(review): this list contains every index from 0 to 8 except 5 — presumably 5 is this controller's own
    # attach index; confirm against the full attach sequence.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=0,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4, 6, 7, 8],
            periodicity=2,
            show_score=True,
            summarize_every=1))

    # Second interleaved epoch (id=1), configured like the one above but with a disable list that contains 5 and
    # leaves out 6 instead.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=1,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4, 5, 7, 8],
            periodicity=2,
            show_score=True,
            summarize_every=1))

    agent.attach(
        bc.InterleavedTestEpochController(
Пример #4
0
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))

    # Discount factor schedule, configured from parameters.discount (initial value), parameters.discount_inc
    # (growth) and parameters.discount_max (maximum).
    agent.attach(
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1))

    # Epsilon schedule, configured from parameters.epsilon_start, parameters.epsilon_decay and
    # parameters.epsilon_min; evaluate_on='action' and reset_every='none' are passed as-is to the controller.
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # Interleaved test epochs (id=0) with epoch_length=parameters.steps_per_test, periodicity=2 and show_score=True.
    # controllers_to_disable lists the attach-order indexes 0 to 4.
    # NOTE(review): not all of the corresponding attach calls are visible here — confirm that these indexes are the
    # controllers that must stay off during test epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=0,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4],
            periodicity=2,
            show_score=True,
            summarize_every=parameters.period_btw_summary_perfs))

    # --- Run the experiment ---
    agent.run(parameters.epochs, parameters.steps_per_epoch)
Пример #5
0
 # The FindBestController will dump on disk the validation scores for each and every network, as well as the
 # structure of the neural network having the best validation score. These dumps can then be used to plot the
 # evolution of the validation and test scores (see below) or simply to recover the resulting neural network for
 # your application. No test epoch is tracked here (testID=None).
 agent.attach(bc.FindBestController(
     validationID=simple_maze_env.VALIDATION_MODE,
     testID=None,
     unique_fname=fname))

 # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
 # "validation epoch" between each training epoch (periodicity=1). For each validation epoch, we also want to
 # display the sum of all rewards obtained, hence show_score=True. Finally, summarize_every=1 is passed, i.e. the
 # environment's summarizePerformance method is requested at every validation epoch.
 agent.attach(bc.InterleavedTestEpochController(
     id=simple_maze_env.VALIDATION_MODE,
     epoch_length=parameters.steps_per_test,
     periodicity=1,
     show_score=True,
     summarize_every=1))
 
 # --- Run the experiment ---
 # Make sure the "params" directory exists before dumping into it. Only OSError (what os.mkdir raises) is caught,
 # and it is re-raised unless the directory is actually there, so that a real failure (e.g. missing permissions)
 # is reported here rather than as a less explicit error from dump() below.
 try:
     os.mkdir("params")
 except OSError:
     if not os.path.isdir("params"):
         raise
 # Save the experiment parameters next to the results, keyed by the same unique file name.
 dump(vars(parameters), "params/" + fname + ".jldump")
 agent.gathering_data = False
 agent.run(parameters.epochs, parameters.steps_per_epoch)

 # --- Show results ---
 # Load the scores dumped under "scores/<fname>_scores.jldump".
 basename = "scores/" + fname
 scores = load(basename + "_scores.jldump")
Пример #6
0
    #agent.attach(bc.FindBestController(
    #    validationID=maze_env.VALIDATION_MODE,
    #    testID=None,
    #    unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave
    # "validation epochs" between the training epochs. Two InterleavedTestEpochController instances are created
    # here (id=0 and id=1), both with periodicity=1 and epoch_length=parameters.steps_per_test. No
    # controllers_to_disable list is passed, so these calls do not name any controller to switch off during the
    # validation epochs. For each validation epoch, we also want to display the sum of all rewards obtained, hence
    # show_score=True. Finally, summarize_every=1 is passed, i.e. the environment's summarizePerformance method is
    # requested at every validation epoch.
    valid0 = bc.InterleavedTestEpochController(
        id=0,
        epoch_length=parameters.steps_per_test,
        periodicity=1,
        show_score=True,
        summarize_every=1)
    agent.attach(valid0)

    # Same configuration as valid0, registered under id=1.
    valid1 = bc.InterleavedTestEpochController(
        id=1,
        epoch_length=parameters.steps_per_test,
        periodicity=1,
        show_score=True,
        summarize_every=1)
    agent.attach(valid1)

    valid2 = bc.InterleavedTestEpochController(
        id=2,
        epoch_length=parameters.steps_per_test,
Пример #7
0
                              testID=None,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController and alike. Therefore, we disable these controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, summarize_every=1 is passed, i.e. the summarizePerformance method of
    # ALE_env is requested at every validation epoch.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=ALE_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4],
            periodicity=2,
            show_score=True,
            summarize_every=1))

    # --- Run the experiment ---
    # Make sure the "params" directory exists before dumping into it. Only OSError (what os.mkdir raises) is caught,
    # and it is re-raised unless the directory is actually there, so that a real failure (e.g. missing permissions)
    # is reported here rather than as a less explicit error from dump() below.
    try:
        os.mkdir("params")
    except OSError:
        if not os.path.isdir("params"):
            raise
    # Save the experiment parameters next to the results, keyed by the same unique file name.
    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.run(parameters.epochs, parameters.steps_per_epoch)

    # --- Show results ---
    # Load the scores dumped under "scores/<fname>_scores.jldump".
    basename = "scores/" + fname
    scores = load(basename + "_scores.jldump")
Пример #8
0
                              testID=None,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController and alike. Therefore, we disable these controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence showScore=True. Finally, summarizeEvery=1 is passed, i.e. the summarizePerformance method of
    # PLE_env is requested at every validation epoch.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=PLE_env.VALIDATION_MODE,
            epochLength=parameters.steps_per_test,
            controllersToDisable=[0, 1, 2, 3, 4],
            periodicity=2,
            showScore=True,
            summarizeEvery=1))

    # --- Run the experiment ---
    # Make sure the "params" directory exists before dumping into it. Only OSError (what os.mkdir raises) is caught,
    # and it is re-raised unless the directory is actually there, so that a real failure (e.g. missing permissions)
    # is reported here rather than as a less explicit error from dump() below.
    try:
        os.mkdir("params")
    except OSError:
        if not os.path.isdir("params"):
            raise
    # Save the experiment parameters next to the results, keyed by the same unique file name.
    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.run(parameters.epochs, parameters.steps_per_epoch)

    # --- Show results ---
    # Load the scores dumped under "scores/<fname>_scores.jldump".
    basename = "scores/" + fname
    scores = joblib.load(basename + "_scores.jldump")
Пример #9
0
     validationID=MG_two_storages_env.VALIDATION_MODE, 
     testID=MG_two_storages_env.TEST_MODE,
     unique_fname=fname))
 
 # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
 # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
 # these validation epochs to interfere with the training of the agent, which is handled by the
 # TrainerController, EpsilonController and alike, nor with its testing (see next controller). Therefore, we
 # disable these controllers for the whole duration of the validation epochs interleaved this way, using the
 # controllersToDisable argument of the InterleavedTestEpochController. For each validation epoch, we also want to
 # display the sum of all rewards obtained, hence showScore=True. Finally, we never want this controller to call
 # the summarizePerformance method of MG_two_storages_env, hence summarizeEvery=-1.
 agent.attach(bc.InterleavedTestEpochController(
     id=MG_two_storages_env.VALIDATION_MODE,
     epochLength=parameters.steps_per_test,
     controllersToDisable=[0, 1, 2, 3, 4, 7],
     periodicity=2,
     showScore=True,
     summarizeEvery=-1))
 
 # Besides inserting a validation epoch (required if one wants to find the best neural network over all training
 # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the 
 # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its 
 # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved 
 # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we 
 # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the 
 # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs.
 agent.attach(bc.InterleavedTestEpochController(
     id=MG_two_storages_env.TEST_MODE,
     epochLength=parameters.steps_per_test,
     controllersToDisable=[0, 1, 2, 3, 4, 6],
Пример #10
0
    # Epsilon schedule, configured from parameters.epsilon_start, parameters.epsilon_decay and
    # parameters.epsilon_min; evaluate_on='action' and reset_every='none' are passed as-is to the controller.
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch (periodicity=1).
    # The InterleavedTestEpochController also accepts a controllers_to_disable argument: a list of the indexes of
    # all controllers to disable during the test epochs, their index reflecting the order in which they were
    # attached ("0" refers to the first attached controller, "2" to the third one, etc.; the order in which the
    # indexes are listed is not important). That argument is NOT passed in this call, so no controller is named
    # for disabling here.
    # NOTE(review): if training-related controllers (TrainerController, EpsilonController and alike) must stay off
    # during test epochs, a controllers_to_disable list is probably missing — confirm.
    # For each test epoch, we also want to display the sum of all rewards obtained, hence show_score=True.
    # Finally, we want to call the summarizePerformance method of Toy_Env every [parameters.period_btw_summary_perfs]
    # *test* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=0,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
            show_score=True,
            summarize_every=parameters.period_btw_summary_perfs))

    # --- Run the experiment ---
    agent.run(parameters.epochs, parameters.steps_per_epoch)
Пример #11
0
                        batch_size=32,
                        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    agent.attach(
        bc.EpsilonController(initial_e=0.8,
                             e_decays=args.epochs[0] * args.epochs[1],
                             e_min=0.2))
    agent.attach(
        bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                                  args.learning_rate[2]))
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=1000,
                                          controllers_to_disable=[1, 2, 3, 4]))
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env,
                          batch_size=32,
                          double_Q=True,
                          freeze_interval=args.epochs[1],
                          random_state=rng)
    agent = NeuralAgent(
        env,
        network,
        train_policy=GaussianNoiseExplorationPolicy(
            network, env.nActions(), rng, .5) if args.exploration == 'gauss'
        else EpsilonGreedyPolicy(network, env.nActions(), rng, 0.1),
        replay_memory_size=min(args.epochs[0] * args.epochs[1] * 2, 100000),
        batch_size=32,
        random_state=rng)
Пример #12
0
        bc.FindBestController(validationID=MG_two_storages_env.VALIDATION_MODE,
                              testID=MG_two_storages_env.TEST_MODE,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch (periodicity=1). No controllers_to_disable list is passed, so
    # this call does not name any controller to switch off during the validation epochs. For each validation epoch,
    # we also want to display the sum of all rewards obtained, hence show_score=True. Finally, we never want this
    # controller to call the summarizePerformance method of MG_two_storages_env, hence summarize_every=-1.
    # NOTE(review): epoch_length is parameters.steps_per_epoch here, whereas the test controller attached next uses
    # parameters.steps_per_test — confirm this is intentional.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_epoch,
            periodicity=1,
            show_score=True,
            summarize_every=-1))

    # Besides inserting a validation epoch (required if one wants to find the best neural network over all training
    # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the
    # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its
    # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved
    # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we
    # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the
    # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.TEST_MODE,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
Пример #13
0
from deer.learning_algos.q_net_keras import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

# Seeded random generator shared by the environment, the Q-network and the agent.
rng = np.random.RandomState(123456)

# --- Build the environment, the Q-network and the agent ---
env = Toy_env(rng)
qnetwork = MyQNetwork(environment=env, random_state=rng)
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent (attach order is kept as-is) ---
# Before every training epoch, print a summary of the agent's epsilon, discount and learning rate as well as the
# training epoch number.
verbose_controller = bc.VerboseController()
agent.attach(verbose_controller)

# During training epochs, train the agent after every action it takes, and display after each training episode
# (which is not the same as after every training step) the average Bellman residual and the average of the V values
# obtained during the last episode.
trainer_controller = bc.TrainerController()
agent.attach(trainer_controller)

# Interleave a "test epoch" (epoch_length=500) between each training epoch.
test_epoch_controller = bc.InterleavedTestEpochController(epoch_length=500)
agent.attach(test_epoch_controller)

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
Пример #14
0
#        validationID=ALE_env.VALIDATION_MODE,
#        testID=None,
#        unique_fname=fname))
    
    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController and alike. Therefore, we disable these controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, summarize_every=1 is passed, i.e. the summarizePerformance method of
    # ALE_env is requested at every validation epoch.
    # NOTE(review): this list contains every index from 0 to 8 except 5 — presumably 5 is this controller's own
    # attach index; confirm against the full attach sequence.
    agent.attach(bc.InterleavedTestEpochController(
        id=ALE_env.VALIDATION_MODE,
        epoch_length=parameters.steps_per_test,
        controllers_to_disable=[0, 1, 2, 3, 4, 6,7,8],
        periodicity=2,
        show_score=True,
        summarize_every=1))

    # Second interleaved epoch (id=ALE_env.VALIDATION_MODE+1), configured like the one above but with a disable
    # list that contains 5 and leaves out 6 instead.
    agent.attach(bc.InterleavedTestEpochController(
        id=ALE_env.VALIDATION_MODE+1,
        epoch_length=parameters.steps_per_test,
        controllers_to_disable=[0, 1, 2, 3, 4, 5, 7,8],
        periodicity=2,
        show_score=True,
        summarize_every=1))

    agent.attach(bc.InterleavedTestEpochController(
        id=ALE_env.VALIDATION_MODE+2, 
        epoch_length=parameters.steps_per_test,
Пример #15
0
    # Plus, we also want to display after each training episode (!= than after every training) the average bellman
    # residual and the average of the V values obtained during the last episode, hence the two last arguments.
    agent.attach(bc.TrainerController(
        evaluate_on='action', 
        periodicity=parameters.update_frequency, 
        show_episode_avg_V_value=True, 
        show_avg_Bellman_residual=True))
    
    # Track the best network using the scores of the interleaved epoch whose id is 2 (validationID=2 matches the
    # id of the InterleavedTestEpochController attached right below); no test epoch is tracked (testID=None).
    # Output files are named after fname.
    agent.attach(bc.FindBestController(
        validationID=2,
        testID=None,
        unique_fname=fname))

    # Interleaved validation epoch (id=2) with epoch_length=parameters.steps_per_test and periodicity=1. The sum
    # of rewards is displayed (show_score=True) and summarize_every=1 is passed. No controllers_to_disable list is
    # given, so this call does not name any controller to switch off during these epochs.
    agent.attach(bc.InterleavedTestEpochController(
        id=2, 
        epoch_length=parameters.steps_per_test,
        periodicity=1,
        show_score=True,
        summarize_every=1))

    # --- Run the experiment ---
    # Make sure the "params" directory exists before dumping into it. Only OSError (what os.mkdir raises) is caught,
    # and it is re-raised unless the directory is actually there, so that a real failure (e.g. missing permissions)
    # is reported here rather than as a less explicit error from dump() below.
    try:
        os.mkdir("params")
    except OSError:
        if not os.path.isdir("params"):
            raise
    # Save the experiment parameters next to the results, keyed by the same unique file name.
    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.gathering_data = False
    # Start from a previously saved network (hard-coded dump name).
    agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    # Freeze the network except the encoder, then reset the encoder before running.
    agent._learning_algo.freezeAllLayersExceptEncoder()
    agent._learning_algo.resetEncoder()
    agent.run(parameters.epochs, parameters.steps_per_epoch)
Пример #16
0
    # Learning rate schedule, configured from parameters.learning_rate (initial value) and
    # parameters.learning_rate_decay.
    agent.attach(bc.LearningRateController(
        initialLearningRate=parameters.learning_rate,
        learningRateDecay=parameters.learning_rate_decay,
        periodicity=1))

    # Discount factor schedule, configured from parameters.discount (initial value), parameters.discount_inc
    # (growth) and parameters.discount_max (maximum).
    agent.attach(bc.DiscountFactorController(
        initialDiscountFactor=parameters.discount,
        discountFactorGrowth=parameters.discount_inc,
        discountFactorMax=parameters.discount_max,
        periodicity=1))

    # Epsilon schedule, configured from parameters.epsilon_start, parameters.epsilon_decay and
    # parameters.epsilon_min; evaluateOn='action' and resetEvery='none' are passed as-is to the controller.
    agent.attach(bc.EpsilonController(
        initialE=parameters.epsilon_start, 
        eDecays=parameters.epsilon_decay, 
        eMin=parameters.epsilon_min,
        evaluateOn='action', 
        periodicity=1, 
        resetEvery='none'))

    # Interleaved test epochs (id=0) with epochLength=parameters.steps_per_test, periodicity=2 and showScore=True.
    # controllersToDisable lists the attach-order indexes 0 to 4.
    # NOTE(review): not all of the corresponding attach calls are visible here — confirm that these indexes are the
    # controllers that must stay off during test epochs.
    agent.attach(bc.InterleavedTestEpochController(
        id=0, 
        epochLength=parameters.steps_per_test, 
        controllersToDisable=[0, 1, 2, 3, 4], 
        periodicity=2, 
        showScore=True,
        summarizeEvery=parameters.period_btw_summary_perfs))
    
    # --- Run the experiment ---
    agent.run(parameters.epochs, parameters.steps_per_epoch)