testID=MG_two_storages_env.TEST_MODE, unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike, nor with its testing (see next controller). Therefore, we will # disable these controllers for the whole duration of the validation epochs interleaved this way, using the # controllersToDisable argument of the InterleavedTestEpochController. For each validation epoch, we want also to # display the sum of all rewards obtained, hence the showScore=True. Finally, we never want this controller to call # the summarizePerformance method of MG_two_storage_env. agent.attach( bc.InterleavedTestEpochController( id=MG_two_storages_env.VALIDATION_MODE, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4, 7], periodicity=2, show_score=True, summarize_every=-1)) # Besides inserting a validation epoch (required if one wants to find the best neural network over all training # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs. 
agent.attach( bc.InterleavedTestEpochController( id=MG_two_storages_env.TEST_MODE, epoch_length=parameters.steps_per_test,
# Wiring for the Toy_env example: build env/network/agent, attach controllers,
# then run. (Restored line structure: this chunk had been collapsed onto a
# single physical line, so the inline '#' comments commented out the code.)
# NOTE(review): assumes `rng`, `Toy_env`, `MyQNetwork`, `NeuralAgent` and `bc`
# are imported/defined earlier in the file — confirm against the full script.

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# Plus, we also want to display after each training episode (!= than after every training) the average bellman
# residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epoch to interfere with the training of the
# agent. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved this
# way, using the controllersToDisable argument of the InterleavedTestEpochController. The value of this argument
# is a list of the indexes of all controllers to disable, their index reflecting in which order they were added.
agent.attach(
    bc.InterleavedTestEpochController(epoch_length=500,
                                      controllers_to_disable=[0, 1]))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
# testID=None, # unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every # [parameters.period_btw_summary_perfs] *validation* epochs. agent.attach( bc.InterleavedTestEpochController( id=0, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4, 6, 7, 8], periodicity=2, show_score=True, summarize_every=1)) agent.attach( bc.InterleavedTestEpochController( id=1, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4, 5, 7, 8], periodicity=2, show_score=True, summarize_every=1)) agent.attach( bc.InterleavedTestEpochController(
initial_learning_rate=parameters.learning_rate, learning_rate_decay=parameters.learning_rate_decay, periodicity=1)) agent.attach( bc.DiscountFactorController( initial_discount_factor=parameters.discount, discount_factor_growth=parameters.discount_inc, discount_factor_max=parameters.discount_max, periodicity=1)) agent.attach( bc.EpsilonController(initial_e=parameters.epsilon_start, e_decays=parameters.epsilon_decay, e_min=parameters.epsilon_min, evaluate_on='action', periodicity=1, reset_every='none')) agent.attach( bc.InterleavedTestEpochController( id=0, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4], periodicity=2, show_score=True, summarize_every=parameters.period_btw_summary_perfs)) # --- Run the experiment --- agent.run(parameters.epochs, parameters.steps_per_epoch)
# simple_maze experiment tail: attach best-model tracking and a validation
# epoch, then run and reload the dumped scores. (Restored line structure: this
# chunk had been collapsed onto one physical line, making the inline comments
# swallow the code.)
# NOTE(review): `dump`/`load` are presumably joblib helpers imported earlier —
# confirm against the file's import block.

# The FindBestController will dump on disk the validation scores for each and every network, as well as the
# structure of the neural network having the best validation score. These dumps can then used to plot the evolution
# of the validation and test scores (see below) or simply recover the resulting neural network for your
# application.
agent.attach(bc.FindBestController(
    validationID=simple_maze_env.VALIDATION_MODE,
    testID=None,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch. For each validation epoch, we want also to display the sum of all
# rewards obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env
# every [parameters.period_btw_summary_perfs] *validation* epochs.
agent.attach(bc.InterleavedTestEpochController(
    id=simple_maze_env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_test,
    periodicity=1,
    show_score=True,
    summarize_every=1))

# --- Run the experiment ---
# Best-effort creation of the dump directory; deliberately ignore the error if
# it already exists (or cannot be created — the dump below will then fail loudly).
try:
    os.mkdir("params")
except Exception:
    pass
dump(vars(parameters), "params/" + fname + ".jldump")
agent.gathering_data = False
agent.run(parameters.epochs, parameters.steps_per_epoch)

# --- Show results ---
basename = "scores/" + fname
scores = load(basename + "_scores.jldump")
#agent.attach(bc.FindBestController( # validationID=maze_env.VALIDATION_MODE, # testID=None, # unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every # [parameters.period_btw_summary_perfs] *validation* epochs. valid0 = bc.InterleavedTestEpochController( id=0, epoch_length=parameters.steps_per_test, periodicity=1, show_score=True, summarize_every=1) agent.attach(valid0) valid1 = bc.InterleavedTestEpochController( id=1, epoch_length=parameters.steps_per_test, periodicity=1, show_score=True, summarize_every=1) agent.attach(valid1) valid2 = bc.InterleavedTestEpochController( id=2, epoch_length=parameters.steps_per_test,
testID=None, unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every # [parameters.period_btw_summary_perfs] *validation* epochs. agent.attach( bc.InterleavedTestEpochController( id=ALE_env.VALIDATION_MODE, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4], periodicity=2, show_score=True, summarize_every=1)) # --- Run the experiment --- try: os.mkdir("params") except Exception: pass dump(vars(parameters), "params/" + fname + ".jldump") agent.run(parameters.epochs, parameters.steps_per_epoch) # --- Show results --- basename = "scores/" + fname scores = load(basename + "_scores.jldump")
testID=None, unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every # [parameters.period_btw_summary_perfs] *validation* epochs. agent.attach( bc.InterleavedTestEpochController( id=PLE_env.VALIDATION_MODE, epochLength=parameters.steps_per_test, controllersToDisable=[0, 1, 2, 3, 4], periodicity=2, showScore=True, summarizeEvery=1)) # --- Run the experiment --- try: os.mkdir("params") except Exception: pass dump(vars(parameters), "params/" + fname + ".jldump") agent.run(parameters.epochs, parameters.steps_per_epoch) # --- Show results --- basename = "scores/" + fname scores = joblib.load(basename + "_scores.jldump")
validationID=MG_two_storages_env.VALIDATION_MODE, testID=MG_two_storages_env.TEST_MODE, unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike, nor with its testing (see next controller). Therefore, we will # disable these controllers for the whole duration of the validation epochs interleaved this way, using the # controllersToDisable argument of the InterleavedTestEpochController. For each validation epoch, we want also to # display the sum of all rewards obtained, hence the showScore=True. Finally, we never want this controller to call # the summarizePerformance method of MG_two_storage_env. agent.attach(bc.InterleavedTestEpochController( id=MG_two_storages_env.VALIDATION_MODE, epochLength=parameters.steps_per_test, controllersToDisable=[0, 1, 2, 3, 4, 7], periodicity=2, showScore=True, summarizeEvery=-1)) # Besides inserting a validation epoch (required if one wants to find the best neural network over all training # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs. 
agent.attach(bc.InterleavedTestEpochController( id=MG_two_storages_env.TEST_MODE, epochLength=parameters.steps_per_test, controllersToDisable=[0, 1, 2, 3, 4, 6],
# Epsilon schedule plus an interleaved test epoch, then the experiment run.
# (Restored line structure: this chunk had been collapsed onto one physical
# line, so its inline '#' comments commented out the trailing code.)
agent.attach(
    bc.EpsilonController(initial_e=parameters.epsilon_start,
                         e_decays=parameters.epsilon_decay,
                         e_min=parameters.epsilon_min,
                         evaluate_on='action',
                         periodicity=1,
                         reset_every='none'))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want these
# test epoch to interfere with the training of the agent, which is well established by the TrainerController,
# EpsilonController and alike. Therefore, we will disable these controllers for the whole duration of the test
# epochs interleaved this way, using the controllersToDisable argument of the InterleavedTestEpochController.
# The value of this argument is a list of the indexes of all controllers to disable, their index reflecting in
# which order they were added. Here, "0" is refering to the firstly attached controller, thus the
# VerboseController; "2" refers to the thirdly attached controller, thus the LearningRateController; etc. The order
# in which the indexes are listed is not important.
# For each test epoch, we want also to display the sum of all rewards obtained, hence the showScore=True.
# Finally, we want to call the summarizePerformance method of Toy_Env every [parameters.period_btw_summary_perfs]
# *test* epochs.
agent.attach(
    bc.InterleavedTestEpochController(
        id=0,
        epoch_length=parameters.steps_per_test,
        periodicity=1,
        show_score=True,
        summarize_every=parameters.period_btw_summary_perfs))

# --- Run the experiment ---
agent.run(parameters.epochs, parameters.steps_per_epoch)
batch_size=32, random_state=rng) agent.setDiscountFactor(0.95) agent.attach(bc.FindBestController(validationID=0, unique_fname=args.fname)) agent.attach(bc.VerboseController()) agent.attach(bc.TrainerController()) agent.attach( bc.EpsilonController(initial_e=0.8, e_decays=args.epochs[0] * args.epochs[1], e_min=0.2)) agent.attach( bc.LearningRateController(args.learning_rate[0], args.learning_rate[1], args.learning_rate[2])) agent.attach( bc.InterleavedTestEpochController(epoch_length=1000, controllers_to_disable=[1, 2, 3, 4])) elif args.network == 'DDPG': network = MyACNetwork(environment=env, batch_size=32, double_Q=True, freeze_interval=args.epochs[1], random_state=rng) agent = NeuralAgent( env, network, train_policy=GaussianNoiseExplorationPolicy( network, env.nActions(), rng, .5) if args.exploration == 'gauss' else EpsilonGreedyPolicy(network, env.nActions(), rng, 0.1), replay_memory_size=min(args.epochs[0] * args.epochs[1] * 2, 100000), batch_size=32, random_state=rng)
bc.FindBestController(validationID=MG_two_storages_env.VALIDATION_MODE, testID=MG_two_storages_env.TEST_MODE, unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike, nor with its testing (see next controller). Therefore, we will # disable these controllers for the whole duration of the validation epochs interleaved this way, using the # controllersToDisable argument of the InterleavedTestEpochController. For each validation epoch, we want also to # display the sum of all rewards obtained, hence the showScore=True. Finally, we never want this controller to call # the summarizePerformance method of MG_two_storage_env. agent.attach( bc.InterleavedTestEpochController( id=MG_two_storages_env.VALIDATION_MODE, epoch_length=parameters.steps_per_epoch, periodicity=1, show_score=True, summarize_every=-1)) # Besides inserting a validation epoch (required if one wants to find the best neural network over all training # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the # periodicity=2). We do not want these test epoch to interfere with the training of the agent nor with its # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we # want also to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the # summarizePerformance method of MG_two_storage_env every [parameters.period_btw_summary_perfs] *test* epochs. 
agent.attach( bc.InterleavedTestEpochController( id=MG_two_storages_env.TEST_MODE, epoch_length=parameters.steps_per_test, periodicity=1,
# Minimal Toy_env example: imports, instantiation, two controllers plus an
# interleaved test epoch, then the run. (Restored line structure: this chunk
# had been collapsed onto one physical line, making it invalid Python.)
# NOTE(review): `np` and `NeuralAgent` are used but not imported here —
# presumably imported earlier in the file; confirm.
from deer.learning_algos.q_net_keras import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# Plus, we also want to display after each training episode (!= than after every training) the average bellman
# residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# We also want to interleave a "test epoch" between each training epoch.
agent.attach(bc.InterleavedTestEpochController(epoch_length=500))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
# validationID=ALE_env.VALIDATION_MODE, # testID=None, # unique_fname=fname)) # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want # these validation epoch to interfere with the training of the agent, which is well established by the # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every # [parameters.period_btw_summary_perfs] *validation* epochs. agent.attach(bc.InterleavedTestEpochController( id=ALE_env.VALIDATION_MODE, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4, 6,7,8], periodicity=2, show_score=True, summarize_every=1)) agent.attach(bc.InterleavedTestEpochController( id=ALE_env.VALIDATION_MODE+1, epoch_length=parameters.steps_per_test, controllers_to_disable=[0, 1, 2, 3, 4, 5, 7,8], periodicity=2, show_score=True, summarize_every=1)) agent.attach(bc.InterleavedTestEpochController( id=ALE_env.VALIDATION_MODE+2, epoch_length=parameters.steps_per_test,
# Transfer-learning experiment tail: trainer + best-model + validation
# controllers, then reload a saved network, freeze all layers except the
# encoder, reset the encoder and fine-tune. (Restored line structure: this
# chunk had been collapsed onto one physical line.)
# NOTE(review): `_learning_algo` is a private attribute of the agent — this
# relies on deer internals and may break across library versions.

# Plus, we also want to display after each training episode (!= than after every training) the average bellman
# residual and the average of the V values obtained during the last episode, hence the two last arguments.
agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))

agent.attach(bc.FindBestController(
    validationID=2,
    testID=None,
    unique_fname=fname))

agent.attach(bc.InterleavedTestEpochController(
    id=2,
    epoch_length=parameters.steps_per_test,
    periodicity=1,
    show_score=True,
    summarize_every=1))

# --- Run the experiment ---
# Best-effort creation of the dump directory; ignore failure (e.g. it already
# exists) — the dump below will raise if the directory is truly unusable.
try:
    os.mkdir("params")
except Exception:
    pass
dump(vars(parameters), "params/" + fname + ".jldump")

agent.gathering_data = False
agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
# Freeze network except encoder, then re-initialize the encoder before fine-tuning.
agent._learning_algo.freezeAllLayersExceptEncoder()
agent._learning_algo.resetEncoder()
agent.run(parameters.epochs, parameters.steps_per_epoch)
# Learning-rate / discount / epsilon schedules plus an interleaved test epoch,
# then the run. (Restored line structure: this chunk had been collapsed onto
# one physical line.)
# NOTE(review): this chunk uses the older camelCase keyword API
# (initialLearningRate, epochLength, …) unlike the snake_case used elsewhere
# in the file — the kwargs are kept as-is since they must match the deer
# version this script targets; confirm before modernizing.
agent.attach(bc.LearningRateController(
    initialLearningRate=parameters.learning_rate,
    learningRateDecay=parameters.learning_rate_decay,
    periodicity=1))

agent.attach(bc.DiscountFactorController(
    initialDiscountFactor=parameters.discount,
    discountFactorGrowth=parameters.discount_inc,
    discountFactorMax=parameters.discount_max,
    periodicity=1))

agent.attach(bc.EpsilonController(
    initialE=parameters.epsilon_start,
    eDecays=parameters.epsilon_decay,
    eMin=parameters.epsilon_min,
    evaluateOn='action',
    periodicity=1,
    resetEvery='none'))

# Interleave a test epoch every other epoch (periodicity=2), disabling the five
# previously attached training controllers for its duration.
agent.attach(bc.InterleavedTestEpochController(
    id=0,
    epochLength=parameters.steps_per_test,
    controllersToDisable=[0, 1, 2, 3, 4],
    periodicity=2,
    showScore=True,
    summarizeEvery=parameters.period_btw_summary_perfs))

# --- Run the experiment ---
agent.run(parameters.epochs, parameters.steps_per_epoch)