def test_agent_training() -> None:
    """Smoke-test epsilon-greedy training with a DQL policy and a random policy.

    Both searches run against ``cyberbattlechain`` using ``ep``,
    ``training_episode_count`` and ``iteration_count``, all of which are
    defined outside this function. The test only checks that each search
    returns a truthy result.
    """
    # Build the Deep Q-learning policy up front so the search call stays short.
    dql_policy = dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,  # torch default is 1e-2
    )

    # epsilon_multdecay (0.75 / 0.999) is left disabled here; the exploration
    # rate is driven by epsilon_exponential_decay instead.
    dql_result = learner.epsilon_greedy_search(
        cyberbattle_gym_env=cyberbattlechain,
        environment_properties=ep,
        learner=dql_policy,
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=0.90,
        epsilon_exponential_decay=5000,  # 10000
        epsilon_minimum=0.10,
        render=False,
        verbosity=Verbosity.Quiet,
        title="DQL",
    )
    assert dql_result

    random_result = learner.epsilon_greedy_search(
        cyberbattlechain,
        ep,
        learner=learner.RandomPolicy(),
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=1.0,  # purely random
        render=False,
        verbosity=Verbosity.Quiet,
        title="Random search",
    )
    assert random_result
# Environment bounds derived from the identifiers exposed by `ctf_env`.
ep = w.EnvironmentBounds.of_identifiers(
    identifiers=ctf_env.identifiers,
    maximum_node_count=12,
    maximum_total_credentials=10,
)

# %%
# Evaluate the Deep Q-learning agent
dqn_learning_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=ctf_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=5,
        batch_size=512,
        # torch default learning rate is 1e-2
        learning_rate=0.01,
    ),
    title="DQL",
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration schedule: start at 0.90 and decay down to a floor of 0.10.
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    # Output controls: no rendering, no episode-length plot, quiet logging.
    render=False,
    plot_episodes_length=False,
    verbosity=Verbosity.Quiet,
)

# %%
# Environment bounds derived from the identifiers of the defender-enabled
# chain environment.
ep = w.EnvironmentBounds.of_identifiers(
    maximum_total_credentials=22,
    maximum_node_count=22,
    identifiers=cyberbattlechain_defender.identifiers,
)

# Training budget used by the searches below.
iteration_count = 600
training_episode_count = 10

# %%
dqn_with_defender = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.15,
        replay_memory_size=10000,
        target_update=5,
        batch_size=256,
        learning_rate=0.01,
    ),
    title="DQL",
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration schedule: start at 0.90 and decay down to a floor of 0.10.
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    render=False,
    verbosity=Verbosity.Quiet,
)

# %%
# epsilon=0.0 disables random exploration for this run.
# NOTE(review): unlike the other searches in this file, this call passes no
# learner, episode_count or iteration_count -- confirm nothing was lost here.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_defender,
    ep,
    epsilon=0.0,
    render=False,
    verbosity=Verbosity.Quiet,
    title="Exploiting Q-matrix"
)

# %%
# Evaluate the Deep Q-learning agent
dql_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=gym_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        # torch default learning rate is 1e-2
        # a large value helps converge in fewer episodes
        learning_rate=0.01,
    ),
    title="DQL",
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration schedule: start at 0.90 and decay down to a floor of 0.10.
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    # Output controls: no rendering, no episode-length plot, quiet logging.
    render=False,
    plot_episodes_length=False,
    verbosity=Verbosity.Quiet,
)