# %%
# Train a Deep Q-Learning (DQL) agent on the 10-node chain environment.
# NOTE: the call head below is reconstructed from the references later in
# this file; the learner type is inferred from the `dqla` usage further
# down, and the gamma / replay_memory_size values are assumptions.
best_dqn_learning_run_10 = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,  # assumed discount factor
        replay_memory_size=10000,  # assumed replay buffer capacity
        target_update=10,
        batch_size=512,
        learning_rate=0.01),  # torch default is 1e-2
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    # epsilon_multdecay=0.75,  # 0.999,
    epsilon_exponential_decay=5000,  # 10000
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="DQL")

# %% Plot episode length

p.plot_episodes_length([best_dqn_learning_run_10])

# %%
if not os.path.exists("images"):
    os.mkdir("images")
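
# Note: `os.makedirs("images", exist_ok=True)` is an equivalent one-liner
# that also avoids the race between the existence check and the mkdir.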

# %%
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=best_dqn_learning_run_10['learner'],
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,  # 0.35,
    render=False,
    render_last_episode_rewards_to='images/chain10',
    verbosity=Verbosity.Quiet,
    title="Exploiting DQL")
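
# %%
# `gamma_sweep` and `qlearning_run` are not defined in this excerpt; the
# sketch below is a hypothetical reconstruction reusing the QTabularLearner
# constructor and search parameters seen elsewhere in this file. All values
# here are illustrative assumptions.
gamma_sweep = [0.015, 0.15, 0.25, 0.35]  # hypothetical discount factors

def qlearning_run(gamma, env):
    """Train a tabular Q-learning agent on `env` with the given discount."""
    return learner.epsilon_greedy_search(
        env,
        ep,
        learner=a.QTabularLearner(ep, gamma=gamma, learning_rate=0.90,
                                  exploit_percentile=100),
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=0.90,
        render=False,
        verbosity=Verbosity.Quiet,
        title=f"Q-learning (gamma={gamma})")
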
qlearning_results = [qlearning_run(gamma, cyberbattlechain_10) for gamma in gamma_sweep]

qlearning_bestrun_10 = qlearning_results[0]
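# NOTE: index 0 assumes the first gamma in the sweep produced the best run;
# revisit this choice after inspecting the loss and episode-length plots below.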

# %%
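# The two loss series exposed by QTabularLearner track its two Q-matrices:
# one for choosing the source node to pivot from ('Q_source') and one for
# choosing the attack launched from it ('Q_attack'); one curve per sweep run.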

p.new_plot_loss()
for results in qlearning_results:
    p.plot_all_episodes_loss(cast(a.QTabularLearner, results['learner']).loss_qsource.all_episodes, 'Q_source', results['title'])
    p.plot_all_episodes_loss(cast(a.QTabularLearner, results['learner']).loss_qattack.all_episodes, 'Q_attack', results['title'])
plt.legend(loc="upper right")
plt.show()

# %% Plot episode length

p.plot_episodes_length(qlearning_results)


# %%


nolearning_results = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=a.QTabularLearner(ep, trained=qlearning_bestrun_10['learner'],
                              gamma=0.0, learning_rate=0.0, exploit_percentile=100),
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.30,  # 0.35,
    render=False,
    title="Exploiting Q-matrix",
Exemplo n.º 3
0
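
# The run above freezes learning (gamma=0.0, learning_rate=0.0,
# exploit_percentile=100), so it purely exploits the Q-matrix trained
# earlier; epsilon=0.30 still takes a random action 30% of the time to
# cover states the table never visited.

# %%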
# Plot averaged cumulative rewards for DQL vs Random vs DQL-Exploit
themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n'
    f'State: {[f.name() for f in themodel.state_space.feature_selection]} '
    f'({len(themodel.state_space.feature_selection)})\n'
    f"Action: abstract_action ({themodel.action_space.flat_size()})")


# %%
# NOTE: call head reconstructed: an epsilon-greedy search seeded with a
# credential-cache exploiter; the epsilon schedule and episode count below
# are assumptions.
credexplot = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=CredentialCacheExploiter(),
    episode_count=eval_episode_count,  # assumed
    iteration_count=iteration_count,
    epsilon=0.90,  # assumed: mostly random, falling back on the exploiter
    epsilon_minimum=0.10,  # assumed floor
    render=False,
    verbosity=Verbosity.Quiet,
    title="Random+CredLookup"
)

# %%
randomlearning_results = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    environment_properties=ep,
    learner=CredentialCacheExploiter(),
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,  # purely random
    render=False,
    verbosity=Verbosity.Quiet,
    title="Random search"
)
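
# With epsilon=1.0 the search never exploits the learner's policy, so the
# run above is the pure-random baseline noted in the epsilon comment.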


# %%
p.plot_episodes_length([credexplot])

p.plot_all_episodes(credexplot)

all_runs = [credexplot,
            randomlearning_results
            ]
p.plot_averaged_cummulative_rewards(
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count}',
    all_runs=all_runs)

# %%
# NOTE: call head reconstructed; the result name is taken from the
# `all_runs` list below and the environment arguments from the calls above.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=dql_run['learner'],
    episode_count=50,
    iteration_count=iteration_count,
    epsilon=0.00,
    epsilon_minimum=0.00,
    render=False,
    verbosity=Verbosity.Quiet,
    title="Exploiting DQL")

# %%
all_runs = [
    random_run, credlookup_run, tabularq_run, tabularq_exploit_run, dql_run,
    dql_exploit_run
]

p.plot_episodes_length(all_runs)
p.plot_averaged_cummulative_rewards(title=f'Agent Benchmark\n'
                                    f'max_nodes:{ep.maximum_node_count}\n',
                                    all_runs=all_runs)

# %%
contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(title=f'Agent Benchmark top contenders\n'
                                    f'max_nodes:{ep.maximum_node_count}\n',
                                    all_runs=contenders)

# %%
# Plot cumulative rewards for all episodes
for r in contenders:
    p.plot_all_episodes(r)