def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        # discounting, learning rates, and exploration schedule
        gamma=1, alpha_attacker=0.0001, alpha_defender=0.0001, epsilon=1, render=False,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999,
        # logging, video, and checkpointing
        video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=250000, attacker=True, defender=True, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        # network input/output dimensions and architecture
        input_dim_attacker=(4 + 2) * 3, output_dim_attacker=4 * 3,
        input_dim_defender=(4 + 2) * 3, output_dim_defender=5 * 3,
        hidden_dim=32, num_hidden_layers=2, batch_size=2000, gpu=True,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2,
        # PPO-specific settings: epochs per batch, clip range, gradient norm, GAE
        optimization_iterations=10, eps_clip=0.2, max_gradient_norm=0.5,
        gae_lambda=0.95)
    env_name = "idsgame-v16"
    client_config = ClientConfig(
        env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
        defender_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
        pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
        title="OpenAI-PPO vs OpenAI-PPO", run_many=False,
        random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config
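# Usage sketch (assumption): how a default_config() like the one above is
# typically handed to the experiment runner. The import path and the exact
# Runner.run signature are assumptions based on the repo layout, not
# verified here.
from gym_idsgame.runnner import Runner  # assumed import path

if __name__ == '__main__':
    config = default_config()
    Runner.run(config)  # assumed entrypoint: trains/evaluates per config.mode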
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999, alpha_attacker=0.0001, epsilon=1, render=False, eval_sleep=0.9,
        min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=10000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=1000000000, attacker=True, defender=False, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=100,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, batch_size=64, gpu=True,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2)
    env_name = "idsgame-minimal_defense-v20"
    client_config = ClientConfig(
        env_name=env_name, attacker_type=AgentType.REINFORCE_AGENT.value,
        mode=RunnerMode.TRAIN_ATTACKER.value, pg_agent_config=pg_agent_config,
        output_dir=default_output_dir(), title="REINFORCE vs DefendMinimalDefender",
        run_many=False, random_seeds=[0, 999, 299, 399, 499], random_seed=399)
    # client_config = hp_tuning_config(client_config)
    return client_config
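# Illustrative sketch (assumption): where the (4 + 2) * 4 and (4 + 1) * 4
# dimension expressions above plausibly come from, for a 4-node layout with
# 4 attack types. The reading of the +2/+1 offsets (extra defense and
# reconnaissance features, plus a reconnaissance action per node) is an
# assumption inferred from the formulas, not confirmed by the source.
num_nodes = 4           # nodes in the game graph visible to the attacker
num_attack_types = 4    # attack types per node
input_dim_attacker = (num_attack_types + 2) * num_nodes    # (4 + 2) * 4 = 24
output_dim_attacker = (num_attack_types + 1) * num_nodes   # (4 + 1) * 4 = 20
assert input_dim_attacker == 24 and output_dim_attacker == 20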
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=55000, attacker=False, defender=True, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=50,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=3, pi_hidden_layers=1, pi_hidden_dim=128,
        vf_hidden_layers=1, vf_hidden_dim=128, batch_size=2000, gpu=False,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2,
        optimization_iterations=10, eps_clip=0.2, max_gradient_norm=0.5,
        gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
        render_attacker_view=False, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, one_hot_obs=False, lstm_core=False,
        lstm_hidden_dim=32, multi_channel_obs=False,
        channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
        channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
        channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
        channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
        mini_batch_size=64, ar_policy=True,
        attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4,
        defender_node_input_dim=((4 + 1) * 4), defender_at_net_input_dim=(4 + 1),
        defender_node_net_output_dim=4, defender_at_net_output_dim=5)
    env_name = "idsgame-maximal_attack-v19"
    client_config = ClientConfig(
        env_name=env_name, defender_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.TRAIN_DEFENDER.value, pg_agent_config=pg_agent_config,
        output_dir=default_output_dir(), title="AttackMaximalAttacker vs OpenAI-PPO",
        run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config
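# Illustrative sketch (assumption): with ar_policy=True the defender action
# appears to be factored autoregressively -- first a node is chosen
# (defender_node_net_output_dim=4), then a defense type at that node
# (defender_at_net_output_dim=5); in the full model the at-net would also
# condition on the chosen node (defender_at_net_input_dim=(4 + 1)). A
# minimal, runnable numpy version of such two-stage sampling:
import numpy as np

rng = np.random.default_rng(0)

def sample_ar_action(node_logits: np.ndarray, at_logits: np.ndarray) -> int:
    """Sample (node, defense-type) and flatten to a single env action id."""
    node_probs = np.exp(node_logits) / np.exp(node_logits).sum()
    node = rng.choice(node_logits.size, p=node_probs)
    at_probs = np.exp(at_logits) / np.exp(at_logits).sum()
    defense_type = rng.choice(at_logits.size, p=at_probs)
    return int(node * at_logits.size + defense_type)

print(sample_ar_action(np.zeros(4), np.zeros(5)))  # uniform over 4 * 5 = 20 actions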
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    # self-play opponent-pool settings
    opponent_pool_config = OpponentPoolConfig(
        pool_maxsize=100000, pool_increment_period=50, head_to_head_period=1,
        quality_scores=True, quality_score_eta=0.01, initial_quality=1000,
        pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999, alpha_attacker=0.0001, alpha_defender=0.0001, epsilon=1,
        render=False, eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100,
        train_log_frequency=100, epsilon_decay=0.9999, video=True,
        eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=450001,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=20000, attacker=True, defender=True, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, batch_size=64, gpu=False,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.99999,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, opponent_pool_config=opponent_pool_config,
        alternating_optimization=50, opponent_pool=True, baselines_in_pool=True,
        alternating_period=50)
    env_name = "idsgame-v20"
    client_config = ClientConfig(
        env_name=env_name, attacker_type=AgentType.REINFORCE_AGENT.value,
        defender_type=AgentType.REINFORCE_AGENT.value,
        mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
        pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
        title="REINFORCE vs REINFORCE", run_many=False,
        random_seeds=[0, 999, 299, 399, 499], random_seed=0)
    # client_config = hp_tuning_config(client_config)
    return client_config
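# Illustrative sketch (assumption): initial_quality=1000 and
# quality_score_eta=0.01 suggest an Elo-like rating over the opponent pool.
# The update rule below is a guess at the mechanism for illustration, not
# the repository's actual quality-score code.
def update_quality(q_winner: float, q_loser: float, eta: float = 0.01):
    """Shift both ratings toward the observed head-to-head outcome, Elo-style."""
    expected_win = 1.0 / (1.0 + 10 ** ((q_loser - q_winner) / 400.0))
    q_winner += eta * (1.0 - expected_win)
    q_loser -= eta * (1.0 - expected_win)
    return q_winner, q_loser

print(update_quality(1000.0, 1000.0))  # (1000.005, 999.995)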
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v18"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.00001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=100000, attacker=True, defender=False, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        input_dim_attacker=(4 + 2) * 2, output_dim_attacker=(4 + 1) * 2,
        input_dim_defender=(4 + 2) * 3, output_dim_defender=5 * 3,
        hidden_dim=64, num_hidden_layers=4, batch_size=2000, gpu=False,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2,
        optimization_iterations=10, eps_clip=0.2, max_gradient_norm=0.5,
        gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False,
        attacker_load_path="/home/kim/storage/workspace/gym-idsgame/experiments/"
                           "manual_play/v18/minimal_defense/manual_vs_openai_ppo/"
                           "1591093705.5003314_attacker_policy_network.zip")
    client_config = ClientConfig(
        env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
        title="OpenAI PPO vs ManualDefender", pg_agent_config=pg_agent_config,
        bot_attacker=True)
    return client_config
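# Usage sketch (assumption): the attacker_load_path above points to a .zip
# checkpoint. If the OpenAI-PPO agent wraps a stable-baselines3-style PPO,
# it could be restored roughly like this; PPO.load and predict exist in
# stable-baselines3, but compatibility of this repo's checkpoints with them
# is an assumption.
import numpy as np
from stable_baselines3 import PPO

checkpoint = ("/home/kim/storage/workspace/gym-idsgame/experiments/manual_play/"
              "v18/minimal_defense/manual_vs_openai_ppo/"
              "1591093705.5003314_attacker_policy_network.zip")
model = PPO.load(checkpoint)
obs = np.zeros(model.observation_space.shape, dtype=np.float32)  # dummy observation
action, _state = model.predict(obs, deterministic=True)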
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v19"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=55000, attacker=False, defender=True, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, pi_hidden_layers=1, pi_hidden_dim=128,
        vf_hidden_layers=1, vf_hidden_dim=128, batch_size=2000, gpu=False,
        tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False,
        gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2,
        optimization_iterations=10, eps_clip=0.2, max_gradient_norm=0.5,
        gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
        render_attacker_view=False, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, one_hot_obs=False, lstm_core=False,
        lstm_hidden_dim=32, multi_channel_obs=False,
        channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
        channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
        channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
        channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
        mini_batch_size=64, ar_policy=True,
        attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4,
        defender_node_input_dim=((4 + 1) * 4), defender_at_net_input_dim=(4 + 1),
        defender_node_net_output_dim=4, defender_at_net_output_dim=5,
        defender_load_path="/home/kim/workspace/gym-idsgame/experiments/manual_play/"
                           "v19/maximal_attack/manual_vs_openai_ppo/"
                           "1592125075.4390159_defender_node_policy_network.zip")
    client_config = ClientConfig(
        env_name=env_name, defender_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.MANUAL_ATTACKER.value, output_dir=default_output_dir(),
        title="ManualAttacker vs OpenAI PPO", pg_agent_config=pg_agent_config,
        bot_defender=True)
    return client_config
pg_agent_config = PolicyGradientAgentConfig(
    gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
    eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000,
    train_log_frequency=args.train_log_frequency, epsilon_decay=0.9999, video=False,
    eval_log_frequency=args.eval_log_frequency, video_fps=5,
    video_dir=default_output_dir() + "/results/videos",
    num_episodes=args.num_episodes, eval_render=False, gifs=False,
    gif_dir=default_output_dir() + "/results/gifs/" + args.experiment_id,
    eval_frequency=args.eval_frequency, attacker=args.attacker,
    defender=args.defender, video_frequency=1001,
    save_dir=default_output_dir() + "/results/data/" + args.experiment_id,
    checkpoint_freq=250,
    input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
    input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
    hidden_dim=128, num_hidden_layers=2, pi_hidden_layers=1, pi_hidden_dim=128,
    vf_hidden_layers=1, vf_hidden_dim=128, batch_size=2000, gpu=False,
    tensorboard=True,
    tensorboard_dir=default_output_dir() + "/results/tensorboard/" + args.experiment_id,
    optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999, state_length=1,
    normalize_features=False, merged_ad_features=True, zero_mean_features=False,
    gpu_id=0, lstm_network=False, lstm_seq_length=4, num_lstm_layers=2,
    optimization_iterations=10, eps_clip=0.2, max_gradient_norm=0.5,
    gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
    flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
    render_attacker_view=True, lr_progress_power_decay=4, lr_progress_decay=True,
    use_sde=False, sde_sample_freq=4, one_hot_obs=False, lstm_core=False,
    lstm_hidden_dim=32, multi_channel_obs=False,
    channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
    channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
    channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
    channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
    mini_batch_size=64, ar_policy=True,
    attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
    attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4)
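# Illustrative sketch (assumption): an argparse parser that would produce the
# `args` object the config above reads from. Flag names mirror the attributes
# referenced in the config; the defaults here are placeholders, not values
# taken from the repository.
import argparse

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="idsgame experiment")
    parser.add_argument("--num_episodes", type=int, default=100000)
    parser.add_argument("--eval_frequency", type=int, default=55000)
    parser.add_argument("--train_log_frequency", type=int, default=1)
    parser.add_argument("--eval_log_frequency", type=int, default=500)
    parser.add_argument("--attacker", action="store_true")
    parser.add_argument("--defender", action="store_true")
    parser.add_argument("--experiment_id", type=str, default="exp_0")
    return parser.parse_args()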