Example #1
    def action(self, game_state: GameState) -> int:
        """
        Samples an action from the policy.

        :param game_state: the game state
        :return: action_id
        """

        # Feature engineering
        attacker_obs = game_state.get_attacker_observation(
            self.game_config.network_config,
            local_view=True,
            reconnaissance=self.game_config.reconnaissance_actions)
        defender_obs = game_state.get_defender_observation(
            self.game_config.network_config)
        neighbor_defense_attributes = np.zeros(
            (attacker_obs.shape[0], defender_obs.shape[1]))
        for node in range(attacker_obs.shape[0]):
            if int(attacker_obs[node][-1]) == 1:
                node_id = int(attacker_obs[node][-2])
                neighbor_defense_attributes[node] = defender_obs[node_id]
        node_ids = attacker_obs[:, -2]
        node_reachable = attacker_obs[:, -1]
        det_values = neighbor_defense_attributes[:, -1]
        temp = neighbor_defense_attributes[:, 0:-1] - attacker_obs[:, 0:-2]
        features = []
        for idx, row in enumerate(temp):
            t = row.tolist()
            t.append(node_ids[idx])
            t.append(node_reachable[idx])
            t.append(det_values[idx])
            features.append(t)
        features = np.array(features)

        state = torch.from_numpy(features.flatten()).float()
        # Move to GPU if using GPU
        if torch.cuda.is_available() and self.config.gpu:
            device = torch.device("cuda:" + str(self.config.gpu_id))
            state = state.to(device)
        legal_actions, non_legal_actions = self.get_legal_attacker_actions(
            attacker_obs, game_state)
        # Forward pass using the current policy network to predict P(a|s)
        action_probs = self.attacker_policy_network(state)
        # Set probability of non-legal actions to 0
        action_probs_1 = action_probs.clone()
        if len(legal_actions) > 0 and len(non_legal_actions) < len(
                action_probs_1):
            action_probs_1[non_legal_actions] = 0
        # Use torch.distributions package to create a parameterizable probability distribution of the learned policy
        policy_dist = Categorical(action_probs_1)
        # Sample an action from the probability distribution
        action = policy_dist.sample()

        global_action = PolicyGradientAgent.convert_local_attacker_action_to_global(
            action.item(), attacker_obs)
        return global_action
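The masking above relies on torch.distributions.Categorical renormalizing its probability vector after illegal entries are zeroed out. A minimal, self-contained sketch of that pattern, with made-up probabilities and action ids for illustration:

import torch
from torch.distributions import Categorical

# Hypothetical policy output over 5 actions (illustration only)
action_probs = torch.tensor([0.1, 0.4, 0.2, 0.2, 0.1])
non_legal_actions = [1, 3]  # hypothetical ids of illegal actions

masked_probs = action_probs.clone()
masked_probs[non_legal_actions] = 0.0      # zero out illegal actions
policy_dist = Categorical(masked_probs)    # Categorical renormalizes the remaining mass
action = policy_dist.sample()              # only legal action ids can be sampled
print(int(action.item()))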
Example #2
 def test_copy(self):
     state = GameState()
     num_attack_types = 10
     rows = 4
     cols = 4
     network_config = NetworkConfig(rows, cols)
     num_nodes = len(network_config.node_list)
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     copy = state.copy()
     assert copy.num_hacks == state.num_hacks
     assert np.array_equal(copy.attack_values, state.attack_values)
     assert np.array_equal(copy.defense_det, state.defense_det)
     assert np.array_equal(copy.defense_values, state.defense_values)
Example #3
    def set_load_initial_state(self, initial_state_path: str) -> None:
        """
        Sets the initial state by loading it from disk

        :param initial_state_path: path to the initial state saved on disk
        :return: None
        """
        self.initial_state = GameState.load(initial_state_path)
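A brief usage sketch, assuming this setter lives on a configuration object such as the GameConfig shown in Example #13 and that a state was previously saved to disk (the path below is hypothetical):

# Hypothetical path to a previously saved GameState
config.set_load_initial_state("saved_states/initial_state")
assert config.initial_state is not None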
Example #4
 def test_default_state(self):
     state = GameState()
     rows = 4
     cols = 4
     network_config = NetworkConfig(rows, cols)
     num_nodes = len(network_config.node_list)
     num_attack_types = 10
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     assert state.attack_values.shape == (num_nodes, 10)
     assert state.defense_values.shape == (num_nodes, 10)
     assert state.defense_det.shape == (num_nodes, )
     assert state.attacker_pos == (3, 1)
     assert state.done == False
     assert state.hacked == False
     assert state.num_hacks == 0
     assert state.detected == False
     assert state.num_games == 0
Example #5
 def test_simulate_attack(self):
     rows = 4
     cols = 4
     network_config = NetworkConfig(rows, cols)
     num_nodes = len(network_config.node_list)
     state = GameState()
     num_attack_types = 10
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     attack_node_id = 3
     attack_type = 4
     state.defense_values[attack_node_id][attack_type] = 5
     state.attack_values[attack_node_id][attack_type] = 5
     assert not state.simulate_attack(attack_node_id, attack_type,
                                      network_config)
     state.defense_values[attack_node_id][attack_type] = 5
     state.attack_values[attack_node_id][attack_type] = 6
     assert state.simulate_attack(attack_node_id, attack_type,
                                  network_config)
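The two assertions suggest that simulate_attack succeeds only when the attack value strictly exceeds the defense value for the targeted node and attack type. A sketch of that decision rule as implied by the test, not the library's actual implementation:

def attack_succeeds(attack_value: int, defense_value: int) -> bool:
    # Implied by test_simulate_attack: an equal value blocks the attack,
    # a strictly larger attack value gets through
    return attack_value > defense_value

assert not attack_succeeds(5, 5)
assert attack_succeeds(6, 5)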
Example #6
 def test_defend(self):
     state = GameState()
     rows = 4
     cols = 4
     network_config = NetworkConfig(rows, cols)
     num_nodes = len(network_config.node_list)
     num_attack_types = 10
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     defend_node_id = 3
     defense_type = 4
     max_value = 10
     old_count = state.defense_values[defend_node_id][defense_type]
     state.defend(defend_node_id, defense_type, max_value, network_config)
     assert state.defense_values[defend_node_id][defense_type] < max_value
     assert state.defense_values[defend_node_id][
         defense_type] == old_count + 1
     state.defense_values[defend_node_id][defense_type] = 10
     state.defend(defend_node_id, defense_type, max_value, network_config)
     assert state.defense_values[defend_node_id][defense_type] == max_value
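The test implies that defend increments the chosen defense attribute by one and never exceeds max_value. A sketch of that update rule as implied by the assertions, not the actual implementation:

def apply_defense(current_value: int, max_value: int) -> int:
    # Increment by one, capped at max_value (implied by test_defend)
    return min(current_value + 1, max_value)

assert apply_defense(4, 10) == 5
assert apply_defense(10, 10) == 10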
Example #7
    def set_state(self, game_state: GameState) -> None:
        """
        Updates the current state

        :param game_state: the new state
        :return: None
        """
        self.game_state = game_state.copy()
        self.game_panel.update_state_text(self.game_state)
        self.attacker_sprite.move_to_pos(self.game_state.attacker_pos)
        if game_state.detected:
            self.attacker_sprite.detected()
        else:
            self.attacker_sprite.undetect()
        self.resource_network.set_node_states(self.game_state)
Example #8
    def action(self, game_state: GameState) -> int:
        """
        Samples an action from the policy

        :param game_state: the game state
        :return: action_id
        """
        from gym_idsgame.envs.util import idsgame_util
        actions = list(range(self.game_config.num_attack_actions))
        if not self.game_config.reconnaissance_actions:
            legal_actions = list(
                filter(
                    lambda action: idsgame_util.is_attack_id_legal(
                        action, self.game_config, game_state.attacker_pos,
                        game_state), actions))
            if len(legal_actions) > 0:
                action = np.random.choice(legal_actions)
            else:
                action = np.random.choice(actions)
        else:
            attacker_obs = game_state.get_attacker_observation(
                self.game_config.network_config,
                local_view=self.idsgame_env.local_view_features(),
                reconnaissance=self.game_config.reconnaissance_actions,
                reconnaissance_bool_features=self.idsgame_env.idsgame_config.
                reconnaissance_bool_features)
            legal_actions = list(
                filter(
                    lambda action: self.is_attack_legal(
                        action, attacker_obs, game_state), actions))
            if len(legal_actions) > 0:
                action = np.random.choice(legal_actions)
            else:
                action = np.random.choice(actions)
            if self.idsgame_env.local_view_features():
                action = self.convert_local_attacker_action_to_global(
                    action, attacker_obs)
        return action
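The fallback pattern above, sampling uniformly from the legal actions and falling back to the full action set when none are legal, can be isolated into a small sketch; the legality predicate below is a hypothetical stand-in for idsgame_util.is_attack_id_legal:

import numpy as np

def sample_action(actions, is_legal):
    # Prefer legal actions; fall back to the full action set if none are legal
    legal_actions = [a for a in actions if is_legal(a)]
    if len(legal_actions) > 0:
        return np.random.choice(legal_actions)
    return np.random.choice(actions)

# Hypothetical legality rule: only even action ids are legal
print(sample_action(list(range(10)), lambda a: a % 2 == 0))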
Example #9
    def action(self, game_state: GameState) -> int:
        """
        Samples an action from the policy.

        :param game_state: the game state
        :return: action_id
        """
        try:
            # Feature engineering
            attacker_obs = game_state.get_attacker_observation(
                self.game_config.network_config,
                local_view=self.idsgame_env.local_view_features(),
                reconnaissance=self.game_config.reconnaissance_actions,
                reconnaissance_bool_features=self.idsgame_env.idsgame_config.
                reconnaissance_bool_features)
            defender_obs = game_state.get_defender_observation(
                self.game_config.network_config)
            defender_state = self.update_state(attacker_obs=attacker_obs,
                                               defender_obs=defender_obs,
                                               state=[],
                                               attacker=False)
            if not self.config.ar_policy:
                actions = list(range(self.idsgame_env.num_defense_actions))
                non_legal_actions = list(
                    filter(
                        lambda action: not self.idsgame_env.is_defense_legal(
                            action), actions))
                obs_tensor_d = torch.as_tensor(defender_state.flatten()).to(
                    self.device)
                defender_actions, defender_values, defender_log_probs = self.model.defender_policy.forward(
                    obs_tensor_d,
                    self.idsgame_env,
                    device=self.device,
                    attacker=False,
                    non_legal_actions=non_legal_actions)
                defender_actions = defender_actions.item()
            else:
                actions = list(range(self.config.defender_node_net_output_dim))
                non_legal_actions = list(
                    filter(
                        lambda action: not self.is_defense_legal(
                            action, node=True, game_state=game_state),
                        actions))
                if len(non_legal_actions) == len(actions):
                    non_legal_actions = []
                obs_tensor_d = torch.as_tensor(defender_state.flatten()).to(
                    self.device)
                defender_node_actions, defender_node_values, defender_node_log_probs, defender_node_lstm_state = self.model.defender_node_policy.forward(
                    obs_tensor_d,
                    self.idsgame_env,
                    device=self.device,
                    attacker=False,
                    non_legal_actions=non_legal_actions)
                defender_node_actions = defender_node_actions.cpu().numpy()
                node = defender_node_actions[0]
                obs_tensor_d_1 = obs_tensor_d.reshape(
                    self.idsgame_env.idsgame_config.game_config.num_nodes,
                    self.config.defender_at_net_input_dim)
                obs_tensor_d_at = obs_tensor_d_1[node]
                actions = list(range(self.config.defender_at_net_output_dim))
                non_legal_actions = list(
                    filter(
                        lambda action: not self.is_defense_legal(
                            action,
                            node=False,
                            game_state=game_state,
                            obs=obs_tensor_d_at), actions))
                if len(non_legal_actions) == len(actions):
                    non_legal_actions = []
                defender_at_actions, defender_at_values, defender_at_log_probs, defender_at_lstm_state = self.model.defender_at_policy.forward(
                    obs_tensor_d_at,
                    self.idsgame_env,
                    device=self.device,
                    attacker=False,
                    non_legal_actions=non_legal_actions)
                defender_at_actions = defender_at_actions.cpu().numpy()
                attack_id = util.get_defense_action_id(
                    node, defender_at_actions[0],
                    self.idsgame_env.idsgame_config.game_config)
                defender_actions = attack_id
        except Exception as e:
            print(str(e))
            traceback.print_exc()

        return defender_actions
Example #10
 def test_new_game(self):
     rows = 4
     cols = 4
     network_config = NetworkConfig(rows, cols)
     state = GameState()
     num_nodes = len(network_config.node_list)
     num_attack_types = 10
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     init_state = state.copy()
     old_game_count = state.num_games
     state.new_game(init_state)
     assert state.num_games == old_game_count + 1
     assert state.done == False
     assert state.detected == False
     state.default_state(list(range(num_nodes)), (3, 1), num_attack_types,
                         network_config)
     init_state = state.copy()
     state.hacked = True
     old_hacked_count = 0
     state.new_game(init_state)
     assert state.num_hacks == old_hacked_count + 1
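The assertions suggest that new_game advances the game counter, resets the episode flags, and advances the hack counter when the finished episode ended with a successful hack. A sketch of that bookkeeping as implied by the test, not the library's implementation:

def new_game_counters(num_games: int, num_hacks: int, hacked: bool):
    # Implied by test_new_game: the game counter always advances,
    # the hack counter advances only if the finished game was hacked
    num_games += 1
    if hacked:
        num_hacks += 1
    return num_games, num_hacks

assert new_game_counters(0, 0, hacked=False) == (1, 0)
assert new_game_counters(0, 0, hacked=True) == (1, 1)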
Example #11
 def test_initialization(self):
     GameState()
Example #12
    def action(self, game_state: GameState) -> int:
        """
        Samples an action from the policy.

        :param game_state: the game state
        :return: action_id
        """
        try:
            # Feature engineering
            attacker_obs = game_state.get_attacker_observation(
                self.game_config.network_config,
                local_view=self.idsgame_env.local_view_features(),
                reconnaissance=self.game_config.reconnaissance_actions,
                reconnaissance_bool_features=self.idsgame_env.idsgame_config.
                reconnaissance_bool_features)
            defender_obs = game_state.get_defender_observation(
                self.game_config.network_config)
            attacker_state = self.update_state(attacker_obs=attacker_obs,
                                               defender_obs=defender_obs,
                                               state=[],
                                               attacker=True)
            if not self.config.ar_policy:
                actions = list(range(self.idsgame_env.num_attack_actions))
                non_legal_actions = list(
                    filter(
                        lambda action: not self.is_attack_legal(
                            action, attacker_obs, game_state), actions))
                obs_tensor_a = torch.as_tensor(attacker_state.flatten()).to(
                    self.device)
                attacker_actions, attacker_values, attacker_log_probs = self.model.attacker_policy.forward(
                    obs_tensor_a,
                    self.idsgame_env,
                    device=self.device,
                    attacker=True,
                    non_legal_actions=non_legal_actions)
                attacker_action = attacker_actions.cpu().numpy()[0]
            else:
                actions = list(range(self.config.attacker_node_net_output_dim))
                non_legal_actions = list(
                    filter(
                        lambda action: not self.is_attack_legal(
                            action, attacker_obs, game_state, node=True),
                        actions))
                obs_tensor_a = torch.as_tensor(attacker_state.flatten()).to(
                    self.device)
                attacker_node_actions, attacker_node_values, attacker_node_log_probs, attacker_node_lstm_state = self.model.attacker_node_policy.forward(
                    obs_tensor_a,
                    self.idsgame_env,
                    device=self.device,
                    attacker=True,
                    non_legal_actions=non_legal_actions)
                attacker_node_probs = self.model.attacker_node_policy.get_action_dist(
                    obs_tensor_a,
                    self.idsgame_env,
                    device=self.device,
                    attacker=True,
                    non_legal_actions=non_legal_actions)
                attacker_node_actions = attacker_node_actions.cpu().numpy()
                node = attacker_node_actions[0]
                obs_tensor_a_1 = obs_tensor_a.reshape(
                    self.idsgame_env.idsgame_config.game_config.num_nodes,
                    self.config.attacker_at_net_input_dim)
                obs_tensor_a_at = obs_tensor_a_1[node]
                attacker_at_actions, attacker_at_values, attacker_at_log_probs, attacker_at_lstm_state = self.model.attacker_at_policy.forward(
                    obs_tensor_a_at,
                    self.idsgame_env,
                    device=self.device,
                    attacker=True,
                    non_legal_actions=non_legal_actions)
                attacker_at_probs = self.model.attacker_at_policy.get_action_dist(
                    obs_tensor_a_at,
                    self.idsgame_env,
                    device=self.device,
                    attacker=True,
                    non_legal_actions=non_legal_actions)
                # print("attacker node probs:{}".format(attacker_node_probs.detach().cpu().numpy()))
                # print("attacker at probs:{}".format(attacker_at_probs.detach().cpu().numpy()))
                self.create_policy_plot(
                    attacker_at_probs.detach().cpu().numpy(), 0, attacker=True)
                attacker_at_actions = attacker_at_actions.cpu().numpy()
                attack_id = util.get_attack_action_id(
                    node, attacker_at_actions[0],
                    self.idsgame_env.idsgame_config.game_config)
                attacker_action = attack_id
        except Exception as e:
            print(str(e))
            traceback.print_exc()

        if self.idsgame_env.local_view_features():
            attack = self.convert_local_attacker_action_to_global(
                attacker_action, attacker_obs)
            return attack
        else:
            return attacker_action
Example #13
class GameConfig():
    """
    DTO with game configuration parameters
    """
    def __init__(self,
                 network_config: NetworkConfig = None,
                 manual_attacker: bool = True,
                 num_layers: int = 1,
                 num_servers_per_layer: int = 2,
                 num_attack_types: int = 10,
                 max_value: int = 9,
                 initial_state: GameState = None,
                 manual_defender: bool = False,
                 initial_state_path: str = None,
                 dense_rewards=False,
                 min_random_a_val: int = 0,
                 min_random_d_val: int = 0,
                 min_random_det_val: int = 0,
                 dense_rewards_v2=False,
                 reconnaissance_actions: bool = False,
                 max_random_v_val: int = 1,
                 dense_rewards_v3=False):
        """
        Class constructor, initializes the DTO

        :param network_config: the network configuration of the game (e.g. number of nodes and their connectivity)
        :param manual_attacker: whether the attacker is controlled manually or by an agent
        :param manual_defender: whether the defender is controlled manually or by an agent
        :param num_layers: the number of layers in the network
        :param num_servers_per_layer: the number of servers per layer in the network
        :param num_attack_types: the number of attack types
        :param max_value: max value for a defense/attack attribute
        :param initial_state: the initial state
        :param initial_state_path: path to the initial state saved on disk
        :param dense_rewards: if true, give hacker dense rewards (reward for each intermediate server hacked)
        :param dense_rewards_v2: if true, give defender reward only when blocking
        :param min_random_a_val: minimum attack value when randomizing the state
        :param min_random_d_val: minimum defense value when randomizing the state
        :param min_random_det_val: minimum detection value when randomizing the state
        :param reconnaissance_actions: a boolean flag that indicates whether reconnaissance activities are enabled for
                                       the attacker
        :param max_random_v_val: maximum random vulnerability value when using a randomized environment
        """
        self.reconnaissance_actions = reconnaissance_actions
        self.manual_attacker = manual_attacker
        self.manual_defender = manual_defender
        self.num_layers = num_layers
        self.num_servers_per_layer = num_servers_per_layer
        self.num_attack_types = num_attack_types
        self.max_value = max_value
        self.min_random_a_val = min_random_a_val
        self.min_random_d_val = min_random_d_val
        self.min_random_det_val = min_random_det_val
        self.num_rows = self.num_layers + 2
        self.num_nodes = self.num_layers * self.num_servers_per_layer + 2  # +2 for Start and Data Nodes
        self.num_cols = self.num_servers_per_layer
        self.set_attack_actions()
        self.num_defense_actions = (self.num_attack_types + 1) * self.num_nodes
        self.num_states = self.num_nodes
        self.network_config = network_config
        self.initial_state_path = initial_state_path
        self.defense_val = 2
        self.attack_val = 0
        self.num_vulnerabilities_per_node = 1
        self.det_val = 2
        self.dense_rewards_v2 = dense_rewards_v2
        self.dense_rewards_v3 = dense_rewards_v3
        self.vulnerabilitiy_val = 0
        self.max_random_v_val = max_random_v_val
        self.num_vulnerabilities_per_layer = None
        if network_config is None:
            self.network_config = NetworkConfig(self.num_rows,
                                                self.num_cols,
                                                connected_layers=False)
        self.initial_state = initial_state
        if self.initial_state is None and self.initial_state_path is not None:
            self.initial_state = GameState.load(self.initial_state_path)
        if self.initial_state is None and self.initial_state_path is None:
            self.initial_state = GameState(
                min_random_a_val=min_random_a_val,
                min_random_det_val=min_random_det_val,
                min_random_d_val=min_random_d_val,
                max_value=self.max_value,
                max_random_v_val=self.max_random_v_val)
            self.initial_state.default_state(
                self.network_config.node_list,
                self.network_config.start_pos,
                self.num_attack_types,
                network_config=self.network_config,
            )
        self.dense_rewards = dense_rewards

    def set_attack_actions(self, local_view: bool = False):
        if not self.reconnaissance_actions:
            self.num_attack_actions = self.num_attack_types * self.num_nodes
        else:
            if not local_view:
                self.num_attack_actions = (self.num_attack_types +
                                           1) * self.num_nodes
            else:
                self.num_attack_actions = (
                    self.num_attack_types +
                    1) * self.network_config.max_neighbors

    def set_load_initial_state(self, initial_state_path: str) -> None:
        """
        Sets the initial state by loading it from disk

        :param initial_state_path: path to the initial state saved on disk
        :return: None
        """
        self.initial_state = GameState.load(initial_state_path)

    def set_initial_state(self,
                          defense_val=2,
                          attack_val=0,
                          num_vulnerabilities_per_node=1,
                          det_val=2,
                          vulnerability_val=0,
                          num_vulnerabilities_per_layer=None,
                          randomize_visibility: bool = False,
                          visibility_p: float = 0.5):
        """
        Utility function for setting the initial game state

        :param defense_val: defense value for defense types that are not vulnerable
        :param attack_val: attack value for attack types
        :param num_vulnerabilities_per_node: number of vulnerabilities per node
        :param det_val: detection value per node
        :param vulnerability_val: defense value for defense types that are vulnerable
        :param num_vulnerabilities_per_layer: number of vulnerabilities per layer
        :param randomize_visibility: boolean flag whether to randomize visibility for partially observed envs
        :param visibility_p: probability parameter used when randomizing visibility
        :return: None
        """
        if num_vulnerabilities_per_layer is None:
            num_vulnerabilities_per_layer = self.num_servers_per_layer
        self.defense_val = defense_val
        self.attack_val = attack_val
        self.num_vulnerabilities_per_layer = num_vulnerabilities_per_layer
        self.det_val = det_val
        self.vulnerabilitiy_val = vulnerability_val
        self.num_vulnerabilities_per_node = num_vulnerabilities_per_node
        self.initial_state.set_state(
            self.network_config.node_list,
            self.num_attack_types,
            defense_val=defense_val,
            attack_val=attack_val,
            num_vulnerabilities_per_node=num_vulnerabilities_per_node,
            det_val=det_val,
            vulnerability_val=vulnerability_val,
            network_config=self.network_config,
            num_vulnerabilities_per_layer=num_vulnerabilities_per_layer,
            randomize_visibility=randomize_visibility,
            visibility_p=visibility_p)

    def get_attacker_observation_space(self) -> gym.spaces.Box:
        """
        Creates an OpenAI-Gym Space for the attacker observation

        :return: observation space
        """
        if not self.reconnaissance_actions:
            high_row = np.array([self.max_value] * (self.num_attack_types + 1))
            low = np.zeros((self.num_nodes, self.num_attack_types + 1))
        else:
            high_row = np.array([self.max_value] *
                                (self.num_attack_types * 2 + 1))
            low = np.zeros((self.num_nodes, self.num_attack_types * 2 + 1))
        high = np.array([high_row] * self.num_nodes)
        observation_space = gym.spaces.Box(low=low, high=high, dtype=np.int32)
        return observation_space

    def get_defender_observation_space(self) -> gym.spaces.Box:
        """
        Creates an OpenAI-Gym Space for the defender observation

        :return: observation space
        """
        high_row = np.array([self.max_value] * (self.num_attack_types + 1))
        high = np.array([high_row] * 1)
        low = np.zeros((1, self.num_attack_types + 1))
        observation_space = gym.spaces.Box(low=low, high=high, dtype=np.int32)
        return observation_space

    def get_action_space(self, defender: bool = False) -> gym.spaces.Discrete:
        """
        Creates an OpenAI-Gym space for the actions in the environment

        :param defender: boolean flag if defender or not
        :return: action space
        """
        if defender:
            return gym.spaces.Discrete(self.num_defense_actions)
        else:
            return gym.spaces.Discrete(self.num_attack_actions)
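A hedged usage sketch of the DTO above, using only the constructor defaults and the derived fields computed in __init__; the values in the comments follow directly from the formulas in the class, and the imports are assumed to come from the gym_idsgame package:

# One layer with two servers, ten attack types, no reconnaissance actions
config = GameConfig(num_layers=1, num_servers_per_layer=2,
                    num_attack_types=10, max_value=9)

# num_nodes = num_layers * num_servers_per_layer + 2 (start and data nodes)
assert config.num_nodes == 4
# num_attack_actions = num_attack_types * num_nodes (no reconnaissance)
assert config.num_attack_actions == 40
# num_defense_actions = (num_attack_types + 1) * num_nodes
assert config.num_defense_actions == 44

attacker_action_space = config.get_action_space(defender=False)  # Discrete(40)
defender_action_space = config.get_action_space(defender=True)   # Discrete(44)
attacker_obs_space = config.get_attacker_observation_space()     # Box with shape (4, 11)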
Example #14
    def __init__(self,
                 network_config: NetworkConfig = None,
                 manual_attacker: bool = True,
                 num_layers: int = 1,
                 num_servers_per_layer: int = 2,
                 num_attack_types: int = 10,
                 max_value: int = 9,
                 initial_state: GameState = None,
                 manual_defender: bool = False,
                 initial_state_path: str = None,
                 dense_rewards=False,
                 min_random_a_val: int = 0,
                 min_random_d_val: int = 0,
                 min_random_det_val: int = 0,
                 dense_rewards_v2=False,
                 reconnaissance_actions: bool = False,
                 max_random_v_val: int = 1,
                 dense_rewards_v3=False):
        """
        Class constructor, initializes the DTO

        :param network_config: the network configuration of the game (e.g. number of nodes and their connectivity)
        :param manual_attacker: whether the attacker is controlled manually or by an agent
        :param manual_defender: whether the defender is controlled manually or by an agent
        :param num_layers: the number of layers in the network
        :param num_servers_per_layer: the number of servers per layer in the network
        :param num_attack_types: the number of attack types
        :param max_value: max value for a defense/attack attribute
        :param initial_state: the initial state
        :param initial_state_path: path to the initial state saved on disk
        :param dense_rewards: if true, give hacker dense rewards (reward for each intermediate server hacked)
        :param dense_rewards_v2: if true, give defender reward only when blocking
        :param min_random_a_val: minimum attack value when randomizing the state
        :param min_random_d_val: minimum defense value when randomizing the state
        :param min_random_det_val: minimum detection value when randomizing the state
        :param reconnaissance_actions: a boolean flag that indicates whether reconnaissance activities are enabled for
                                       the attacker
        :param max_random_v_val: maximum random vulnerability value when using a randomized environment
        """
        self.reconnaissance_actions = reconnaissance_actions
        self.manual_attacker = manual_attacker
        self.manual_defender = manual_defender
        self.num_layers = num_layers
        self.num_servers_per_layer = num_servers_per_layer
        self.num_attack_types = num_attack_types
        self.max_value = max_value
        self.min_random_a_val = min_random_a_val
        self.min_random_d_val = min_random_d_val
        self.min_random_det_val = min_random_det_val
        self.num_rows = self.num_layers + 2
        self.num_nodes = self.num_layers * self.num_servers_per_layer + 2  # +2 for Start and Data Nodes
        self.num_cols = self.num_servers_per_layer
        self.set_attack_actions()
        self.num_defense_actions = (self.num_attack_types + 1) * self.num_nodes
        self.num_states = self.num_nodes
        self.network_config = network_config
        self.initial_state_path = initial_state_path
        self.defense_val = 2
        self.attack_val = 0
        self.num_vulnerabilities_per_node = 1
        self.det_val = 2
        self.dense_rewards_v2 = dense_rewards_v2
        self.dense_rewards_v3 = dense_rewards_v3
        self.vulnerabilitiy_val = 0
        self.max_random_v_val = max_random_v_val
        self.num_vulnerabilities_per_layer = None
        if network_config is None:
            self.network_config = NetworkConfig(self.num_rows,
                                                self.num_cols,
                                                connected_layers=False)
        self.initial_state = initial_state
        if self.initial_state is None and self.initial_state_path is not None:
            self.initial_state = GameState.load(self.initial_state_path)
        if self.initial_state is None and self.initial_state_path is None:
            self.initial_state = GameState(
                min_random_a_val=min_random_a_val,
                min_random_det_val=min_random_det_val,
                min_random_d_val=min_random_d_val,
                max_value=self.max_value,
                max_random_v_val=self.max_random_v_val)
            self.initial_state.default_state(
                self.network_config.node_list,
                self.network_config.start_pos,
                self.num_attack_types,
                network_config=self.network_config,
            )
        self.dense_rewards = dense_rewards