def EGTA_restart(restart_epoch, start_hado=2, retrain=False, game_path=os.getcwd() + '/game_data/game.pkl', stop_epoch=8):
    """Resume a DO-EGTA / HADO-EGTA run from a previously saved game.

    Each round reads the mixed strategies stored in ``game.nasheq[epoch]``,
    trains a new attacker and defender against them, optionally runs the
    HADO retraining step, extends the payoff matrix, runs the gambit
    analysis to obtain a new equilibrium, and saves the game back to
    ``game_path``.

    Args:
        restart_epoch: Epoch number of the first round to run. The
            equilibrium stored under ``restart_epoch - 1`` is the starting
            point.
        start_hado: Retraining is only performed for epochs strictly greater
            than this value (and only when ``retrain`` is truthy).
        retrain: When truthy, run the HADO retraining step.
        game_path: Pickle file the game is loaded from and saved to.
            NOTE: the default is computed once, when the function is
            defined, so it reflects the working directory at import time.
        stop_epoch: Rounds are run for epochs ``restart_epoch`` up to
            ``stop_epoch - 1``. The default of 8 is the value that used to
            be hard-coded.
    """
    print("=======================================================")
    if retrain:
        print("============Continue Running HADO-EGTA=================")
    else:
        print("=============Continue Running DO-EGTA==================")
    print("=======================================================")

    epoch = restart_epoch - 1
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    # The guard used to be `count != 0`; with restart_epoch > 8 the counter
    # started negative and only decreased, so the loop never terminated.
    rounds_left = stop_epoch - restart_epoch
    while rounds_left > 0:
        # Fix the opponents' strategies to the stored equilibrium.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Move on to the epoch being trained in this round.
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # Train and save the RL agents.
        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and keep the best one as
            # each player's strategy; this overrides the values returned by
            # the plain training calls above.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # NOTE: an earlier version simulated the new strategies against the
        # opponent's mixed strategy here and broke out of the loop when
        # neither a_BD - aPayoff nor d_BD - dPayoff reached game.threshold.
        # That early-stop check is disabled (its TODO warned that it may
        # stop too early).

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # Find a Nash equilibrium of the extended game with gambit.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)

        # Clear the players' network references before pickling the game.
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        rounds_left -= 1

        sys.stdout.flush()  # TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def initialize(load_env=None, env_name=None):
    """Build the environment and game data and run the first epoch.

    The environment is either loaded from a pickle (when ``load_env`` is a
    string) or generated with ``dag.env_rand_gen_and_save(env_name)``. The
    epoch-1 strategies are registered, simulated against each other to
    initialise the payoff matrix, and a trivial equilibrium is stored for
    epoch 1. The resulting game is pickled to ``game_data/game.pkl`` under
    the current working directory.

    Args:
        load_env: Name appended to the working directory (plus ``.pkl``) to
            locate a saved environment. Any non-string value means
            "generate a new environment".
        env_name: Passed to ``dag.env_rand_gen_and_save`` when a new
            environment is generated.

    Returns:
        The initialised game object.

    Raises:
        ValueError: If ``load_env`` is a string and the file does not exist.
    """
    print("=======================================================")
    print("=======Begin Initialization and first epoch============")
    print("=======================================================")

    # Create the environment.
    if not isinstance(load_env, str):
        # A fresh environment is created and saved.
        env = dag.env_rand_gen_and_save(env_name)
    else:
        # NOTE(review): no path separator is inserted between the working
        # directory and load_env - presumably callers pass a value that
        # starts with '/'; confirm against the call sites.
        env_file = os.getcwd() + load_env + '.pkl'
        if not fp.isExist(env_file):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(env_file)

    # Keep copies of the graph and mask.
    env.save_graph_copy()
    env.save_mask_copy()

    # Create the players and the action space.
    env.create_players()
    env.create_action_space()

    # Load the run parameters.
    param = jp.load_json_data(os.getcwd() + '/network_parameters/param.json')

    # Initialise the game data.
    game = game_data.Game_data(env, num_episodes=param['num_episodes'], threshold=param['threshold'])
    game.set_hado_param(param=param['hado_param'])
    game.set_hado_time_step(param['retrain_timesteps'])

    # Point both players at their environment, first through game.env and
    # then through the local env, defender before attacker each time.
    for owner in (game.env, env):
        owner.defender.set_env_belong_to(owner)
        owner.attacker.set_env_belong_to(owner)

    # The uniform strategies have been produced ahead of time.
    print("epoch 1:", datetime.datetime.now())
    epoch = 1

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'

    game.add_att_str(act_att)
    game.add_def_str(act_def)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()
    # Simulate the epoch-1 strategies and initialise the payoff matrix.
    aReward, dReward = series_sim(game.env, game, act_att, act_def, game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    game.init_payoffmatrix(dReward, aReward)

    # With a single strategy per player the equilibrium puts all weight on it.
    ne = {
        0: np.array([1], dtype=np.float32),
        1: np.array([1], dtype=np.float32),
    }
    game.add_nasheq(epoch, ne)

    # Save a copy of the game data.
    fp.save_pkl(game, os.getcwd() + '/game_data/game.pkl')

    sys.stdout.flush()
    return game
def series_sim(env, game, nn_att, nn_def, num_episodes):
    """Simulate ``num_episodes`` episodes and return the mean rewards.

    NOTE(review): this file defines ``series_sim`` a second time further
    down (taking ``size`` instead of ``num_episodes``). Python keeps the
    later definition, so this one is replaced at import time unless that is
    resolved.

    Args:
        env: Environment providing ``T``, ``G``, ``attacker``, ``defender``
            and ``reset_everything()``.
        game: Game data; ``game.att_str`` / ``game.def_str`` are the
            strategy names a mixed strategy is sampled from.
        nn_att: Attacker strategy, either a strategy file name (str) or a
            NumPy probability vector over ``game.att_str``.
        nn_def: Defender strategy, same convention over ``game.def_str``.
        num_episodes: Number of episodes to simulate.

    Returns:
        ``(attacker_mean, defender_mean)``, each rounded to 2 decimals.
    """
    aReward_list = np.array([])
    dReward_list = np.array([])

    att_spec = copy.copy(nn_att)
    def_spec = copy.copy(nn_def)

    horizon = env.T

    # A probability vector counts as a single strategy when exactly one
    # entry exceeds 0.95; that strategy is then drawn and loaded only once.
    single_str_att = not (
        isinstance(nn_att, np.ndarray)
        and len(np.where(nn_att > 0.95)[0]) != 1
    )
    single_str_def = not (
        isinstance(nn_def, np.ndarray)
        and len(np.where(nn_def > 0.95)[0]) != 1
    )

    _, targetset = get_Targets(env.G)

    for episode in range(num_episodes):  # episodes could be run in parallel
        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        # (Re)select and load the attacker strategy. For a single strategy
        # this happens in the first episode only, and the loaded objects
        # carry over to later episodes.
        if episode == 0 or not single_str_att:
            nn_att = copy.copy(att_spec)
            if isinstance(nn_att, np.ndarray):
                nn_att = np.random.choice(game.att_str, p=nn_att)
            # The epoch-1 (uniform) strategy is a plain pickle.
            att_uniform_flag = "epoch1.pkl" in nn_att
            att_path = os.getcwd() + "/attacker_strategies/" + nn_att
            if att_uniform_flag:
                nn_att_act = fp.load_pkl(att_path)
            else:
                nn_att_act, sess1, graph1 = load_action_class(
                    att_path, nn_att, game, 1)

        # Same procedure for the defender.
        if episode == 0 or not single_str_def:
            nn_def = copy.copy(def_spec)
            if isinstance(nn_def, np.ndarray):
                nn_def = np.random.choice(game.def_str, p=nn_def)
            def_uniform_flag = "epoch1.pkl" in nn_def
            def_path = os.getcwd() + "/defender_strategies/" + nn_def
            if def_uniform_flag:
                nn_def_act = fp.load_pkl(def_path)
            else:
                nn_def_act, sess2, graph2 = load_action_class(
                    def_path, nn_def, game, 0)

        for t in range(horizon):
            timeleft = horizon - t

            # Build the attacker's action set for this step; loaded
            # networks are evaluated inside their own graph and session.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default(), sess1.as_default():
                    attacker.att_greedy_action_builder_single(
                        G, timeleft, nn_att_act)

            # Build the defender's action set for this step.
            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default(), sess2.as_default():
                    defender.def_greedy_action_builder_single(
                        G, timeleft, nn_def_act)

            # Attacker's actions: tuples are edges (OR nodes), anything
            # else is a node (AND nodes). The cost is always added; the
            # node becomes active with probability actProb.
            for attack in attacker.attact:
                if isinstance(attack, tuple):
                    edge = G.edges[attack]
                    aReward += edge['cost']
                    if random.uniform(0, 1) <= edge['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    node_data = G.nodes[attack]
                    aReward += node_data['aCost']
                    if random.uniform(0, 1) <= node_data['actProb']:
                        node_data['state'] = 1

            # Defender's actions deactivate nodes at a cost.
            for node in defender.defact:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # Active target nodes reward the attacker and penalise the
            # defender.
            for node in targetset:
                target = G.nodes[node]
                if target['state'] == 1:
                    aReward += target['aReward']
                    dReward += target['dPenalty']

            # Update the defender's observation, then the attacker's.
            defender.update_obs(defender.get_def_hadAlert(G))
            defender.save_defact2prev()
            defender.defact.clear()

            attacker.update_obs(attacker.get_att_isActive(G))
            attacker.attact.clear()

        aReward_list = np.append(aReward_list, aReward)
        dReward_list = np.append(dReward_list, dReward)

    att_mean = np.round(np.mean(aReward_list), 2)
    def_mean = np.round(np.mean(dReward_list), 2)
    return att_mean, def_mean
def series_sim(env, game, nn_att, nn_def, size):
    """Simulate two episodes on copies of the environment; return mean rewards.

    NOTE(review): an earlier definition of ``series_sim`` (taking
    ``num_episodes``) appears above in this file. Python keeps the later
    definition, so this is the one callers get.

    Args:
        env: Environment providing ``G_reserved``, ``attacker``,
            ``defender`` and ``T``. Each episode runs on deep copies, so
            ``env`` itself is not modified here.
        game: Game data; ``game.att_str`` / ``game.def_str`` are the
            strategy names a mixed strategy is sampled from.
        nn_att: Attacker strategy, either a strategy file name (str) or a
            NumPy probability vector over ``game.att_str``.
        nn_def: Defender strategy, same convention over ``game.def_str``.
        size: Currently unused.
            NOTE(review): the previous code derived an episode count from
            it (10 if size > 20, 20 if 10 < size <= 20, else 30) but never
            used the result; the loop is hard-coded to 2 episodes. Confirm
            which is intended.

    Returns:
        ``(attacker_mean, defender_mean)`` over the simulated episodes.
    """
    aReward_list = np.array([])
    dReward_list = np.array([])

    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    T = env.T

    for _ in range(2):
        G = copy.deepcopy(env.G_reserved)
        attacker = copy.deepcopy(env.attacker)
        defender = copy.deepcopy(env.defender)

        aReward = 0
        dReward = 0

        nn_att = copy.copy(nn_att_saved)
        nn_def = copy.copy(nn_def_saved)

        # A NumPy array is a mixed strategy: draw one pure strategy name
        # from it. A str already names a strategy. The attacker is drawn
        # before the defender, as before.
        if isinstance(nn_att, np.ndarray):
            nn_att = np.random.choice(game.att_str, p=nn_att)
        if isinstance(nn_def, np.ndarray):
            nn_def = np.random.choice(game.def_str, p=nn_def)

        # Match the full "epoch1.pkl" suffix. The bare substring "epoch1"
        # also matched epoch10..epoch19, which would load those trained
        # strategies as plain pickles instead of through load_action_class.
        att_uniform_flag = "epoch1.pkl" in nn_att
        def_uniform_flag = "epoch1.pkl" in nn_def

        path = os.getcwd() + "/attacker_strategies/" + nn_att
        if att_uniform_flag:
            nn_att_act = fp.load_pkl(path)
        else:
            training_flag = 1
            nn_att_act, sess1, graph1 = load_action_class(
                path, nn_att, game, training_flag)

        path = os.getcwd() + "/defender_strategies/" + nn_def
        if def_uniform_flag:
            nn_def_act = fp.load_pkl(path)
        else:
            training_flag = 0
            nn_def_act, sess2, graph2 = load_action_class(
                path, nn_def, game, training_flag)

        for t in range(T):
            timeleft = T - t

            # Loaded networks are evaluated inside their own graph/session.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)

            att_action_set = attacker.attact
            def_action_set = defender.defact

            # Attacker's actions: the cost is always added; the node
            # becomes active with probability actProb.
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # Edge action (OR node).
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # Node action (AND node).
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1

            # Defender's actions deactivate nodes at a cost.
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # Active target nodes reward the attacker and penalise the
            # defender.
            _, targetset = get_Targets(G)
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']

        aReward_list = np.append(aReward_list, aReward)
        dReward_list = np.append(dReward_list, dReward)

    return np.mean(aReward_list), np.mean(dReward_list)
def train_and_sim():
    """Run one training-and-simulation round using state stored on disk.

    Reads ``(start_hado, retrain)`` and the current epoch from
    ``inner_egta_arg/`` and the game from ``game_data/game.pkl`` (both under
    the current working directory). It trains a new attacker and defender
    against the stored mixed strategies for that epoch, optionally runs the
    HADO retraining step, extends the payoff matrix, and writes the game and
    the incremented epoch back to disk.

    No equilibrium for the new epoch is computed here.
    """
    arg_dir = os.getcwd() + '/inner_egta_arg/'

    start_hado, retrain = fp.load_pkl(arg_dir + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_dir + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    # Fix the opponents' strategies to the stored equilibrium.
    equilibrium = game.nasheq[epoch]
    mix_str_def = equilibrium[0]
    mix_str_att = equilibrium[1]

    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # Move on to the epoch being trained in this round.
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # Retraining applies only past start_hado and only when requested.
    hado_round = bool(retrain and epoch > start_hado)

    # Train and save the RL agents.
    print("Begin training attacker......")
    a_BD = training.training_att(game, mix_str_def, epoch, retrain=hado_round)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game, mix_str_att, epoch, retrain=hado_round)
    print("Defender training done......")

    if hado_round:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulate the retrained strategies and keep the best one as each
        # player's strategy; this overrides the values returned above.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # NOTE: a disabled variant simulated the new strategies against the
    # opponent's mixed strategy here and stopped early when neither
    # deviation gain reached game.threshold.

    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # Simulate and extend the payoff matrix.
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    # Clear the players' network references before pickling the game.
    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, arg_dir + 'epoch_arg.pkl')