def EGTA_restart(restart_epoch, start_hado=2, retrain=False, transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):
    """Resume a saved DO-EGTA (or HADO-EGTA, when retrain=True) run from restart_epoch.

    Reloads the pickled game and repeats the double-oracle loop: fix the current
    equilibrium mixture, train best-response attacker/defender policies, extend the
    payoff matrix by simulation, and recompute the Nash equilibrium with Gambit.
    """
    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    # Run the remaining epochs up to the hard-coded total of 8.
    count = 8 - restart_epoch
    while count != 0:
        # while True:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents
        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start, transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start, transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #     # Judge beneficial deviation:
        #     # one player plays the new NN strategy, the other plays the NE mixture.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # TODO: This may lead to an early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)

        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
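
# Illustrative sketch (not part of the original module): the commented-out
# early-stopping test above, written as a standalone predicate. In the
# double-oracle view, the run has converged once neither player's new best
# response improves on its current equilibrium payoff by more than
# game.threshold.
def _beneficial_deviation_converged(a_BD, d_BD, aPayoff, dPayoff, threshold):
    """Return True when neither best response beats its equilibrium payoff by more than threshold."""
    return (a_BD - aPayoff < threshold) and (d_BD - dPayoff < threshold)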
def _run(env, game, meta_method_name, epoch: int = 1, game_path: str = None, n_processes: int = 1):
    """Run the DO-EGTA loop: sample opponent mixtures with the chosen meta-method,
    train best-response policies (optionally in parallel worker processes), extend
    the empirical game by simulation, and solve it with Gambit each epoch.
    """
    assert n_processes > 0, "Invalid number of processors."
    if game_path is None:
        game_path = osp.join(settings.get_run_dir(), "game.pkl")

    logger.info("=======================================================")
    logger.info("===============Begin Running DO-EGTA===================")
    logger.info("=======================================================")

    proc = psutil.Process(os.getpid())
    result_dir = settings.get_run_dir()

    selector = meta_method_selector(meta_method_name)

    count = 80
    while count != 0:
        mem0 = proc.memory_info().rss

        # Fix opponent strategy.
        mix_str_def, mix_str_att = selector.sample(game, epoch)

        # Save mixed strategies.
        # with open(osp.join(result_dir, f"mix_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_def, outfile)
        # with open(osp.join(result_dir, f"mix_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_att, outfile)
        # with open(osp.join(result_dir, f"payoff_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_def, outfile)
        # with open(osp.join(result_dir, f"payoff_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_att, outfile)

        # Equilibrium payoff.
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        logger.info("Epoch " + str(epoch))
        epoch_dir = osp.join(result_dir, f"epoch_{epoch}")

        # Summary writer for each epoch.
        writer = SummaryWriter(logdir=epoch_dir)

        # Train and save new best-response RL policies.
        if n_processes > 1:
            logger.info("Beginning training attacker and defender in parallel.")
            time_training = time.time()
            job_queue = multiprocessing.SimpleQueue()
            result_queue = multiprocessing.SimpleQueue()

            attacker_trainer = LearnerWorker(job_queue, result_queue, 1, mix_str_def, epoch)
            defender_trainer = LearnerWorker(job_queue, result_queue, 0, mix_str_att, epoch)

            attacker_trainer.start()
            defender_trainer.start()

            # Submit training jobs on our game.
            for _ in range(2):
                job_queue.put(CloudpickleWrapper(game))

            # Send sentinel values to tell the processes to cleanly shut down (1 per worker).
            for _ in range(2):
                job_queue.put(None)

            attacker_trainer.join()
            defender_trainer.join()

            # Collect and report results. We need to sort the results because they may appear in any order.
            results = []
            for _ in range(2):
                results += [result_queue.get()]
            results = results if not results[0][0] else results[::-1]  # Put defender first, then attacker.

            # Unpack results into the variables used by the non-distributed branch.
            a_BD = results[1][1]
            d_BD = results[0][1]

            logger.info("Done training attacker and defender.")
            logger.info(f"Defender training report: \n{results[0][2]}")
            logger.info(f"Attacker training report: \n{results[1][2]}")
            time_training = time.time() - time_training

        else:
            logger.info("Begin training attacker......")
            time_train_attacker = time.time()
            a_BD, report = training.train(game, 1, mix_str_def, epoch, writer)
            time_train_attacker = time.time() - time_train_attacker
            logger.info(f"\n{report}")
            logger.info("Attacker training done......")

            logger.info("Begin training defender......")
            time_train_defender = time.time()
            d_BD, report = training.train(game, 0, mix_str_att, epoch, writer)
            time_train_defender = time.time() - time_train_defender
            logger.info(f"\n{report}")
            logger.info("Defender training done......")

        mem1 = proc.memory_info().rss

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        mem2 = proc.memory_info().rss

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        time_extend_game = time.time()
        game = simulation.simulate_expanded_game(game=game,
                                                 n_processes=n_processes,
                                                 save_dir=epoch_dir,
                                                 summary_writer=writer)
        time_extend_game = time.time() - time_extend_game

        mem3 = proc.memory_info().rss

        # Find a Nash equilibrium using Gambit analysis.
        time_gambit = time.time()
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        logger.info("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        time_gambit = time.time() - time_gambit

        logger.info("RESULTS:")
        logger.info(' - a_BD_list: {}'.format(game.att_BD_list))
        logger.info(' - aPayoff: {}'.format(game.att_payoff))
        logger.info(' - d_BD_list: {}'.format(game.def_BD_list))
        logger.info(' - dPayoff: {}'.format(game.def_payoff))
        logger.info("MEM: {}, {}, {}.".format(
            (mem1 - mem0) / mem0,
            (mem2 - mem0) / mem0,
            (mem3 - mem0) / mem0))
        logger.info("TIME: ")
        if n_processes == 1:
            logger.info(f" - Training attacker: {time_train_attacker}")
            logger.info(f" - Training defender: {time_train_defender}")
        else:
            logger.info(f" - Training: {time_training}")
        logger.info(f" - Extend game: {time_extend_game}")
        logger.info(f" - Gambit: {time_gambit}")

        logger.info("Round_" + str(epoch) + " is done and the game was saved.")
        logger.info("=======================================================")
        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    logger.info("END: " + str(epoch))
    os._exit(os.EX_OK)  # Hard exit: skips normal interpreter cleanup.
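
# Illustrative sketch (not part of the original module): the result ordering
# used in the parallel branch of _run, made explicit. Each worker result is
# assumed to be an (identity, best_response_payoff, report) tuple with
# identity 1 for the attacker and 0 for the defender, so sorting by identity
# puts the defender first, matching the results[0]/results[1] indexing above.
def _order_results_defender_first(results):
    """Sort the two worker result tuples so the defender's (identity 0) comes first."""
    return sorted(results, key=lambda result: result[0])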
def EGTA(env, game, start_hado=2, retrain=False, transfer=False, epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):
    """Run DO-EGTA (or HADO-EGTA, when retrain=True) from the given starting epoch:
    fix the current equilibrium mixture, train best-response attacker/defender
    policies, extend the payoff matrix by simulation, and recompute the Nash
    equilibrium with Gambit each epoch.
    """
    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False
    proc = psutil.Process(os.getpid())

    count = 18
    while count != 0:
        # while True:
        mem0 = proc.memory_info().rss

        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        # TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents
        if retrain and epoch > start_hado:
            retrain_start = True

        if epoch == 2 and transfer:
            transfer_flag = False
        elif transfer:
            transfer_flag = True
        else:
            transfer_flag = False

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start, transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start, transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #     # Judge beneficial deviation:
        #     # one player plays the new NN strategy, the other plays the NE mixture.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        mem2 = proc.memory_info().rss

        # TODO: This may lead to an early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss

        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)

        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0)
        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)  # Hard exit: skips normal interpreter cleanup.
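
# Illustrative usage (not part of the original module): EGTA starts the loop
# from an in-memory game object, while EGTA_restart reloads the game pickle
# written at the end of each epoch and continues from there. The restart
# epoch below is an assumed example value.
def _example_resume(restart_epoch=5):
    """Resume an interrupted DO-EGTA run from the saved game pickle."""
    EGTA_restart(restart_epoch=restart_epoch, retrain=False, transfer=False)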
def train_and_sim():
    """Run a single DO-EGTA epoch driven by pickled arguments: train best-response
    attacker/defender policies against the current equilibrium, extend the payoff
    matrix by simulation, then save the updated game and the incremented epoch.
    """
    arg_path = os.getcwd() + '/inner_egta_arg/'
    start_hado, retrain = fp.load_pkl(arg_path + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    mix_str_def = game.nasheq[epoch][0]
    mix_str_att = game.nasheq[epoch][1]
    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # train and save RL agents
    if retrain and epoch > start_hado:
        retrain_start = True

    print("Begin training attacker......")
    a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start)
    print("Defender training done......")

    if retrain and epoch > start_hado:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulate the retrained strategies and choose the best one as the player's strategy.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # else:
    #     # Judge beneficial deviation:
    #     # one player plays the new NN strategy, the other plays the NE mixture.
    #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
    #     nn_def = mix_str_def
    #     # if MPI_flag:
    #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for a_BD.")
    #
    #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = mix_str_att
    #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
    #     # if MPI_flag:
    #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for d_BD.")

    # TODO: This may lead to an early stop.
    # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
    #     print("*************************")
    #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
    #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
    #     print("*************************")
    #     break

    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # simulate and extend the payoff matrix.
    # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, arg_path + 'epoch_arg.pkl')
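
# Illustrative driver (not part of the original module): train_and_sim reads its
# arguments from pickles under inner_egta_arg/ and writes the incremented epoch
# back, so an outer script can step it repeatedly. The seed values and the epoch
# count below are assumed examples, not the project's actual launcher.
def _example_outer_loop(n_epochs=3):
    """Step train_and_sim for a few epochs by round-tripping the pickled arguments."""
    arg_path = os.getcwd() + '/inner_egta_arg/'
    fp.save_pkl((2, False), arg_path + 'hado_arg.pkl')  # (start_hado, retrain)
    fp.save_pkl(1, arg_path + 'epoch_arg.pkl')          # start from epoch 1
    for _ in range(n_epochs):
        train_and_sim()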