Example #1
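# Restarts a DO-EGTA / HADO-EGTA run from a previously saved game, resuming at `restart_epoch`.
# NOTE: imports are omitted in this excerpt; usage implies the standard library (os, sys, datetime)
# and project-local modules (fp, util, training, sim_Series, ga) plus a sim_retrain helper.
# Their exact import paths are assumed, not shown in the original.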
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch  # Remaining epochs to run in this restarted run.
    while count != 0:
        # while True:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # Find Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
Example #2
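# NOTE: imports are omitted in this excerpt; usage implies the standard library (os, os.path as osp,
# sys, time, multiprocessing), psutil, a SummaryWriter (e.g. tensorboardX), a module-level logger,
# and project-local modules (settings, util, training, simulation, ga, fp) along with LearnerWorker,
# CloudpickleWrapper and meta_method_selector. Exact import paths are assumed.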
def _run(env,
         game,
         meta_method_name,
         epoch: int = 1,
         game_path: str = None,
         n_processes: int = 1):
    assert n_processes > 0, "Invalid number of processes."
    if game_path is None:
        game_path = osp.join(settings.get_run_dir(), "game.pkl")

    logger.info("=======================================================")
    logger.info("===============Begin Running DO-EGTA===================")
    logger.info("=======================================================")

    proc = psutil.Process(os.getpid())
    result_dir = settings.get_run_dir()

    selector = meta_method_selector(meta_method_name)

    count = 80  # Fixed maximum number of epochs for this run.
    while count != 0:
        mem0 = proc.memory_info().rss

        # Fix opponent strategy.
        mix_str_def, mix_str_att = selector.sample(game, epoch)

        # Save mixed strategies.
        # with open(osp.join(result_dir, f"mix_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_def, outfile)
        # with open(osp.join(result_dir, f"mix_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_att, outfile)
        # with open(osp.join(result_dir, f"payoff_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_def, outfile)
        # with open(osp.join(result_dir, f"payoff_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_att, outfile)

        # Equilibrium pay-off.
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        logger.info("Epoch " + str(epoch))
        epoch_dir = osp.join(result_dir, f"epoch_{epoch}")

        # Summary writer for each epoch.
        writer = SummaryWriter(logdir=epoch_dir)

        # train and save RL agents

        # Train new best-response policies.
        if n_processes > 1:
            logger.info("Begining training attacker and defender in parallel.")
            time_training = time.time()
            job_queue = multiprocessing.SimpleQueue()
            result_queue = multiprocessing.SimpleQueue()

            attacker_trainer = LearnerWorker(job_queue, result_queue, 1,
                                             mix_str_def, epoch)
            defender_trainer = LearnerWorker(job_queue, result_queue, 0,
                                             mix_str_att, epoch)

            attacker_trainer.start()
            defender_trainer.start()

            # Submit training jobs on our game.
            for _ in range(2):
                job_queue.put(CloudpickleWrapper(game))
            # Send sentinel values telling the worker processes to shut down cleanly (one per worker).
            for _ in range(2):
                job_queue.put(None)

            attacker_trainer.join()
            defender_trainer.join()

            # Collect and report results. We need to sort the results because they may appear in any order.
            results = []
            for _ in range(2):
                results += [result_queue.get()]
            # Put the defender's result first, then the attacker's.
            results = results if not results[0][0] else results[::-1]

            # Process results into expected variables for non-distributed.
            a_BD = results[1][1]
            d_BD = results[0][1]

            logger.info("Done training attacker and defender.")
            logger.info(f"Defender training report: \n{results[0][2]}")
            logger.info(f"Attacker training report: \n{results[1][2]}")
            time_training = time.time() - time_training

        else:
            logger.info("Begin training attacker......")
            time_train_attacker = time.time()
            a_BD, report = training.train(game, 1, mix_str_def, epoch, writer)
            time_train_attacker = time.time() - time_train_attacker
            logger.info(f"\n{report}")
            logger.info("Attacker training done......")

            logger.info("Begin training defender......")
            time_train_defender = time.time()
            d_BD, report = training.train(game, 0, mix_str_att, epoch, writer)
            time_train_defender = time.time() - time_train_defender
            logger.info(f"\n{report}")
            logger.info("Defender training done......")

        mem1 = proc.memory_info().rss

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        mem2 = proc.memory_info().rss

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        time_extend_game = time.time()
        game = simulation.simulate_expanded_game(game=game,
                                                 n_processes=n_processes,
                                                 save_dir=epoch_dir,
                                                 summary_writer=writer)
        time_extend_game = time.time() - time_extend_game
        mem3 = proc.memory_info().rss

        # Find Nash equilibrium using Gambit analysis.
        time_gambit = time.time()
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        logger.info("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        time_gambit = time.time() - time_gambit

        logger.info("RESULTS:")
        logger.info('  - a_BD_list: {}'.format(game.att_BD_list))
        logger.info('  - aPayoff: {}'.format(game.att_payoff))
        logger.info('  - d_BD_list: {}'.format(game.def_BD_list))
        logger.info('  - dPayoff: {}'.format(game.def_payoff))
        logger.info("MEM: {}, {}, {}.".format(
            (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0))
        logger.info("TIME: ")
        if n_processes == 1:
            logger.info(f"  - Training attacker: {time_train_attacker}")
            logger.info(f"  - Training defender: {time_train_defender}")
        else:
            logger.info(f"  - Training: {time_training}")
        logger.info(f"  - Extend game: {time_extend_game}")
        logger.info(f"  - Gambit: {time_gambit}")
        logger.info("Round_" + str(epoch) + " has done and game was saved.")
        logger.info("=======================================================")

        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    logger.info("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #3
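# NOTE: imports are omitted here as well; usage implies os, sys, datetime, psutil and the
# project-local modules fp, util, training, sim_Series, ga plus sim_retrain (paths assumed).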
def EGTA(env,
         game,
         start_hado=2,
         retrain=False,
         transfer=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    proc = psutil.Process(os.getpid())

    count = 18  # Fixed maximum number of epochs for this run.
    while count != 0:
        # while True:
        mem0 = proc.memory_info().rss
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        #TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        # Disable transfer at epoch 2; otherwise follow the transfer flag.
        transfer_flag = transfer and epoch != 2

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")
        mem2 = proc.memory_info().rss

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss
        # Find Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0,
              (mem3 - mem0) / mem0)
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #4
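# Runs a single EGTA iteration (train best responses, optionally retrain, extend the payoff
# matrix), driven by arguments previously saved to disk.
# NOTE: imports are omitted; usage implies os, datetime and the project-local modules
# fp, util, training, sim_Series plus sim_retrain (paths assumed).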
def train_and_sim():
    arg_path = os.getcwd() + '/inner_egta_arg/'

    start_hado, retrain = fp.load_pkl(arg_path + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    mix_str_def = game.nasheq[epoch][0]
    mix_str_att = game.nasheq[epoch][1]
    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # train and save RL agents

    if retrain and epoch > start_hado:
        retrain_start = True

    print("Begin training attacker......")
    a_BD = training.training_att(game,
                                 mix_str_def,
                                 epoch,
                                 retrain=retrain_start)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game,
                                 mix_str_att,
                                 epoch,
                                 retrain=retrain_start)
    print("Defender training done......")

    if retrain and epoch > start_hado:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulate the retrained strategies and choose the best one as the player's strategy.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # else:
    #
    #     # Judge beneficial deviation
    #     # one plays nn and another plays ne strategy
    #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
    #     nn_def = mix_str_def
    #     # if MPI_flag:
    #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for a_BD.")
    #
    #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = mix_str_att
    #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
    #     # if MPI_flag:
    #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for d_BD.")

    # #TODO: This may lead to early stop.
    # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
    #     print("*************************")
    #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
    #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
    #     print("*************************")
    #     break
    #
    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # simulate and extend the payoff matrix.
    # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
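    # Persist the incremented epoch so the next invocation resumes from the new epoch.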
    fp.save_pkl(epoch, arg_path + 'epoch_arg.pkl')