Example #1
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):
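    """Resume a saved DO-EGTA (or HADO-EGTA, if retrain=True) run.

    The game object pickled at game_path is reloaded and the main EGTA loop is
    continued from restart_epoch: each iteration trains attacker/defender best
    responses against the current equilibrium mixture, optionally retrains them
    (HADO) once epoch > start_hado, extends the empirical payoff matrix by
    simulation, recomputes a Nash equilibrium with Gambit, and saves the game.
    """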

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch  # remaining epochs: the run is hard-capped at epoch 7
    while count != 0:
        # while True:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulation for retrained strategies and choose the best one as player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print('a_BD is ', a_BD)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print('d_BD is ', d_BD)
        #     print("Simulation done for d_BD.")

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
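        # Clear cached policy handles before saving (presumably so that live
        # network/session objects are not pickled with the game).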
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
Example #2
def initialize(load_env=None, env_name=None):
    print("=======================================================")
    print("=======Begin Initialization and first epoch============")
    print("=======================================================")

    # Create Environment
    if isinstance(load_env, str):
        path = os.getcwd() + load_env + '.pkl'
        if not fp.isExist(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        # env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)

    # save graph copy
    env.save_graph_copy()
    env.save_mask_copy()

    # create players and point to their env
    env.create_players()
    env.create_action_space()

    # load param
    param_path = os.getcwd() + '/network_parameters/param.json'
    param = jp.load_json_data(param_path)

    # initialize game data
    game = game_data.Game_data(env,
                               num_episodes=param['num_episodes'],
                               threshold=param['threshold'])
    game.set_hado_param(param=param['hado_param'])
    game.set_hado_time_step(param['retrain_timesteps'])
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # uniform strategy has been produced ahead of time
    print("epoch 1:", datetime.datetime.now())
    epoch = 1

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'

    game.add_att_str(act_att)
    game.add_def_str(act_def)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()
    # simulate using random strategies and initialize payoff matrix
    # if MPI_flag:
    #     aReward, dReward = do_MPI_sim(act_att, act_def)
    # else:
    aReward, dReward = series_sim(game.env, game, act_att, act_def,
                                  game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    game.init_payoffmatrix(dReward, aReward)
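    # Epoch-1 "equilibrium": each player has exactly one (uniform) strategy,
    # so it is played with probability 1.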
    ne = {}
    ne[0] = np.array([1], dtype=np.float32)
    ne[1] = np.array([1], dtype=np.float32)
    game.add_nasheq(epoch, ne)

    # save a copy of game data
    game_path = os.getcwd() + '/game_data/game.pkl'
    fp.save_pkl(game, game_path)

    sys.stdout.flush()
    return game
Example #3
def series_sim(env, game, nn_att, nn_def, num_episodes):
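    """Simulate num_episodes episodes of nn_att vs. nn_def and return mean payoffs.

    nn_att / nn_def are either strategy file names (str) or mixed-strategy
    probability vectors (np.ndarray) over game.att_str / game.def_str; mixed
    strategies are re-sampled each episode unless they are effectively pure.
    Returns the attacker and defender mean rewards, rounded to two decimals.
    """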
    aReward_list = np.array([])
    dReward_list = np.array([])
    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    T = env.T

    # Test if nn_att and nn_def point to one single strategy.
    single_str_att = True
    single_str_def = True
    if isinstance(nn_att, np.ndarray):
        if len(np.where(nn_att > 0.95)[0]) != 1:
            single_str_att = False

    if isinstance(nn_def, np.ndarray):
        if len(np.where(nn_def > 0.95)[0]) != 1:
            single_str_def = False

    _, targetset = get_Targets(env.G)
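    # The target set is fixed for the whole run; episodes reuse the same env
    # via reset_everything() rather than deep-copying the graph and players.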

    for i in range(num_episodes):  #can be run parallel

        # G = copy.deepcopy(env.G_reserved)
        # attacker = copy.deepcopy(env.attacker)
        # defender = copy.deepcopy(env.defender)

        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        if i == 0 or not single_str_att:
            att_uniform_flag = False
            nn_att = copy.copy(nn_att_saved)
            if isinstance(nn_att, np.ndarray):
                str_set = game.att_str
                nn_att = np.random.choice(str_set, p=nn_att)

            if "epoch1.pkl" in nn_att:
                att_uniform_flag = True

            path = os.getcwd() + "/attacker_strategies/" + nn_att
            if att_uniform_flag:
                nn_att_act = fp.load_pkl(path)
            else:
                training_flag = 1
                nn_att_act, sess1, graph1 = load_action_class(
                    path, nn_att, game, training_flag)

        if i == 0 or not single_str_def:
            def_uniform_flag = False
            nn_def = copy.copy(nn_def_saved)
            if isinstance(nn_def, np.ndarray):
                str_set = game.def_str
                nn_def = np.random.choice(str_set, p=nn_def)

            if "epoch1.pkl" in nn_def:
                def_uniform_flag = True

            path = os.getcwd() + "/defender_strategies/" + nn_def
            if def_uniform_flag:
                nn_def_act = fp.load_pkl(path)
            else:
                training_flag = 0
                nn_def_act, sess2, graph2 = load_action_class(
                    path, nn_def, game, training_flag)

        # def_uniform_flag = False
        # att_uniform_flag = False
        #
        # nn_att = copy.copy(nn_att_saved)
        # nn_def = copy.copy(nn_def_saved)
        #
        # # nn_att and nn_def here can be either np.ndarray or str. np.ndarray represents a mixed strategy.
        # # A str represents the name of a strategy.
        #
        # if isinstance(nn_att, np.ndarray) and isinstance(nn_def, str):
        #     str_set = game.att_str
        #     nn_att = np.random.choice(str_set, p=nn_att)
        #
        # if isinstance(nn_att, str) and isinstance(nn_def, np.ndarray):
        #     str_set = game.def_str
        #     nn_def = np.random.choice(str_set, p=nn_def)
        #
        # if isinstance(nn_att, np.ndarray) and isinstance(nn_def, np.ndarray):
        #     str_set = game.att_str
        #     nn_att = np.random.choice(str_set, p=nn_att)
        #     str_set = game.def_str
        #     nn_def = np.random.choice(str_set, p=nn_def)
        #
        # if "epoch1" in nn_att:
        #     att_uniform_flag = True
        #
        # if "epoch1" in nn_def:
        #     def_uniform_flag = True
        #
        # path = os.getcwd() + "/attacker_strategies/" + nn_att
        # if att_uniform_flag:
        #     nn_att_act = fp.load_pkl(path)
        # else:
        #     training_flag = 1
        #     nn_att_act, sess1, graph1 = load_action_class(path, nn_att, game, training_flag)
        #
        # path = os.getcwd() + "/defender_strategies/" + nn_def
        # if def_uniform_flag:
        #     nn_def_act = fp.load_pkl(path)
        # else:
        #     training_flag = 0
        #     nn_def_act, sess2, graph2 = load_action_class(path, nn_def, game, training_flag)

        # print('===================================')
        # print('==========start episode============')
        # print('===================================')
        # print(aReward, dReward)

        for t in range(T):
            # print('====================')
            timeleft = T - t
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)
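
            # Resolve the joint action: the attacker accrues edge/node activation
            # costs and, with probability actProb, sets the attacked node's state
            # to 1; the defender resets its chosen nodes to 0 and accrues dCost.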

            att_action_set = attacker.attact
            def_action_set = defender.defact
            # print(t, 'att:', att_action_set)
            # print(t, 'def:', def_action_set)
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # check OR node
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1
            # defender's action
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # print('Before Traget aRew:', aReward, 'dRew:', dReward)
            # print('target set:', targetset)
            # current_state = []
            # for node in G.nodes:
            #     current_state.append(G.nodes[node]['state'])
            # print('current_state:', current_state)
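            # Per-step target payoff: every target currently compromised
            # (state == 1) adds aReward to the attacker and dPenalty to the
            # defender.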
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']
            # print('aRew:', aReward, 'dRew:', dReward)

            # update players' observations
            # update defender's observation
            defender.update_obs(defender.get_def_hadAlert(G))
            defender.save_defact2prev()
            defender.defact.clear()
            # update attacker's observation
            attacker.update_obs(attacker.get_att_isActive(G))
            attacker.attact.clear()

        aReward_list = np.append(aReward_list, aReward)
        dReward_list = np.append(dReward_list, dReward)
        # print('alist:', aReward_list)
        # print('dlist:', dReward_list)

    return np.round(np.mean(aReward_list),
                    2), np.round(np.mean(dReward_list), 2)
Example #4
def series_sim(env, game, nn_att, nn_def, size):
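    """Variant of series_sim that scales the episode count with graph size.

    Each episode deep-copies the reserved graph and the players, samples pure
    strategies from any mixed-strategy inputs, plays T time steps, and returns
    the (unrounded) mean attacker and defender rewards.
    """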
    aReward_list = np.array([])
    dReward_list = np.array([])

    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    if size > 20:
        num_epi = 10
    elif 10 < size <= 20:
        num_epi = 20
    else:
        num_epi = 30

    # The original loop ran a fixed 2 episodes; num_epi (derived from size
    # above) appears to be the intended count, so it is used here.
    for i in range(num_epi):
        G = copy.deepcopy(env.G_reserved)
        attacker = copy.deepcopy(env.attacker)
        defender = copy.deepcopy(env.defender)
        T = env.T

        aReward = 0
        dReward = 0
        def_uniform_flag = False
        att_uniform_flag = False

        nn_att = copy.copy(nn_att_saved)
        nn_def = copy.copy(nn_def_saved)

        # nn_att and nn_def here can be either np.ndarray or str. np.ndarray represents a mixed strategy.
        # A str represents the name of a strategy.

        if isinstance(nn_att, np.ndarray) and isinstance(nn_def, str):
            str_set = game.att_str
            nn_att = np.random.choice(str_set, p=nn_att)

        if isinstance(nn_att, str) and isinstance(nn_def, np.ndarray):
            str_set = game.def_str
            nn_def = np.random.choice(str_set, p=nn_def)

        if isinstance(nn_att, np.ndarray) and isinstance(nn_def, np.ndarray):
            str_set = game.att_str
            nn_att = np.random.choice(str_set, p=nn_att)
            str_set = game.def_str
            nn_def = np.random.choice(str_set, p=nn_def)

        if "epoch1" in nn_att:
            att_uniform_flag = True

        if "epoch1" in nn_def:
            def_uniform_flag = True

        path = os.getcwd() + "/attacker_strategies/" + nn_att
        if att_uniform_flag:
            nn_att_act = fp.load_pkl(path)
        else:
            training_flag = 1
            nn_att_act, sess1, graph1 = load_action_class(
                path, nn_att, game, training_flag)

        path = os.getcwd() + "/defender_strategies/" + nn_def
        if def_uniform_flag:
            nn_def_act = fp.load_pkl(path)
        else:
            training_flag = 0
            nn_def_act, sess2, graph2 = load_action_class(
                path, nn_def, game, training_flag)

        for t in range(T):
            timeleft = T - t
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)

            att_action_set = attacker.attact
            def_action_set = defender.defact
            # print('att:', att_action_set)
            # print('def:', def_action_set)
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # check OR node
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1
            # defender's action
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']
            _, targetset = get_Targets(G)
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']

        aReward_list = np.append(aReward_list, aReward)
        dReward_list = np.append(dReward_list, dReward)

    return np.mean(aReward_list), np.mean(dReward_list)
Example #5
def train_and_sim():
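    """Run one EGTA epoch driven by the arguments pickled under inner_egta_arg/.

    Loads (start_hado, retrain) and the current epoch from disk, trains
    attacker/defender best responses against the current equilibrium mixture,
    optionally retrains them (HADO) once epoch > start_hado, extends the payoff
    matrix by simulation, and saves the updated game and incremented epoch.
    Equilibrium analysis itself is not performed here.
    """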
    arg_path = os.getcwd() + '/inner_egta_arg/'

    start_hado, retrain = fp.load_pkl(arg_path + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    mix_str_def = game.nasheq[epoch][0]
    mix_str_att = game.nasheq[epoch][1]
    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # train and save RL agents

    if retrain and epoch > start_hado:
        retrain_start = True

    print("Begin training attacker......")
    a_BD = training.training_att(game,
                                 mix_str_def,
                                 epoch,
                                 retrain=retrain_start)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game,
                                 mix_str_att,
                                 epoch,
                                 retrain=retrain_start)
    print("Defender training done......")

    if retrain and epoch > start_hado:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulation for retrained strategies and choose the best one as player's strategy.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # else:
    #
    #     # Judge beneficial deviation
    #     # one plays nn and another plays ne strategy
    #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
    #     nn_def = mix_str_def
    #     # if MPI_flag:
    #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for a_BD.")
    #
    #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = mix_str_att
    #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
    #     # if MPI_flag:
    #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for d_BD.")

    # #TODO: This may lead to early stop.
    # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
    #     print("*************************")
    #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
    #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
    #     print("*************************")
    #     break
    #
    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # simulate and extend the payoff matrix.
    # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, arg_path + 'epoch_arg.pkl')