示例#1
0
def replaceability(nash_att, nash_def, payoffmatrix_def, payoffmatrix_att,
                   child_partition):
    """
    Calculate the replaceability of heuristics.

    NOTE(review): only the set-up part of this function is visible here.
    The code shown never fills ``rep`` and has no ``return``, so several
    locals look unused; they are kept on the assumption that the remainder
    of the original function uses them.

    :param nash_att: attacker's mixed strategy in the combined game.
    :param nash_def: defender's mixed strategy in the combined game.
        Presumably a column vector so that it weights the rows of the
        payoff matrices when broadcast -- TODO confirm against the caller.
    :param payoffmatrix_def: defender's payoff matrix of the combined game.
    :param payoffmatrix_att: attacker's payoff matrix of the combined game.
    :param child_partition: dict keyed by heuristic name; it is passed to
        find_heuristic_position to obtain each heuristic's (start, end)
        index range.
    :return: nothing in the portion of the function shown here.
    """
    rep = {}
    positions = find_heuristic_position(child_partition)
    # dict.items() exists on both Python 2 and Python 3; iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    pos_to_method = {y: x for x, y in positions.items()}

    # Support indicators: 1 where a strategy has positive probability.
    # Work on copies so the caller's equilibrium arrays are not modified.
    nash_indicator_att = nash_att.copy()
    nash_indicator_att[nash_indicator_att > 0] = 1
    nash_indicator_def = nash_def.copy()
    nash_indicator_def[nash_indicator_def > 0] = 1

    # Expected payoffs of both players under the given strategy profile.
    dPayoff = np.round(np.sum(nash_def * payoffmatrix_def * nash_att),
                       decimals=2)
    aPayoff = np.round(np.sum(nash_def * payoffmatrix_att * nash_att),
                       decimals=2)

    # Per-strategy expected utilities against the opponent's strategy:
    # row sums for the defender, column sums for the attacker.
    utils_def = np.round(np.sum(payoffmatrix_def * nash_att, axis=1),
                         decimals=2)
    utils_att = np.round(np.sum(nash_def * payoffmatrix_att, axis=0),
                         decimals=2)

    # Give both utility vectors the same shape. The shape is passed
    # positionally because the ``newshape`` keyword is deprecated in
    # recent NumPy releases.
    utils_def = np.reshape(utils_def, np.shape(utils_att))

    for method in child_partition:
        start, end = positions[method]

        # Overwrite this heuristic's own strategies with a large negative
        # utility. NOTE(review): in the code shown the masking accumulates
        # across iterations because the vectors are never restored; confirm
        # against the full original whether that is intended.
        utils_def[start:end] = -10000
        utils_att[start:end] = -10000
示例#2
0
def regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    """
    Calculate the regret (epsilon) curve of each heuristic's subgame.

    For every heuristic, the subgame made of its first ``epoch`` strategies
    (epoch = 1 .. size of the heuristic's block) is solved with
    do_gambit_analysis. The regret of each player is the gain from the best
    unilateral deviation to any strategy of the combined game, floored at 0.

    :param payoffmatrix_def: defender's payoff matrix of the combined game
        (rows: defender strategies, columns: attacker strategies).
    :param payoffmatrix_att: attacker's payoff matrix of the combined game,
        with the same layout.
    :param child_partition: dict keyed by heuristic name; it is passed to
        find_heuristic_position to obtain each heuristic's (start, end)
        index range.
    :return: (curves_att, curves_def), two dicts mapping each heuristic to
        the list of regrets, one entry per epoch.
    """
    curves_att = {}
    curves_def = {}
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        curves_att[method] = []
        curves_def[method] = []
        start, end = positions[method]
        num_str = end - start

        # Payoffs restricted to this heuristic's strategies on both sides.
        subgame_def = payoffmatrix_def[start:end, start:end]
        subgame_att = payoffmatrix_att[start:end, start:end]
        # Payoffs used for deviations: the deviating player may pick any
        # strategy of the combined game while the opponent stays inside the
        # heuristic's block.
        deviation_rows_att = payoffmatrix_att[start:end, :]
        deviation_cols_def = payoffmatrix_def[:, start:end]

        # Start at 1: epoch 0 would be an empty game, which is presumably
        # what triggered the Gambit "Expecting outcome or payoff" error
        # noted in the original TODO.
        for epoch in range(1, num_str + 1):
            subsubgame_def = subgame_def[:epoch, :epoch]
            subsubgame_att = subgame_att[:epoch, :epoch]

            nash_att, nash_def = do_gambit_analysis(subsubgame_def,
                                                    subsubgame_att,
                                                    maxent=False,
                                                    minent=True)

            # Zero-pad the equilibrium to the size of the heuristic's block
            # so it lines up with the sliced payoff matrices.
            padded_att = np.zeros(num_str)
            padded_att[:len(nash_att)] = nash_att
            padded_def = np.zeros(num_str)
            padded_def[:len(nash_def)] = nash_def
            # Column vector so that it weights rows when broadcast.
            padded_def = np.reshape(padded_def, (num_str, 1))

            # Expected payoff of every possible deviation.
            payoff_vect_att = np.sum(padded_def * deviation_rows_att, axis=0)
            payoff_vect_def = np.sum(deviation_cols_def * padded_att, axis=1)

            # Equilibrium payoffs, rounded to two decimals as before.
            nash_payoff_att = np.round(np.sum(padded_def * subgame_att *
                                              padded_att),
                                       decimals=2)
            nash_payoff_def = np.round(np.sum(padded_def * subgame_def *
                                              padded_att),
                                       decimals=2)

            deviation_att = np.max(payoff_vect_att)
            deviation_def = np.max(payoff_vect_def)
            regret_att = np.maximum(deviation_att - nash_payoff_att, 0)
            regret_def = np.maximum(deviation_def - nash_payoff_def, 0)

            curves_att[method].append(regret_att)
            curves_def[method].append(regret_def)

    return curves_att, curves_def
示例#3
0
def eligibility_trace(nasheq_dict, child_partition, gamma=0.7):
    '''
    Calculate the eligibility trace of strategies based on game.nasheq and
    put the strategies of each method into priority queues keyed by it.

    At every epoch the trace is decayed by ``gamma`` and -2 is added for each
    strategy whose equilibrium probability exceeds 0.05. A more negative
    trace therefore marks a strategy that was in the equilibrium support
    more often or more recently.

    The equilibria stored in ``nasheq_dict`` are not modified.

    :param nasheq_dict: {"baselines": game.nasheq}; for each method,
        nasheq[epoch][0] is read as the defender's equilibrium and
        nasheq[epoch][1] as the attacker's, for epoch = 1 ..
        child_partition[method]. The first entry of each equilibrium is
        skipped. NOTE(review): the reason is not visible here.
    :param child_partition: dict mapping each method to its number of
        strategies.
    :param gamma: decay factor applied to the trace at every epoch.
    :return: (pq_def, pq_att), dicts mapping each method to a queue of
        (trace value, strategy index in the combined game) pairs.
    '''
    position = find_heuristic_position(child_partition)
    nash_thred = 0.05

    def support_bonus(mixed_strategy):
        # np.array copies by default. The original thresholded a slice
        # view in place, which overwrote the caller's stored equilibria.
        bonus = np.array(mixed_strategy)
        bonus[bonus <= nash_thred] = 0
        bonus[bonus > nash_thred] = -2
        return bonus

    et_dict_def = {}
    et_dict_att = {}

    # Construct eligibility trace.
    for method in nasheq_dict:
        num_str = child_partition[method]
        et_dict_def[method] = np.zeros(num_str)
        et_dict_att[method] = np.zeros(num_str)
        nasheq = nasheq_dict[method]
        for epoch in np.arange(1, num_str + 1):
            ne_att = support_bonus(nasheq[epoch][1][1:])
            et_dict_att[method] *= gamma
            et_dict_att[method][:len(ne_att)] += ne_att

            ne_def = support_bonus(nasheq[epoch][0][1:])
            et_dict_def[method] *= gamma
            et_dict_def[method][:len(ne_def)] += ne_def

    # Put strategies into the queue with the eligibility trace as priority.
    pq_def = {}
    pq_att = {}
    for method in et_dict_def:
        pq_def[method] = pq()
        pq_att[method] = pq()
        start, _ = position[method]
        # Indices of this method's strategies in the combined game.
        idx_str = start + np.arange(child_partition[method])
        for pair in zip(et_dict_def[method], idx_str):
            pq_def[method].put(pair)

        for pair in zip(et_dict_att[method], idx_str):
            pq_att[method].put(pair)

    return pq_def, pq_att
示例#4
0
def formal_regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition,
                         num_epochs=40, skip_methods=('RM',)):
    """
    Calculate per-epoch regret curves of each heuristic's growing subgame.

    For every epoch e in 0 .. num_epochs - 1 and every heuristic that is not
    skipped, the subgame made of the heuristic's first e + 1 strategies is
    solved with do_gambit_analysis. Each player's regret is the gain from
    the best unilateral deviation to any strategy of the combined game,
    floored at 0.

    :param payoffmatrix_def: defender's payoff matrix of the combined game.
    :param payoffmatrix_att: attacker's payoff matrix of the combined game.
    :param child_partition: dict keyed by heuristic name; it is passed to
        find_heuristic_position to obtain each heuristic's (start, end)
        index range.
    :param num_epochs: number of points per curve (previously hard-coded
        to 40). NOTE(review): the subgame window is start + epoch + 1 and is
        not clamped to ``end``, so a value larger than a heuristic's block
        size extends the window into the following strategies.
    :param skip_methods: heuristic names to leave out; their curves stay
        empty (previously hard-coded to 'RM').
    :return: (curves_dict_def, curves_dict_att), dicts mapping each
        heuristic to its list of regrets.
    """
    positions = find_heuristic_position(child_partition)
    curves_dict_def = {}
    curves_dict_att = {}
    for method in child_partition:
        curves_dict_def[method] = []
        curves_dict_att[method] = []
    for epoch in np.arange(num_epochs):
        for method in child_partition:
            if method in skip_methods:
                continue

            start, end = positions[method]
            print(start, end)

            # Exclusive upper bound of the subgame at this epoch.
            stop = start + epoch + 1
            submatrix_att = payoffmatrix_att[start:stop, start:stop]
            submatrix_def = payoffmatrix_def[start:stop, start:stop]

            nash_att, nash_def = do_gambit_analysis(submatrix_def,
                                                    submatrix_att,
                                                    maxent=True)

            # Column vector so the defender's strategy weights rows. The
            # shape is passed positionally because the ``newshape`` keyword
            # is deprecated in recent NumPy releases.
            nash_def = np.reshape(nash_def, (len(nash_def), 1))

            ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
            ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

            # Best deviation payoff over all strategies of the combined
            # game, with the opponent staying inside the subgame.
            dev_def = np.max(
                np.sum(payoffmatrix_def[:, start:stop] * nash_att, axis=1))
            dev_att = np.max(
                np.sum(nash_def * payoffmatrix_att[start:stop, :], axis=0))

            curves_dict_def[method].append(
                np.maximum(dev_def - ne_payoff_def, 0))
            curves_dict_att[method].append(
                np.maximum(dev_att - ne_payoff_att, 0))

    return curves_dict_def, curves_dict_att
示例#5
0
def NE_regret(regret_vect_att, regret_vect_def, payoffmatrix_att,
              payoffmatrix_def, child_partition):
    """
    Average the combined-game regret over each heuristic's equilibrium
    support.

    For every heuristic, the subgame restricted to its own strategies is
    solved with do_gambit_analysis. The regrets of the strategies played
    with positive probability in that equilibrium are then averaged.

    :param regret_vect_att: per-strategy attacker regret in the combined
        game.
    :param regret_vect_def: per-strategy defender regret in the combined
        game.
    :param payoffmatrix_att: attacker's payoff matrix of the combined game.
    :param payoffmatrix_def: defender's payoff matrix of the combined game.
    :param child_partition: dict keyed by heuristic name; it is passed to
        find_heuristic_position to obtain each heuristic's (start, end)
        index range.
    :return: {method: {0: mean defender regret, 1: mean attacker regret}}
    """
    positions = find_heuristic_position(child_partition)
    regret_dict = {}

    for method in child_partition:
        lo, hi = positions[method]
        print(lo, hi)
        own = slice(lo, hi)

        nash_att, nash_def = do_gambit_analysis(payoffmatrix_def[own, own],
                                                payoffmatrix_att[own, own],
                                                maxent=True)

        # Turn the mixed strategies into 0/1 support indicators in place.
        nash_att[nash_att > 0] = 1
        nash_def[nash_def > 0] = 1

        mean_regret_def = (np.sum(regret_vect_def[own] * nash_def) /
                           np.sum(nash_def))
        mean_regret_att = (np.sum(regret_vect_att[own] * nash_att) /
                           np.sum(nash_att))

        regret_dict[method] = {0: mean_regret_def, 1: mean_regret_att}

    return regret_dict
示例#6
0
def regret_fixed_matrix(payoffmatrix_def, payoffmatrix_att, child_partition,
                        subgame_size=32):
    """
    Print both players' regret for a fixed-size subgame of each heuristic.

    For every heuristic, the subgame made of its first ``subgame_size``
    strategies is solved with do_gambit_analysis. Each player's regret is
    the gain from the best unilateral deviation to any strategy of the
    combined game, floored at 0.

    :param payoffmatrix_def: defender's payoff matrix of the combined game.
    :param payoffmatrix_att: attacker's payoff matrix of the combined game.
    :param child_partition: dict keyed by heuristic name; it is passed to
        find_heuristic_position to obtain each heuristic's (start, end)
        index range.
    :param subgame_size: number of leading strategies of each heuristic to
        include (previously hard-coded to 32). NOTE(review): the window is
        not clamped to the heuristic's ``end``, so a heuristic with fewer
        strategies pulls in the strategies that follow it.
    :return: None; the results are printed.
    """
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)

        # Exclusive upper bound of the fixed-size window.
        stop = start + subgame_size
        submatrix_att = payoffmatrix_att[start:stop, start:stop]
        submatrix_def = payoffmatrix_def[start:stop, start:stop]

        nash_att, nash_def = do_gambit_analysis(submatrix_def,
                                                submatrix_att,
                                                maxent=True)

        # Column vector so the defender's strategy weights rows. The shape
        # is passed positionally because the ``newshape`` keyword is
        # deprecated in recent NumPy releases.
        nash_def = np.reshape(nash_def, (len(nash_def), 1))

        ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
        ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

        # Best deviation payoff over all strategies of the combined game,
        # with the opponent staying inside the window.
        dev_def = np.max(
            np.sum(payoffmatrix_def[:, start:stop] * nash_att, axis=1))
        dev_att = np.max(
            np.sum(nash_def * payoffmatrix_att[start:stop, :], axis=0))

        print('------------------------------------------')
        print("The current method is ", method)
        print("The defender's regret is", np.maximum(dev_def - ne_payoff_def,
                                                     0))
        print("The attacker's regret is", np.maximum(dev_att - ne_payoff_att,
                                                     0))
    print("==================================================")
示例#7
0
def ne_search_wo_etrace(payoff_matrix_def, payoff_matrix_att, child_partition):
    """
    Search for a Nash equilibrium of the combined game by growing a
    restricted game, without using eligibility traces.

    The search starts from the equilibrium of the first heuristic's own
    subgame. While some player has a profitable deviation in the combined
    game, both players' best responses are added to the restricted game
    and it is solved again with do_gambit_analysis.

    :param payoff_matrix_def: defender's payoff matrix of the combined game
        (rows: defender strategies, columns: attacker strategies).
    :param payoff_matrix_att: attacker's payoff matrix of the combined game.
    :param child_partition: dict mapping each heuristic to its number of
        strategies. The first heuristic in iteration order is assumed to
        occupy indices [0, n) of the payoff matrices -- TODO confirm against
        find_heuristic_position.
    :return: (nash_def, nash_att, indicator_matrix). The two strategies have
        the length of the combined game. indicator_matrix holds 5 on each
        heuristic's own block, 1 on other cells the search touched, and 0
        elsewhere.
    :raises ValueError: if child_partition is empty.
    """
    if not child_partition:
        raise ValueError("child_partition must contain at least one method")

    position = find_heuristic_position(child_partition)
    total_num_str = sum(child_partition.values())

    # Find the candidate NE in the first heuristic's subgame.
    first_size = child_partition[next(iter(child_partition))]
    nash_att, nash_def = do_gambit_analysis(
        payoff_matrix_def[:first_size, :first_size],
        payoff_matrix_att[:first_size, :first_size],
        maxent=False,
        minent=False)
    # Strategies of the current restricted game.
    strategy_set_def = list(range(first_size))
    strategy_set_att = list(range(first_size))

    # Extend the NE to the length of the combined game. Each player uses
    # its own length; the original sliced both with len(nash_def).
    full_def = np.zeros(total_num_str)
    full_att = np.zeros(total_num_str)
    full_def[:len(nash_def)] = nash_def
    full_att[:len(nash_att)] = nash_att
    nash_def = full_def
    nash_att = full_att

    # indicator_matrix records which cells of the payoff matrix have been
    # simulated.
    indicator_matrix = np.zeros((total_num_str, total_num_str))
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 1

    # Column view of the defender's strategy so it weights rows.
    nash_def_T = np.reshape(nash_def, (len(nash_def), 1))

    payoff_def = np.sum(nash_def_T * payoff_matrix_def * nash_att)
    payoff_att = np.sum(nash_def_T * payoff_matrix_att * nash_att)

    # Change to simulation mode when simulation is needed.
    while True:
        # Rows and columns of the current support count as simulated.
        indicator_matrix[np.where(nash_def > 0)[0], :] = 1
        indicator_matrix[:, np.where(nash_att > 0)[0]] = 1

        # Expected payoff of every pure deviation in the combined game.
        # The attacker side must use the column vector; the 1-D vector
        # used originally for the argmax weighted columns instead of rows.
        dev_values_def = np.sum(payoff_matrix_def * nash_att, axis=1)
        dev_values_att = np.sum(nash_def_T * payoff_matrix_att, axis=0)

        dev_payoff_def = np.max(dev_values_def)
        dev_payoff_att = np.max(dev_values_att)
        dev_def = int(np.argmax(dev_values_def))
        dev_att = int(np.argmax(dev_values_att))

        if dev_payoff_def <= payoff_def and dev_payoff_att <= payoff_att:
            break

        # The original if/else branches were identical, so both best
        # responses are always added. A strategy already present is
        # skipped, because a duplicate would make the write-back below
        # overwrite probabilities.
        grew = False
        if dev_def not in strategy_set_def:
            strategy_set_def.append(dev_def)
            strategy_set_def.sort()
            grew = True
        indicator_matrix[dev_def, :] = 1

        if dev_att not in strategy_set_att:
            strategy_set_att.append(dev_att)
            strategy_set_att.sort()
            grew = True
        indicator_matrix[:, dev_att] = 1

        if not grew:
            # Both best responses are already in the restricted game, so
            # the remaining gap can only be numerical noise. Solving the
            # same game again would loop forever.
            break

        # presumably es() extracts the rows/columns listed in the
        # strategy sets from the full payoff matrix -- verify against es.
        subgame_def = es(strategy_set_def, strategy_set_att, payoff_matrix_def)
        subgame_att = es(strategy_set_def, strategy_set_att, payoff_matrix_att)

        sub_nash_att, sub_nash_def = do_gambit_analysis(subgame_def,
                                                        subgame_att,
                                                        maxent=False,
                                                        minent=False)
        sub_nash_def_T = np.reshape(sub_nash_def, (len(sub_nash_def), 1))

        payoff_def = np.sum(sub_nash_def_T * subgame_def * sub_nash_att)
        payoff_att = np.sum(sub_nash_def_T * subgame_att * sub_nash_att)

        # Write the restricted-game equilibrium back into vectors of the
        # combined game's length.
        nash_def = np.zeros(total_num_str)
        nash_att = np.zeros(total_num_str)
        for pos, value in zip(strategy_set_att, sub_nash_att):
            nash_att[pos] = value
        for pos, value in zip(strategy_set_def, sub_nash_def):
            nash_def[pos] = value

        # Rebuild the column view from the full-length strategy. The
        # original kept the subgame-sized one, which cannot broadcast
        # against the full payoff matrix on the next pass.
        nash_def_T = np.reshape(nash_def, (len(nash_def), 1))

    # Payoff matrix of subgames denotes 5.
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 5

    return nash_def, nash_att, indicator_matrix