Пример #1
0
def SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete):
    """Collect a superset of spouse candidates for ``target``.

    For each ``x`` in ``PCS`` every variable outside ``PCS`` (and different
    from ``target``) is tested against ``target`` conditioned on its stored
    separating set plus ``x``; variables found dependent are kept. A kept
    variable ``y`` is dropped again as soon as some other kept variable ``z``
    makes it independent of ``target`` given ``[x, z]``.

    Args:
        data: 2-D data set; only its column count is read here.
        target: index of the target variable.
        PCS: iterable of candidate indices (not modified).
        d_sep: mapping ``variable -> separating set``; must contain every
            variable outside ``PCS``.
        alaph: significance threshold for the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(SPS, ci_number)``: the de-duplicated list of kept variables over
        all ``x`` and the number of independence tests executed.
    """
    tests_run = 0
    _, n_vars = np.shape(data)
    # PCS is never mutated in this function, so the candidate list is the
    # same for every x and can be computed once.
    outsiders = [v for v in range(n_vars) if v != target and v not in PCS]

    collected = []
    for x in PCS:
        # Growing step: keep y when it stays dependent given d_sep[y] + {x}.
        kept = []
        for y in outsiders:
            cond = list(set([*d_sep[y], x]))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                kept.append(y)

        # Pruning step: iterate over a snapshot because `kept` shrinks.
        for y in list(kept):
            rivals = [v for v in kept if v != y]
            for z in rivals:
                tests_run += 1
                pval, _ = cond_indep_test(data, target, y, [x, z], is_discrete)
                if pval > alaph:
                    kept.remove(y)
                    break

        collected = list(set(collected).union(set(kept)))

    return collected, tests_run
Пример #2
0
def PCSuperSet(data, target, alaph, is_discrete):
    """Filter all variables down to a candidate set using order-0/1 tests.

    First pass: remove every variable that is independent of ``target`` with
    an empty conditioning set. Second pass: remove every survivor that is
    independent of ``target`` given a single other survivor.

    Args:
        data: 2-D data set; only its column count is read here.
        target: index of the target variable.
        alaph: significance threshold for the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(PCS, d_sep, ci_number)``: the surviving variables, a dict mapping
        each removed variable to the conditioning set that removed it
        (``[]`` or ``[y]``), and the number of tests executed.
    """
    tests_run = 0
    separators = {}
    _, n_vars = np.shape(data)
    survivors = [v for v in range(n_vars) if v != target]

    # Order 0: unconditional tests. Iterate a snapshot; survivors shrinks.
    for x in list(survivors):
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, [], is_discrete)
        if pval > alaph:
            survivors.remove(x)
            separators.setdefault(x, [])

    # Order 1: condition on one other variable that is still a survivor.
    for x in list(survivors):
        others = [v for v in survivors if v != x]
        for y in others:
            tests_run += 1
            pval, _ = cond_indep_test(data, target, x, [y], is_discrete)
            if pval > alaph:
                survivors.remove(x)
                separators.setdefault(x, [y])
                break

    return survivors, separators, tests_run
Пример #3
0
def getMinDep(data, target, x, CPC, alpha, is_discrete):
    """Return the minimum association between ``target`` and ``x``.

    Subsets of ``CPC`` are tried as conditioning sets in order of increasing
    size (0 up to ``min(len(CPC), 3)``).

    Returns:
        ``(0, S, ci_number)`` as soon as a subset ``S`` yields a p-value above
        ``alpha`` (the pair is then treated as independent), otherwise
        ``(dep_min, None, ci_number)`` where ``dep_min`` is the smallest
        dependence value observed (``inf`` if no test was run).
    """
    tests_run = 0
    weakest = float("inf")

    # Author's rationale: in the graphs considered a node rarely has more
    # than three parents or children, so conditioning sets of at most three
    # variables suffice to detect independence, which greatly reduces the
    # running time.
    size_cap = 3
    largest = min(len(CPC), size_cap)

    for size in range(largest + 1):
        for cond in subsets(CPC, size):
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, cond, is_discrete)
            # Independence found: the association is zero, stop searching.
            if pval > alpha:
                return 0, cond, tests_run
            if dep < weakest:
                weakest = dep

    return weakest, None, tests_run
Пример #4
0
def FBED(data, target, k, alaph, is_discrete=True):
    """Select variables with repeated forward runs and one backward pass.

    Forward phase: ``one_run`` is called until it no longer changes the
    selected set or more than ``k`` extra runs have been made (at most
    ``k + 1`` runs in total). Backward phase: each selected variable is
    tested against ``target`` given the other currently selected variables
    and removed when the p-value exceeds ``alaph``.

    Returns:
        ``(selected, ci_number)``: de-duplicated selected variables and the
        total number of independence tests (including those of ``one_run``).
    """
    selected = []
    tests_run = 0
    runs_done = 0
    changed = True

    # Forward phase. The snapshot is taken before the call because one_run
    # appends to the list it is given.
    while runs_done <= k and changed:
        before = set(selected)
        selected, spent = one_run(data, target, selected, alaph, is_discrete)
        runs_done += 1
        tests_run += spent
        changed = set(selected) != before

    # Backward phase: iterate a snapshot since `selected` may shrink.
    for x in list(selected):
        rest = [v for v in selected if v != x]
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, rest, is_discrete)
        if pval > alaph:
            selected.remove(x)

    return list(set(selected)), tests_run
Пример #5
0
def IPC_MB(data, target, alaph, is_discrete=True):
    """Build the MB set of ``target`` from ``RecognizePC`` results.

    ``RecognizePC`` is run for ``target`` and then for each returned
    neighbour ``x``. A neighbour whose own result does not contain
    ``target`` is discarded. Every other member ``y`` of ``x``'s result that
    is not already in the MB is added when it is dependent on ``target``
    given ``sepset[y]`` plus ``x``.

    Returns:
        ``(MB, ci_number)``: de-duplicated MB list and the number of
        independence tests executed (including those of ``RecognizePC``).
    """
    _, n_vars = np.shape(data)
    everyone_else = [v for v in range(n_vars) if v != target]
    PC, sepset, tests_run = RecognizePC(data, target, everyone_else, alaph,
                                        is_discrete)
    blanket = list(PC)

    for x in PC:
        pool_for_x = [v for v in range(n_vars) if v != x]
        pc_of_x, _, spent = RecognizePC(data, x, pool_for_x, alaph,
                                        is_discrete)
        tests_run += spent

        # Symmetry check: x must also see target as a neighbour.
        if target not in pc_of_x:
            blanket.remove(x)
            continue

        for y in pc_of_x:
            if y == target or y in blanket:
                continue
            cond = list(set([*sepset[y], x]))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                blanket.append(y)

    return list(set(blanket)), tests_run
Пример #6
0
def pc_simple(data, target, alaph, isdiscrete):
    """Prune the neighbour candidates of ``target`` level by level.

    Starting from all other variables, conditioning sets of size
    ``0, 1, 2, ...`` drawn from the candidates present at the start of the
    level are tried; a candidate is removed at the first set that yields a
    p-value above ``alaph``. The loop stops when the number of remaining
    candidates no longer exceeds the current set size.

    Returns:
        ``(PC, ciTest)``: remaining candidates and number of tests executed.
    """
    _, n_vars = np.shape(data)
    tests_run = 0
    PC = [v for v in range(n_vars) if v != target]

    order = 0
    while len(PC) > order:
        # Conditioning sets are taken from this snapshot, so removals made
        # during the level do not affect the other candidates' sets.
        level_start = list(PC)
        for x in level_start:
            pool = [v for v in level_start if v != x]
            if len(pool) < order:
                continue
            for cond in subsets(pool, order):
                pval, _ = cond_indep_test(data, x, target, cond, isdiscrete)
                tests_run += 1
                if pval > alaph:
                    PC.remove(x)
                    break  # x is settled, move to the next candidate
        order += 1

    return PC, tests_run
Пример #7
0
def MMMB(data, target, alaph, is_discrete=True):
    """Extend the ``MMPC`` result of ``target`` with spouse candidates.

    For each ``x`` returned by ``MMPC`` for ``target``, ``MMPC`` is run on
    ``x``; a member ``y`` of that result (other than ``target`` and not in
    the target's PC) is appended when it is dependent on ``target`` given
    ``sepset[y]`` plus ``x``. The conditioning set is passed as string
    labels.

    Returns:
        ``(MB, ci_number)``: the PC list followed by appended variables
        (not de-duplicated) and the number of tests executed.
    """
    PC, sepset, tests_run = MMPC(data, target, alaph, is_discrete)
    blanket = PC.copy()

    for x in PC:
        pc_of_x, _, spent = MMPC(data, x, alaph, is_discrete)
        tests_run += spent

        for y in pc_of_x:
            if y == target or y in PC:
                continue
            labels = [str(v) for v in sepset[y]]
            labels.append(str(x))
            cond = list(set(labels))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                blanket.append(y)
                # NOTE(review): stops after the first accepted y for this x,
                # so at most one variable is added per x — confirm intended.
                break

    return blanket, tests_run
Пример #8
0
def RecognizePC(data, target, ADJT, alaph, is_discrete=True):
    """Shrink ``ADJT`` to the variables never separated from ``target``.

    Conditioning sets of growing size are drawn from the other members of
    ``ADJT``. Within one pass every member is tested against the full
    current ``ADJT``; members found independent are removed together at the
    end of the pass. The search ends when a pass removes nothing or the set
    size reaches ``len(ADJT)``.

    Args:
        ADJT: initial candidate list; the caller's list is not mutated.

    Returns:
        ``(ADJT, sepset, ci_number)``: remaining candidates, a per-variable
        list of separating sets (empty list when none was recorded) and the
        number of tests executed.
    """
    _, n_vars = np.shape(data)
    sepset = [[] for _ in range(n_vars)]
    tests_run = 0
    order = 0

    while len(ADJT) > order:
        rejected = []
        for x in ADJT:
            pool = [v for v in ADJT if v != x]
            for cond in subsets(pool, order):
                tests_run += 1
                pval, _ = cond_indep_test(data, target, x, cond, is_discrete)
                if pval > alaph:
                    rejected.append(x)
                    sepset[x] = list(cond)
                    break

        if not rejected:
            break
        # Removal is deferred to the end of the pass.
        ADJT = [v for v in ADJT if v not in rejected]
        order += 1

    return ADJT, sepset, tests_run
Пример #9
0
def IAMB(data, target, alaph, is_discrete=True):
    """Grow then shrink a candidate set (``CMB``) for ``target``.

    Forward phase: in each round every variable outside ``CMB`` is tested
    against ``target`` given ``CMB``; among those with p-value ``<= alaph``
    the one with the largest dependence value is appended. The phase ends
    when no variable qualifies. Backward phase: each member is re-tested
    given the remaining members and removed when its p-value exceeds
    ``alaph``.

    Returns:
        ``(CMB, ci_number)``: the final list and the number of tests run.
    """
    _, n_vars = np.shape(data)
    CMB = []
    tests_run = 0

    # Forward phase.
    while True:
        winner = None
        top_dep = float("-inf")
        outside = [v for v in range(n_vars) if v != target and v not in CMB]

        for x in outside:
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            # Track the strongest dependent variable of this round.
            if pval <= alaph and dep > top_dep:
                top_dep = dep
                winner = x

        if winner is None:
            break
        CMB.append(winner)

    # Backward phase: iterate a snapshot because CMB may shrink.
    for x in list(CMB):
        rest = [v for v in CMB if v != x]
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, rest, is_discrete)
        if pval > alaph:
            CMB.remove(x)

    return CMB, tests_run
Пример #10
0
def IPC_MB(data, target, alaph, is_discrete=True):
    """Build the MB set of ``target`` from ``RecognizePC`` results.

    ``RecognizePC`` is run for ``target`` and for each returned neighbour
    ``x``. A neighbour whose own result lacks ``target`` is removed. Any
    other member ``y`` of ``x``'s result not yet in the MB is appended when
    it is dependent on ``target`` given ``sepset[y]`` plus ``x``; the
    conditioning set is passed as string labels.

    Returns:
        ``(MB, ci_number)``: the MB list (not de-duplicated) and the number
        of independence tests executed.
    """
    _, n_vars = np.shape(data)
    everyone_else = [v for v in range(n_vars) if v != target]
    PC, sepset, tests_run = RecognizePC(data, target, everyone_else, alaph,
                                        is_discrete)
    blanket = list(PC)

    for x in PC:
        pool_for_x = [v for v in range(n_vars) if v != x]
        pc_of_x, _, spent = RecognizePC(data, x, pool_for_x, alaph,
                                        is_discrete)
        tests_run += spent

        # Symmetry check: x must also report target as a neighbour.
        if target not in pc_of_x:
            blanket.remove(x)
            continue

        for y in pc_of_x:
            if y == target or y in blanket:
                continue
            labels = [str(v) for v in sepset[y]]
            labels.append(str(x))
            cond = list(set(labels))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                blanket.append(y)

    return blanket, tests_run


# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v3.csv")
# print("the file read")
#
# target = 6
# alaph = 0.05
#
# MBs=IPC_MB(data,target,alaph)
# print("MBs is: "+str(MBs))

# F1 is: 0.7997213203463205
# Precision is: 0.893875
# Recall is: 0.7637083333333331
# time is: 26.190546875

#5000

# F1 is: 0.96
# Precision is: 0.94
# Recall is: 1.0
# Distance is: 0.06
# ci_number is: 486.135
# time is: 18.63
Пример #11
0
def IAMB(data, target, alaph, attribute, is_discrete):
    """Grow then shrink a candidate set for ``target`` within ``attribute``.

    Same procedure as a plain grow/shrink search, except that candidates are
    taken from the iterable ``attribute`` instead of from all columns.

    Forward phase: per round, the variable with p-value ``<= alaph`` and the
    largest dependence value (given the current set) is appended; stops when
    none qualifies. Backward phase: members whose p-value given the other
    members exceeds ``alaph`` are removed.

    Returns:
        ``(CMB, ci_number)``: the final list and the number of tests run.
    """
    CMB = []
    tests_run = 0

    # Forward phase.
    while True:
        winner = None
        top_dep = float("-inf")
        outside = [v for v in attribute if v != target and v not in CMB]

        for x in outside:
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval <= alaph and dep > top_dep:
                top_dep = dep
                winner = x

        if winner is None:
            break
        CMB.append(winner)

    # Backward phase: iterate a snapshot because CMB may shrink.
    for x in list(CMB):
        rest = [v for v in CMB if v != x]
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, rest, is_discrete)
        if pval > alaph:
            CMB.remove(x)

    return CMB, tests_run
Пример #12
0
def KIAMB(data, target, alaph, k, is_discrete=True):
    """Randomised grow/shrink search for the MB set of ``target``.

    Growing: in each round all variables outside ``MB`` that are dependent
    on ``target`` given ``MB`` form the candidate list. A random sample of
    ``max(1, int(len(candidates) * k))`` candidates is drawn with
    ``random.sample`` and the sampled variable with the largest dependence
    value is appended. Growing stops when there are no candidates.
    Shrinking: members independent of ``target`` given the other members
    are removed.

    Args:
        k: fraction of the candidates to sample in each round.

    Returns:
        ``(MB, ci_number)``: de-duplicated MB list and number of tests run.
    """
    _, n_vars = np.shape(data)
    MB = []
    tests_run = 0

    while True:
        dep_of = {}
        admissible = []
        outside = [v for v in range(n_vars) if v != target and v not in MB]
        for x in outside:
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
            if pval <= alaph:
                admissible.append(x)
                dep_of[x] = dep
        if not admissible:
            break

        sample_size = max(1, int(len(admissible) * k))
        drawn = random.sample(admissible, sample_size)

        # Strict comparison: the first of several equal maxima wins.
        winner = None
        top_dep = -float("inf")
        for x in drawn:
            if dep_of[x] > top_dep:
                winner = x
                top_dep = dep_of[x]
        if winner is None:
            break
        MB.append(winner)

    # Remove false positives; iterate a snapshot because MB may shrink.
    for x in list(MB):
        rest = [v for v in MB if v != x]
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, rest, is_discrete)
        if pval > alaph:
            MB.remove(x)

    return list(set(MB)), tests_run
Пример #13
0
def semi_HITON_MB(data, target, alaph, is_discrete=True):
    """Extend the ``semi_HITON_PC`` result of ``target`` with spouses.

    For each ``x`` in the target's PC set, ``semi_HITON_PC`` is run on ``x``.
    A member ``y`` of that result (other than ``target`` and not in the
    target's PC set) is appended when it is dependent on ``target`` given
    ``sep[y]`` plus ``x``. The conditioning set is passed as string labels.

    Args:
        data: data set forwarded to the helpers.
        target: index of the target variable.
        alaph: significance threshold for the p-value.
        is_discrete: forwarded to ``semi_HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: the PC list followed by appended variables
        (not de-duplicated) and the number of tests executed.
    """
    # is_discrete is forwarded so the PC phase uses the same test family as
    # the spouse phase (previously it always ran with the default).
    TPC, sep, ci_number = semi_HITON_PC(data, target, alaph, is_discrete)
    MB = TPC.copy()
    for x in TPC:
        xPC, _, ci_number2 = semi_HITON_PC(data, x, alaph, is_discrete)
        ci_number += ci_number2
        for y in xPC:
            if y != target and y not in TPC:
                # {str(x)} keeps the label whole; set(str(x)) would split a
                # multi-digit index such as 12 into {"1", "2"}.
                condition_set = list({str(i) for i in sep[y]} | {str(x)})
                ci_number += 1
                # Two-value unpacking, matching every other call site of
                # cond_indep_test in this file.
                # NOTE(review): confirm cond_indep_test returns (pval, dep).
                pval, _ = cond_indep_test(data, target, y, condition_set,
                                          is_discrete)
                if pval <= alaph:
                    MB.append(y)
                    # NOTE(review): stops after the first accepted y for
                    # this x — confirm intended.
                    break
    return MB, ci_number


# data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# MB = semi_HITON_MB(data,1,0.01)
# print(MB)

# 500 0.01
# F1 is: 0.8089410311910312
# Precision is: 0.9234523809523809
# Recall is: 0.7709166666666666
# time is: 16.431171875

# 5000 0.01
# F1 is: 0.9340098937010702
# Precision is: 0.9733333333333334
# Recall is: 0.9137083333333336
# time is: 57.92828125

# 500 0.01
# F1 is: 0.81
# Precision is: 0.92
# Recall is: 0.77
# Distance is: 0.28
# ci_number is: 280.71
# time is: 16.43

# 5000 0.01
# F1 is: 0.93
# Precision is: 0.97
# Recall is: 0.91
# Distance is: 0.11
# ci_number is: 644.42
# time is: 56.91
Пример #14
0
def GSMB(data, target, alaph, is_discrete):
    """Grow/shrink search for the MB set of ``target``.

    Grow phase: scan the variables outside ``CMB`` in index order and append
    the first one whose p-value given ``CMB`` is strictly below ``alaph``;
    restart the scan after each addition and stop when a full scan adds
    nothing. Shrink phase: remove the first member whose p-value given the
    other members exceeds ``alaph``; restart after each removal and stop
    when a full scan removes nothing.

    Returns:
        ``(CMB, ci_number)``: de-duplicated list and number of tests run.
    """
    _, n_vars = np.shape(data)
    CMB = []
    tests_run = 0

    # Grow phase.
    outside = [v for v in range(n_vars) if v != target]
    while True:
        for x in outside:
            tests_run += 1
            pval, _ = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval < alaph:
                CMB.append(x)
                break
        else:
            # A complete scan added nothing.
            break
        outside = [v for v in range(n_vars) if v != target and v not in CMB]

    # Shrink phase.
    while True:
        for x in list(CMB):
            rest = [v for v in CMB if v != x]
            tests_run += 1
            pval, _ = cond_indep_test(data, target, x, rest, is_discrete)
            if pval > alaph:
                CMB.remove(x)
                break
        else:
            # A complete scan removed nothing.
            break

    return list(set(CMB)), tests_run
Пример #15
0
def HITON_MB(data, target, alaph, is_discrete=True):
    """Extend the ``HITON_PC`` result of ``target`` with spouse candidates.

    For each ``x`` in the target's PC set, ``HITON_PC`` is run on ``x``; a
    member ``y`` of that result (other than ``target`` and not in the
    target's PC set) is appended when it is dependent on ``target`` given
    ``sepset[y]`` plus ``x``. The conditioning set is passed as string
    labels.

    Returns:
        ``(MB, ci_number)``: the PC list followed by appended variables
        (not de-duplicated) and the number of tests executed.
    """
    PC, sepset, tests_run = HITON_PC(data, target, alaph, is_discrete)
    blanket = PC.copy()

    for x in PC:
        pc_of_x, _, spent = HITON_PC(data, x, alaph, is_discrete)
        tests_run += spent

        for y in pc_of_x:
            if y == target or y in PC:
                continue
            labels = [str(v) for v in sepset[y]]
            labels.append(str(x))
            cond = list(set(labels))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                blanket.append(y)
                # NOTE(review): stops after the first accepted y for this x
                # — confirm intended.
                break

    return blanket, tests_run


# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v1.csv")
# print("the file read")
#
# target = 4
# alaph = 0.05
#
# MBs=HITON_MB(data,target,alaph)
# print("MBs is: "+str(MBs))

# 500

# F1 is: 0.8465906593406597
# Precision is: 0.8957857142857146
# Recall is: 0.85525
# time is: 27.555

# 5000

# F1 is: 0.98
# Recall is: 0.99
# Distance is: 0.03
# ci_number is: 1017.85
# time is: 96.69
Пример #16
0
def semi_HITON_MB(data, target, alaph, is_discrete=True):
    """Extend the ``semi_HITON_PC`` result of ``target`` with spouses.

    For each ``x`` in the target's PC set, ``semi_HITON_PC`` is run on ``x``.
    A member ``y`` of that result (other than ``target`` and not in the
    target's PC set) is appended when it is dependent on ``target`` given
    ``sep[y]`` plus ``x``.

    Args:
        data: data set forwarded to the helpers.
        target: index of the target variable.
        alaph: significance threshold for the p-value.
        is_discrete: forwarded to ``semi_HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: de-duplicated MB list and number of tests run.
    """
    # is_discrete is forwarded so the PC phase uses the same test family as
    # the spouse phase (previously it always ran with the default).
    TPC, sep, ci_number = semi_HITON_PC(data, target, alaph, is_discrete)
    MB = TPC.copy()
    for x in TPC:
        xPC, _, ci_number2 = semi_HITON_PC(data, x, alaph, is_discrete)
        ci_number += ci_number2
        for y in xPC:
            if y != target and y not in TPC:
                condition_set = list(set(sep[y]).union({x}))
                ci_number += 1
                pval, _ = cond_indep_test(data, target, y, condition_set,
                                          is_discrete)
                if pval <= alaph:
                    MB.append(y)
                    # NOTE(review): stops after the first accepted y for
                    # this x — confirm intended.
                    break
    return list(set(MB)), ci_number
Пример #17
0
def one_run(data, target, S, alaph, is_discrete):
    """Run one forward pass that appends variables to ``S``.

    All variables outside ``S`` (except ``target``) start as remaining
    candidates. In each round the remaining candidates are tested against
    ``target`` given ``S``; those with p-value ``<= alaph`` are ranked by
    dependence (descending), the top one is appended to ``S`` and the rest
    become the next round's candidates. The pass ends when no candidate
    qualifies.

    Args:
        S: list of already selected variables; it is extended in place.

    Returns:
        ``(S, ci_number)``: the same list object and the number of tests run.
    """
    tests_run = 0
    _, n_vars = np.shape(data)
    remaining = [v for v in range(n_vars) if v not in S and v != target]

    while remaining:
        scored = []
        for x in remaining:
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, S, is_discrete)
            if pval <= alaph:
                scored.append((x, dep))

        if not scored:
            break

        # Stable sort: candidates with equal dependence keep their order.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        S.append(scored[0][0])
        # Only candidates that were dependent this round are tried again.
        remaining = [v for v, _ in scored[1:]]

    return S, tests_run
Пример #18
0
def MBtoPC(data, target, alaph, attribute, is_discrete):
    """Reduce the ``IAMB`` result of ``target`` to a PC set.

    Each member ``x`` of the MB returned by ``IAMB`` is tested against
    ``target`` conditioned on subsets (size 0 up to ``min(len, 3)``) of the
    other MB members; ``x`` is removed at the first subset that yields a
    p-value above ``alaph``.

    Returns:
        ``(PC, ci_number)``: surviving members and the number of tests run
        (including those of ``IAMB``).
    """
    size_cap = 3
    MB, tests_run = IAMB(data, target, alaph, attribute, is_discrete)
    PC = MB.copy()

    for x in MB:
        # Subsets are drawn from the full MB, not from the shrinking PC.
        pool = [v for v in MB if v != x]
        largest = min(len(pool), size_cap)
        for size in range(largest + 1):
            separated = False
            for cond in subsets(pool, size):
                tests_run += 1
                pval, _ = cond_indep_test(data, target, x, cond, is_discrete)
                if pval > alaph:
                    separated = True
                    break
            if separated:
                PC.remove(x)
                break

    return PC, tests_run
Пример #19
0
def fast_IAMB(data, target, alaph, is_discrete=True):
    """Grow/shrink search that adds several variables per growing phase.

    Candidates dependent on ``target`` given the current ``MB`` are sorted by
    dependence (descending) and appended one after another as long as the
    sample size is at least five times a degrees-of-freedom estimate derived
    from ``ns``. A shrinking phase then removes members that are independent
    of ``target`` given the other members. A variable that was added and
    removed in the same round more than ten times is excluded from further
    rounds. The loop ends when no candidate is left, when ``MB`` equals the
    list seen at the previous shrinking phase, or when growing stopped for
    lack of data and shrinking removed nothing.

    Args:
        data: 2-D data set; its row count is used as the sample size.
        target: index of the target variable.
        alaph: significance threshold for the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: the final list and the number of independence
        tests executed.
    """
    number, kVar = np.shape(data)
    ci_number = 0

    MB = []

    # [variable, dependence] pairs that are candidates for the next growing
    # phase.
    S_variables = []
    repeat_in_set = [0 for _ in range(kVar)]
    num_reapeat = 10
    no_in_set = []

    for x in [i for i in range(kVar) if i != target]:
        # Count the initial tests too (this was `+= 0`, which left them out
        # of the reported total).
        ci_number += 1
        pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
        if pval <= alaph:
            S_variables.append([x, dep])

    # Sentinel that can never equal a list, so the first comparison fails.
    BT_temp = -1

    while S_variables != []:
        flag_repeat_set = [False for _ in range(kVar)]
        # Strongest dependence first.
        S_variables = sorted(S_variables, key=lambda item: item[1],
                             reverse=True)

        # Growing phase.
        insufficient_data_Flag = False
        attributes_removed_Flag = False
        for candidate in S_variables:
            x = candidate[0]

            # NOTE(review): ns(data, cols) presumably returns the number of
            # states of each listed column — confirm against its definition.
            qi = ns(data, MB)
            tmp = [1]
            if len(qi) > 1:
                tmp.extend(np.cumprod(qi[0:-1]))

            if qi == []:
                qs = 0
            else:
                qs = 0
                for i in range(len(qi)):
                    qs += (qi[i] - 1) * tmp[i]
                qs += 1

            qxt = ns(data, [x, target])
            # np.prod is applied to the plain lists; np.mat is not needed
            # and is no longer available in NumPy 2.x.
            df_pair = np.prod([i - 1 for i in qxt])
            if qs == 0:
                df = df_pair * np.prod(qi)
            else:
                df = df_pair * qs

            if number >= 5 * df:
                MB.append(x)
                flag_repeat_set[x] = True
            else:
                # Not enough data for a reliable test: go to shrinking.
                insufficient_data_Flag = True
                break

        # Shrinking phase.
        if BT_temp == MB:
            break
        BT_temp = MB.copy()
        for x in BT_temp:
            subsets_BT = [i for i in MB if i != x]
            ci_number += 1
            pval_sp, dep_sp = cond_indep_test(data, target, x, subsets_BT,
                                              is_discrete)

            if pval_sp > alaph:
                MB.remove(x)
                # Track variables that keep being added and removed again.
                if flag_repeat_set[x] == True:
                    repeat_in_set[x] += 1
                    if repeat_in_set[x] > num_reapeat:
                        no_in_set.append(x)
                attributes_removed_Flag = True

        # Growing was cut short and nothing was removed: no progress is
        # possible, so return the current result.
        if (insufficient_data_Flag == True) and (attributes_removed_Flag
                                                 == False):
            break
        else:
            # Rebuild the candidate list against the updated MB.
            S_variables = []
            BTT_variables = [
                i for i in range(kVar)
                if i != target and i not in MB and i not in no_in_set
            ]
            for x in BTT_variables:
                ci_number += 1
                pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
                if pval <= alaph:
                    S_variables.append([x, dep])

    return MB, ci_number
Пример #20
0
def semi_HITON_PC(data, target, alaph, is_disrete=True):
    """Find the PC set of ``target`` with a ranked forward/backward search.

    1. Variables dependent on ``target`` with an empty conditioning set are
       ranked by dependence (descending).
    2. Forward: each ranked variable is added to the current PC set and
       removed again if some subset (size 0..3) of the other current members
       yields a p-value above ``alaph``; that subset is stored in ``sep``.
    3. Backward: every member except the last one added is re-checked the
       same way against the current PC set.

    Returns:
        ``(PC, sep, ci_number)``: de-duplicated PC list, per-variable
        separating sets (empty list when none was recorded) and the number
        of tests executed.
    """
    _, n_vars = np.shape(data)
    tests_run = 0
    sep = [[] for _ in range(n_vars)]
    size_cap = 3

    def find_separator(x, pool):
        """Return (first separating subset of pool or None, tests spent)."""
        spent = 0
        for size in range(min(len(pool), size_cap) + 1):
            for cond in subsets(pool, size):
                spent += 1
                pval, _ = cond_indep_test(data, x, target, cond, is_disrete)
                if pval > alaph:
                    return cond, spent
        return None, spent

    # Step 1: rank the unconditionally dependent variables.
    scored = []
    for x in [v for v in range(n_vars) if v != target]:
        tests_run += 1
        pval, dep = cond_indep_test(data, target, x, [], is_disrete)
        if pval <= alaph:
            scored.append([x, dep])
    ranked = [pair[0]
              for pair in sorted(scored, key=lambda p: p[1], reverse=True)]

    # Step 2: forward phase.
    current_pc = []
    for x in ranked:
        current_pc.append(x)
        cond, spent = find_separator(x, [v for v in current_pc if v != x])
        tests_run += spent
        if cond is not None:
            sep[x] = list(cond)
            current_pc.remove(x)

    # Step 3: backward phase, skipping the last variable added.
    last_added = current_pc[-1] if len(current_pc) > 0 else None
    for x in list(current_pc):
        if x == last_added:
            continue
        cond, spent = find_separator(x, [v for v in current_pc if v != x])
        tests_run += spent
        if cond is not None:
            current_pc.remove(x)
            sep[x] = list(cond)

    return list(set(current_pc)), sep, tests_run
Пример #21
0
def BAMB(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` with the BAMB search.

    Variables that are marginally dependent on the target are tried one at a
    time, ordered by the association value returned by ``cond_indep_test``
    (largest first).  Each variable that survives the admission test is added
    to the candidate parents/children set (CPC); the function then searches
    candidate spouses for it (CSPT) and re-checks the current CPC.

    Args:
        data: Two-dimensional data set (``np.shape(data)`` must unpack into
            two values).  It is forwarded unchanged to ``cond_indep_test``.
        target: Column index of the target variable.
        alaph: Significance threshold compared against the p-values returned
            by ``cond_indep_test``.
        is_discrete: Forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(MB, ci_number)`` -- the blanket members (remaining CPC plus
        the spouses stored for every remaining CPC member) and the number of
        conditional-independence tests that were counted.
    """
    ci_number = 0
    _, kVar = np.shape(data)
    max_k = 3  # largest conditioning-subset size that is enumerated
    CPC = []
    TMP = [i for i in range(kVar) if i != target]
    sepset = [[] for _ in range(kVar)]
    CSPT = [[] for _ in range(kVar)]
    variDepSet = []
    SP = [[] for _ in range(kVar)]

    # Keep only variables that are dependent on the target given the empty set.
    for x in TMP:
        ci_number += 1
        pval_f, dep_f = cond_indep_test(data, target, x, [], is_discrete)
        if pval_f > alaph:
            sepset[x] = []
        else:
            variDepSet.append([x, dep_f])

    variDepSet = sorted(variDepSet, key=lambda x: x[1], reverse=True)

    # Step 1: find the candidate PC set and the candidate spouse sets.
    for variIndex in variDepSet:
        A = variIndex[0]

        # Admission test: A is rejected as soon as some subset of CPC
        # (size <= max_k) makes it independent of the target.
        Slength = min(len(CPC), max_k)
        breakFlag = False
        for j in range(Slength + 1):
            ZSubsets = subsets(CPC, j)
            for Z in ZSubsets:
                ci_number += 1
                convari = [i for i in Z]
                pval_TAZ, _ = cond_indep_test(data, target, A, convari,
                                              is_discrete)
                if pval_TAZ > alaph:
                    sepset[A] = convari
                    breakFlag = True
                    break
            if breakFlag:
                break

        if breakFlag:
            continue

        CPC_ReA = CPC.copy()
        CPC.append(A)

        # Re-test the members that were in CPC before A, newest first.
        # Conditioning subsets are drawn from the pre-A snapshot.
        for B in reversed(CPC_ReA):
            flag1 = False
            conditionSet = [i for i in CPC_ReA if i != B]
            Clength = min(len(conditionSet), max_k)
            for j in range(Clength + 1):
                CSubsets = subsets(conditionSet, j)
                for Z in CSubsets:
                    ci_number += 1
                    convari = [i for i in Z]
                    pval_TBZ, _ = cond_indep_test(data, target, B, convari,
                                                  is_discrete)
                    if pval_TBZ >= alaph:
                        CPC.remove(B)
                        CSPT[B] = []
                        sepset[B] = convari
                        flag1 = True
                        # Stop at the first separating subset: without this
                        # break a second hit would call CPC.remove(B) again
                        # and raise ValueError.
                        break
                if flag1:
                    break

        CSPT[A] = []
        pval_CSPT = []

        # Collect candidate spouses through A: variables outside CPC that
        # are dependent on the target given their separating set plus A.
        for C in range(kVar):
            if C == target or C in CPC:
                continue
            conditionSet = [i for i in sepset[C]]
            conditionSet.append(A)
            conditionSet = list(set(conditionSet))

            ci_number += 1
            pval_CAT, _ = cond_indep_test(data, target, C, conditionSet,
                                          is_discrete)
            if pval_CAT <= alaph:
                CSPT[A].append(C)
                pval_CSPT.append([C, pval_CAT])

        # Step 2-1: prune spouses using conditioning sets {Z, A}, where Z is
        # another current spouse candidate.
        pval_CSPT = sorted(pval_CSPT, key=lambda x: x[1], reverse=False)
        SP[A] = []

        for pCSPT_index in pval_CSPT:
            E = pCSPT_index[0]

            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            # Reverse scan; "> 0" so that index -1 never wraps round to the
            # last element again.
            while index_spa > 0:
                index_spa -= 1
                x = SP[A][index_spa]
                breakFlag = False

                ZAllconditionSet = [i for i in SP[A] if i != x]
                for Z in ZAllconditionSet:
                    conditionvari = [Z]
                    if A not in conditionvari:
                        conditionvari.append(A)
                    ci_number += 1
                    pval_TXZ, _ = cond_indep_test(data, target, x,
                                                  conditionvari,
                                                  is_discrete)
                    if pval_TXZ > alaph:
                        SP[A].remove(x)
                        breakFlag = True

                        if x == E:
                            breakflag_spa = True
                        break
                # NOTE(review): unlike steps 2-2 and 2-3, any removal ends
                # the reverse scan here, and removing E ends the whole
                # candidate loop.  This looks like a break cascade shifted by
                # one level; behaviour is kept as found -- please confirm.
                if breakFlag:
                    break
            if breakflag_spa:
                break

        # Step 2-2: prune spouses that can be separated from A itself.
        pval_CSPT_new = [entry for entry in pval_CSPT if entry[0] in SP[A]]

        CSPT[A] = SP[A]
        SP[A] = []

        for pCSPT_index in pval_CSPT_new:
            E = pCSPT_index[0]

            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            while index_spa > 0:
                index_spa -= 1
                x = SP[A][index_spa]

                breakFlag = False
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                Zalength = min(len(ZAllSubsets), max_k)
                for j in range(Zalength + 1):
                    ZaSubsets = subsets(ZAllSubsets, j)
                    for Z in ZaSubsets:
                        Z = [i for i in Z]
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(
                            data, A, x, Z, is_discrete)
                        if pval_TXZ > alaph:
                            SP[A].remove(x)
                            breakFlag = True
                            if x == E:
                                breakflag_spa = True
                            break
                    if breakFlag:
                        break
                # The newest candidate was dropped: nothing else changed, so
                # move on to the next candidate.
                if breakflag_spa:
                    break

        # Step 2-3: prune spouses against the target, always conditioning
        # on A in addition to the enumerated subset.
        pval_CSPT_fin = [entry for entry in pval_CSPT if entry[0] in SP[A]]

        CSPT[A] = SP[A]
        SP[A] = []

        for pCSPT_index in pval_CSPT_fin:
            E = pCSPT_index[0]

            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            while index_spa > 0:
                index_spa -= 1
                x = SP[A][index_spa]
                breakFlag = False

                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                Zalength = min(len(ZAllSubsets), max_k)
                for j in range(Zalength + 1):
                    ZaSubsets = subsets(ZAllSubsets, j)
                    for Z in ZaSubsets:
                        Z = [i for i in Z]
                        Z.append(A)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(
                            data, target, x, Z, is_discrete)
                        if pval_TXZ >= alaph:
                            SP[A].remove(x)
                            if x == E:
                                breakflag_spa = True
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                if breakflag_spa:
                    break

        CSPT[A] = SP[A]

        # Step 3: remove false positives from CPC.  Each member is visited
        # exactly once, newest (A) first; if A itself is removed the pass
        # stops.
        CPC_temp = CPC.copy()
        A_breakFlag = False
        for x in reversed(CPC_temp):
            flag2 = False
            ZZALLsubsets = [i for i in CPC if i != x]
            Zlength = min(len(ZZALLsubsets), max_k)
            for j in range(Zlength + 1):
                Zzsubsets = subsets(ZZALLsubsets, j)
                for Z in Zzsubsets:
                    # Condition on Z plus the stored spouses of Z's members
                    # that are not themselves in CPC.
                    conditionSet = [
                        i for y in Z for i in CSPT[y] if i not in CPC
                    ]
                    conditionSet = list(set(conditionSet).union(set(Z)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x,
                                              conditionSet, is_discrete)
                    if pval >= alaph:
                        CPC.remove(x)
                        CSPT[x] = []
                        flag2 = True
                        if x == A:
                            A_breakFlag = True
                        break
                if flag2:
                    break
            if A_breakFlag:
                break

    spouseT = [j for i in CPC for j in CSPT[i]]
    MB = list(set(CPC).union(set(spouseT)))
    return MB, ci_number
Пример #22
0
def MMPC(data, target, alpha, is_discrete):
    """Select candidate parents/children of ``target`` (forward + backward).

    Forward phase: for every remaining variable ``getMinDep`` supplies its
    minimum association with the target over subsets of the current set; the
    variable with the largest such minimum is added.  Variables whose minimum
    is exactly 0 are set aside for good and their separating set is stored.
    Backward phase: every selected variable is re-tested against subsets
    (size <= 3) of the other selected variables.

    Args:
        data: Two-dimensional data set, forwarded to the helper functions.
        target: Column index of the target variable.
        alpha: Significance threshold for the p-values.
        is_discrete: Forwarded to the helper functions.

    Returns:
        tuple: ``(CPC, sepset, ci_number)`` -- de-duplicated selected
        variables, per-variable separating sets recorded in the forward
        phase, and the accumulated number of independence tests.
    """
    _, n_vars = np.shape(data)
    tests_run = 0
    selected = []
    zero_assoc = []
    sepset = [[] for _ in range(n_vars)]

    # Phase I: forward selection with the max-min heuristic.
    growing = True
    while growing:
        pool = [
            v for v in range(n_vars)
            if not (v == target or v in selected or v in zero_assoc)
        ]
        best_assoc = -float("inf")
        best_var = 0

        for cand in pool:
            min_assoc, cand_sep, n_tests = getMinDep(
                data, target, cand, selected, alpha, is_discrete)
            tests_run += n_tests

            if min_assoc == 0:
                # Never reconsidered: remember what separated it.
                zero_assoc.append(cand)
                sepset[cand] = list(cand_sep)
                continue
            if min_assoc > best_assoc:
                best_var, best_assoc = cand, min_assoc

        # Nothing admissible was found when the running maximum is still
        # negative; that ends the forward phase.
        growing = best_assoc >= 0
        if growing:
            selected.append(best_var)

    # Phase II: backward elimination over a snapshot of the selection.
    size_cap = 3
    for member in list(selected):
        others = [v for v in selected if v != member]
        largest = min(len(others), size_cap)

        separated = False
        size = 0
        while size <= largest and not separated:
            for cond in subsets(others, size):
                tests_run += 1
                pval, _ = cond_indep_test(data, target, member, cond,
                                          is_discrete)
                if pval > alpha:
                    selected.remove(member)
                    separated = True
                    break
            size += 1

    return list(set(selected)), sepset, tests_run
Пример #23
0
def inter_IAMB(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` with interleaved IAMB.

    Each round first grows the blanket by the variable with the largest
    association value (as returned by ``cond_indep_test`` given the current
    blanket), provided its p-value is at most ``alaph``; it then shrinks the
    blanket by re-testing every member given all the other members.

    Args:
        data: Two-dimensional data set, forwarded to ``cond_indep_test``.
        target: Column index of the target variable.
        alaph: Significance threshold for the p-values.
        is_discrete: Forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(MB, ci_number)`` -- the de-duplicated blanket and the
        number of conditional-independence tests that were run.
    """
    _, kVar = np.shape(data)
    ci_number = 0
    MB = []
    removeSet = []  # variables banned after being removed too often
    rmNumberSet = [0 for _ in range(kVar)]

    while True:
        dep_temp = -float("inf")
        pval_temp = 1
        max_s = None

        variables = [
            i for i in range(kVar)
            if i != target and i not in MB and i not in removeSet
        ]

        # Growing phase: pick the candidate with the largest association.
        for s in variables:
            ci_number += 1
            pval_gp, dep_gp = cond_indep_test(data, target, s, MB,
                                              is_discrete)
            if dep_gp > dep_temp:
                dep_temp = dep_gp
                max_s = s
                pval_temp = pval_gp

        # No candidate, or the best one is not significant: the blanket is
        # unchanged, so the shrinking phase cannot remove anything either.
        if max_s is None or not pval_temp <= alaph:
            break
        MB.append(max_s)

        # Shrinking phase: visit every member exactly once, newest first.
        # Iterating a reversed snapshot avoids the index -1 wrap-around that
        # re-tested the last element a second time.
        for x in reversed(MB.copy()):
            ci_number += 1
            subsets_Variables = [i for i in MB if i != x]
            pval_sp, _ = cond_indep_test(data, target, x, subsets_Variables,
                                         is_discrete)
            if pval_sp > alaph:
                MB.remove(x)
                # The variable added in this round was removed again: stop
                # shrinking and start the next round.
                if x == max_s:
                    break

                # Ban a variable once it has been removed more than 10 times
                # so the add/remove cycle cannot go on forever.
                rmNumberSet[x] += 1
                if rmNumberSet[x] > 10:
                    removeSet.append(x)

    return list(set(MB)), ci_number
Пример #24
0
def IAMB(data, target, alaph, is_discrete=True):
    """Grow a candidate Markov blanket for ``target`` greedily.

    In every round each variable outside the current candidate set is tested
    against the target given that set; among the variables whose p-value is
    at most ``alaph`` the one with the largest association value is added.
    The loop ends when a round adds nothing.  Only this forward phase is
    implemented here.

    Args:
        data: Two-dimensional data set, forwarded to ``cond_indep_test``.
        target: Column index of the target variable.
        alaph: Significance threshold for the p-values.
        is_discrete: Forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(CMB, ci_number)`` -- de-duplicated candidate blanket and
        the number of conditional-independence tests that were run.
    """
    _, n_vars = np.shape(data)
    blanket = []
    tests_run = 0

    while True:
        best_var = None
        best_dep = float("-inf")
        pool = [v for v in range(n_vars) if v != target and v not in blanket]

        for cand in pool:
            tests_run += 1
            pval, dep = cond_indep_test(data, target, cand, blanket,
                                        is_discrete)
            # Only significant candidates compete for the largest association.
            if pval <= alaph and dep > best_dep:
                best_dep, best_var = dep, cand

        if best_var is None:
            # Nothing was admissible in this round: forward phase finished.
            break
        blanket.append(best_var)

    return list(set(blanket)), tests_run


# F1 is: 0.75430044955045
# Precision is: 0.8198333333333335
# Recall is: 0.7885833333333332
# time is: 22.64546875

# F1 is: 0.81
# Precision is: 0.89
# Recall is: 0.79
# Distance is: 0.28
# ci_number is: 77.25
# time is: 15.16

# 5000
#
# F1 is: 0.92 ±0.40
# Precision is: 0.94±0.53
# Recall is: 0.94±0.30
# Distance is: 0.12±0.56
# ci_number is: 95.82±38.47
# time is: 74.56±188.69

# F1 is: 0.89
# Precision is: 0.88
# Recall is: 0.94
# Distance is: 0.16
# ci_number is: 97.85
# time is: 88.89

# import  pandas as pd
# data = pd.read_csv("../data/Alarm1_s1000_v1.csv")
# print("the file read")
#
# target = 2
# alaph = 0.01
#
# MBs=IAMB(data, target, alaph, is_discrete=True)
# print("MBs is: "+str(MBs))
Пример #25
0
def MBGSL(data, alpha, is_discrete, selected):
    """Learn a partially directed graph from per-variable Markov blankets.

    The function (1) computes a Markov blanket for every variable with the
    learner chosen by ``selected``, (2) symmetrises the blanket relation,
    (3) deletes links between variables that some conditioning subset
    separates, (4) orients edges and (5) hands the result to ``meek``.

    Args:
        data: Two-dimensional data set, forwarded to the learners and to
            ``cond_indep_test``.
        alpha: Significance threshold for the p-values.
        is_discrete: Forwarded to the learners and to ``cond_indep_test``.
        selected: Learner switch -- 1: ``MMMB``, 2: ``HITON_MB``,
            3: ``semi_HITON_MB``, anything else: ``PCMB``.

    Returns:
        tuple: ``(pdag, num_CI)`` -- the matrix returned by ``meek`` for the
        partially directed graph and the number of independence tests
        counted.
    """
    _, kvar = np.shape(data)
    max_k = 3
    all_neighbor = [[] for _ in range(kvar)]
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    # 1) Markov blanket of every variable.
    for i in range(kvar):
        if selected == 1:
            MB, n_c = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            MB, n_c = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            MB, n_c = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            MB, n_c, _ = PCMB(data, i, alpha, is_discrete)
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # 2) OR rule: keep a link when either endpoint lists the other.
    for i in range(kvar):
        for j in range(0, i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    all_MB = [[j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)]

    # 3) Remove possible spouse links between linked variables x and y.
    for x in range(kvar):
        for y in all_MB[x]:
            # Already deleted while processing the pair from y's side; the
            # only possible effect of re-testing would be the same deletion.
            if PP[x, y] == 0:
                continue
            varis = list((set(all_MB[x]).difference([y])).union(
                set(all_MB[y]).difference([x])))
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                ss = subsets(varis, k)
                for s in ss:
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Delete the link in both directions so the
                        # adjacency matrix stays symmetric (the second
                        # assignment used to repeat PP[x, y]).
                        PP[x, y] = 0
                        PP[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    for i in range(kvar):
        for j in range(kvar):
            if PP[i, j] == 1:
                all_neighbor[i].append(j)

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # 4) Orient edges.  PP[y, x] is used as a scratch flag: -1 while z still
    # supports the orientation, reset to 1 once a test contradicts it.
    for x in range(kvar):
        for y in all_neighbor[x]:
            sz = list((set(all_neighbor[x]).difference(
                all_neighbor[y])).difference([y]))
            for z in sz:
                PP[y, x] = -1
                B = list((set(all_MB[y]).difference([z])).union(
                    set(all_MB[z]).difference([y])))
                break_flag = False
                cutSetSize = 0
                # NOTE(review): the "cutSetSize == 0" condition limits this
                # loop to the empty subset of B, i.e. only {x} is ever used
                # as the conditioning set -- confirm that this is intended.
                while len(B) >= cutSetSize and cutSetSize == 0:
                    SS = subsets(B, cutSetSize)
                    for s in SS:
                        cond_s = list(set(s).union([x]))

                        num_CI += 1
                        pval, _ = cond_indep_test(data, y, z, cond_s,
                                                  is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1
                            break_flag = True
                            break
                    if break_flag:
                        break
                    cutSetSize += 1
            # NOTE(review): this check runs after the loop over z (so only
            # the outcome for the last z counts) and its break leaves the
            # loop over y.  It may have been meant to sit inside the z loop;
            # behaviour is kept as found -- please confirm.
            if PP[y, x] == -1:
                pdag[y, x] = -1
                pdag[x, y] = 0
                G[y, x] = 1
                G[x, y] = 0
                break

    # 5) Propagate orientations.
    DAG, pdag, G = meek(DAG, pdag, G, kvar)

    return pdag, num_CI
Пример #26
0
def HITON_PC(data, target, alaph, is_discrete=True):
    """Find candidate parents/children of ``target`` (HITON-PC style).

    Variables that are dependent on the target given the empty set are tried
    in order of decreasing association value.  After each insertion every
    member of the current set is re-tested against subsets (size <= 3) of
    the other members and dropped when some subset makes it independent.

    Args:
        data: Two-dimensional data set, forwarded to ``cond_indep_test``.
        target: Column index of the target variable.
        alaph: Significance threshold for the p-values.
        is_discrete: Forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(PC, sepset, ci_number)`` -- de-duplicated selected
        variables, the separating set recorded for each removed variable
        (indexed by variable), and the number of independence tests run.
    """
    _, kVar = np.shape(data)
    sepset = [[] for _ in range(kVar)]
    variDepSet = []
    PC = []
    ci_number = 0
    max_k = 3  # largest conditioning-subset size that is enumerated

    # Keep the variables that are marginally dependent on the target.
    candidate_Vars = [i for i in range(kVar) if i != target]
    for x in candidate_Vars:
        ci_number += 1
        pval_gp, dep_gp = cond_indep_test(
            data, target, x, [], is_discrete)
        if pval_gp <= alaph:
            variDepSet.append([x, dep_gp])

    # Strongest association first.
    variDepSet = sorted(variDepSet, key=lambda x: x[1], reverse=True)
    candidate_PC = [entry[0] for entry in variDepSet]

    for x in candidate_PC:
        PC.append(x)

        # Visit each member once, newest first.  A reversed snapshot is used
        # because PC shrinks during the scan; the former index loop ran one
        # step too far and wrapped round to PC[-1], repeating the tests for
        # the newest variable and inflating ci_number.
        for y in reversed(PC.copy()):
            removed = False
            conditions_Set = [i for i in PC if i != y]
            Slength = min(len(conditions_Set), max_k)

            for j in range(Slength + 1):
                SS = subsets(conditions_Set, j)
                for s in SS:
                    ci_number += 1
                    conditions_test_set = [i for i in s]
                    pval_rm, _ = cond_indep_test(
                        data, target, y, conditions_test_set, is_discrete)
                    if pval_rm > alaph:
                        sepset[y] = [i for i in conditions_test_set]
                        PC.remove(y)
                        removed = True
                        break
                if removed:
                    break

            # The newly added variable was rejected straight away, so the
            # set is back to its previous state and needs no further tests.
            if removed and y == x:
                break

    return list(set(PC)), sepset, ci_number
Пример #27
0
def IAMBnPC(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target``: IAMB growth, PC-style pruning.

    Growing phase: repeatedly add the significant variable (p-value at most
    ``alaph`` given the current set) with the largest association value.
    Shrinking phase: for conditioning-set sizes 0, 1, ... each surviving
    member is tested against subsets of that size drawn from the other
    survivors and discarded when a test returns a p-value of at least
    ``alaph``.

    Args:
        data: Two-dimensional data set, forwarded to ``cond_indep_test``.
        target: Column index of the target variable.
        alaph: Significance threshold for the p-values.
        is_discrete: Forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(MB, ci_number)`` -- surviving members in ascending order
        and the number of conditional-independence tests that were run.
    """
    blanket = []
    tests_run = 0
    _, n_vars = np.shape(data)

    # Growing phase.
    while True:
        scored = []
        for cand in range(n_vars):
            if cand == target or cand in blanket:
                continue
            tests_run += 1
            pval, dep = cond_indep_test(data, target, cand, blanket,
                                        is_discrete)
            if pval <= alaph:
                scored.append([cand, dep])
        if not scored:
            break
        # Stable descending sort: the first entry has the largest association.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        blanket.append(scored[0][0])

    # Shrinking phase over the sorted members; ``alive[k]`` tells whether
    # members[k] is still part of the blanket.
    members = sorted(blanket)
    count = len(members)
    alive = [True] * count
    size_cap = 3
    size = 0

    while True:
        for pos in range(count):
            if not alive[pos]:
                continue
            others = [k for k in range(count) if k != pos and alive[k]]
            for combo in subsets(others, size):
                cond_vars = [members[k] for k in combo]
                tests_run += 1
                pval_sp, _ = cond_indep_test(data, target, members[pos],
                                             cond_vars, is_discrete)
                if pval_sp >= alaph:
                    alive[pos] = False
                    break

        size += 1
        # Go on only while enough survivors remain to form subsets of the
        # next size and the size cap has not been exceeded.
        if not (sum(alive) >= size and size <= size_cap):
            break

    MB = [members[k] for k in range(count) if alive[k]]

    return MB, tests_run
Пример #28
0
def MBOR(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` with the MBOR procedure.

    A superset of parents/children (``PCSuperSet``) and of spouses
    (``SPSuperSet``) is computed first.  ``MBtoPC`` is then run on the data
    restricted to that superset to obtain the parents/children, which are
    extended by an OR condition (x is kept when the target appears in x's
    own ``MBtoPC`` result).  Finally spouses are searched among the
    parents/children of every PC member.

    Args:
        data: Two-dimensional data set.  It must provide
            ``drop(labels, axis=1)`` and its column labels are addressed as
            ``str(index)`` (pandas-style).
        target: Column index of the target variable.
        alaph: Significance threshold for the p-values.
        is_discrete: Forwarded to the helper functions.

    Returns:
        tuple: ``(MB, ci_number)`` -- the union of parents/children and
        spouses, and the accumulated number of independence tests.
    """
    _, kVar = np.shape(data)
    max_k = 3  # largest conditioning-subset size that is enumerated
    ci_number = 0

    PCS, d_sep, ci_num = PCSuperSet(data, target, alaph, is_discrete)
    ci_number += ci_num
    SPS, ci_num = SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete)
    ci_number += ci_num
    MBS = list(set(PCS).union(set(SPS)))

    # Restrict the data to the target plus the blanket superset.
    drop_data_attribute = [
        str(i) for i in range(kVar) if i != target and i not in MBS
    ]
    data_new = data.drop(drop_data_attribute, axis=1)
    data_attribute = [i for i in range(kVar) if i == target or i in MBS]

    PC, ci_num = MBtoPC(data_new, target, alaph, data_attribute, is_discrete)
    ci_number += ci_num

    # OR condition: keep x when the target shows up in x's own result.
    PCS_rmPC = [i for i in PCS if i not in PC]
    for x in PCS_rmPC:
        x_pcset, ci_num = MBtoPC(data_new, x, alaph, data_attribute,
                                 is_discrete)

        ci_number += ci_num
        if target in x_pcset:
            PC.append(x)

    SP = []
    for x in PC:
        data_attribute = [i for i in range(kVar) if i != target]
        x_pcset, ci_num = MBtoPC(data, x, alaph, data_attribute, is_discrete)
        ci_number += ci_num
        vari_set = [i for i in x_pcset if i != target and i not in PC]
        for y in vari_set:
            break_flag = False
            condition_all_set = [i for i in MBS if i != target and i != y]
            clength = min(len(condition_all_set), max_k)
            for j in range(clength + 1):
                condition_set = subsets(condition_all_set, j)
                for Z in condition_set:
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, y, Z, is_discrete)
                    if pval > alaph:
                        # Z is the first (hence smallest) set that separates
                        # the target from y.  y is a spouse when adding x to
                        # Z makes the two dependent again.
                        break_flag = True
                        condition_varis = [i for i in Z]
                        condition_varis.append(x)
                        condition_varis = list(set(condition_varis))
                        ci_number += 1
                        pval_with_x, _ = cond_indep_test(
                            data, target, y, condition_varis, is_discrete)
                        if pval_with_x <= alaph:
                            SP.append(y)
                        # Stop right away: testing further subsets of this
                        # size cannot change the outcome and only ran (and
                        # counted) extra independence tests.
                        break
                if break_flag:
                    break

    MB = list(set(PC).union(set(SP)))
    return MB, ci_number