def RecognizePC(data, target, ADJT, alaph, is_discrete=True):
    """Shrink a candidate neighbour list of ``target`` level by level.

    At level ``k`` (starting from 0) every candidate ``x`` is tested against
    ``target`` conditioned on each size-``k`` subset of the other candidates.
    A candidate is dropped as soon as one test returns a p-value above
    ``alaph``.  The search stops when a level removes nothing, or when the
    level reaches the number of remaining candidates.

    Args:
        data: two-dimensional data set; only its column count is read here.
        target: index of the target variable.
        ADJT: list of candidate variable indices (not modified in place).
        alaph: significance threshold compared against the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(candidates, sepset, ci_number)``: the surviving candidates,
        a per-variable list holding the conditioning set that removed that
        variable (empty otherwise), and the number of tests performed.
    """
    _, n_vars = np.shape(data)
    sepset = [[] for _ in range(n_vars)]
    ci_number = 0
    level = 0

    while level < len(ADJT):
        rejected = []
        for x in ADJT:
            others = [v for v in ADJT if v != x]
            for S in subsets(others, level):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, S, is_discrete)
                if pval > alaph:
                    rejected.append(x)
                    sepset[x] = list(S)
                    break

        # Nothing was separated at this level: stop instead of growing
        # the conditioning sets further.
        if not rejected:
            break

        # Removals are applied only after the whole level has been scanned.
        ADJT = [v for v in ADJT if v not in rejected]
        level += 1

    return ADJT, sepset, ci_number
def getMinDep(data, target, x, CPC, alpha, is_discrete):
    """Find the weakest association between ``target`` and ``x`` given subsets of ``CPC``.

    Subsets of ``CPC`` are tried in order of increasing size (0 up to
    ``min(len(CPC), 3)``).

    Returns:
        ``(0, S, ci_number)`` as soon as a subset ``S`` gives a p-value above
        ``alpha`` (the pair is then treated as conditionally independent and
        the search ends); otherwise ``(dep_min, None, ci_number)`` where
        ``dep_min`` is the smallest dependence value observed.
    """
    # In these graphs a node rarely has more than three parents or three
    # children (at most a -> b, c, d -> z), so a conditioning set of three
    # variables, (a, z) | (b, c, d), is enough to decide whether (a, z) are
    # independent.  Capping the size greatly reduces the running time.
    max_k = 3
    ci_number = 0
    dep_min = float("inf")

    for size in range(min(len(CPC), max_k) + 1):
        for S in subsets(CPC, size):
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, S, is_discrete)
            if pval > alpha:
                # Independence found: the dependence is reported as zero.
                return 0, S, ci_number
            if dep < dep_min:
                dep_min = dep

    return dep_min, None, ci_number
def pc_simple(data, target, alaph, isdiscrete):
    """Estimate the parents-and-children set of ``target`` level by level.

    Starting from every other variable, level ``k`` removes a variable ``x``
    when some size-``k`` subset of the other variables present at the start
    of the level makes ``x`` and ``target`` independent (p-value above
    ``alaph``).

    Returns:
        ``(PC, ciTest)``: the surviving variables and the number of tests run.
    """
    _, n_vars = np.shape(data)
    PC = [v for v in range(n_vars) if v != target]
    ciTest = 0
    k = 0

    while k < len(PC):
        # Conditioning sets are drawn from the snapshot taken at the start
        # of the level, not from the list being pruned.
        snapshot = PC.copy()
        for x in snapshot:
            others = [v for v in snapshot if v != x]
            if len(others) < k:
                continue
            for s in subsets(others, k):
                pval, _ = cond_indep_test(data, x, target, s, isdiscrete)
                ciTest += 1
                if pval > alaph:
                    PC.remove(x)
                    break
        k += 1

    return PC, ciTest
def TIE_p(data, target, alaph, isdiscrete):
    """Collect alternative Markov blankets of ``target``.

    A first blanket is obtained with ``IAMB`` on all variables and scored
    with ``eva_classifier``.  Variable sets to exclude are then generated
    from subsets (size 1..3) of the most recently accepted blanket, merged
    with the exclusion set that produced it.  ``IAMB`` is re-run without each
    exclusion set, and a new non-empty, unseen blanket is accepted when its
    score is at least the score of the first blanket.

    Returns:
        The list of accepted alternative blankets (the first blanket itself
        is not included).
    """
    _, kVar = np.shape(data)
    max_k = 3

    MB, _ = IAMB(data, target, alaph, list(range(kVar)), isdiscrete)
    accurary_MB = eva_classifier(data, target, MB)

    M = [MB]                # accepted blankets, M[0] is the initial one
    G = [[]]                # exclusion set that produced each entry of M
    not_in_set = []         # never filled in this function; kept as a filter hook
    possible_subests = []   # queue of exclusion sets, consumed via s_index
    MB_new_set = []
    index = 0
    s_index = 0

    while True:
        current = M[index]
        for j in range(1, min(len(current), max_k) + 1):
            for x in subsets(current, j):
                members = set(x)
                if any(members.issuperset(set(y)) for y in not_in_set):
                    continue
                vari_one = list(members.union(set(G[index])))
                if vari_one not in possible_subests:
                    possible_subests.append(vari_one)

        # Stop when every queued exclusion set has been tried.
        if s_index >= len(possible_subests):
            break
        excpet_varis_set = possible_subests[s_index]
        s_index += 1

        variable_new = [v for v in range(kVar) if v not in excpet_varis_set]
        MB_new, _ = IAMB(data, target, alaph, variable_new, isdiscrete)
        if MB_new == [] or MB_new in MB_new_set:
            continue

        accurary_MB_new = eva_classifier(data, target, MB_new)
        if not (accurary_MB <= accurary_MB_new):
            continue
        MB_new_set.append(MB_new)
        M.append(MB_new)
        G.append(excpet_varis_set)
        index += 1

    return MB_new_set
def MBtoPC(data, target, alaph, attribute, is_discrete):
    """Reduce the blanket returned by ``IAMB`` to a parents-and-children set.

    Each blanket member ``x`` is dropped when some subset (size 0..3) of the
    other blanket members gives a p-value above ``alaph`` for ``target``
    versus ``x``.

    Returns:
        ``(PC, ci_number)`` where ``ci_number`` adds the tests run here to
        the count reported by ``IAMB``.
    """
    max_k = 3
    MB, ci_num = IAMB(data, target, alaph, attribute, is_discrete)
    ci_number = 0
    ci_number += ci_num

    PC = MB.copy()
    for x in MB:
        # Conditioning sets always come from the full blanket, even after
        # other members have already been dropped from PC.
        others = [v for v in MB if v != x]
        dropped = False
        for size in range(min(len(others), max_k) + 1):
            for Z in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, Z, is_discrete)
                if pval > alaph:
                    PC.remove(x)
                    dropped = True
                    break
            if dropped:
                break

    return PC, ci_number
def HITON_PC(data, target, alaph, is_discrete=True):
    """Learn a parents-and-children set of ``target`` by interleaved add/prune.

    Variables marginally dependent on ``target`` (p-value <= ``alaph``) are
    ranked by decreasing dependence and admitted one at a time.  After each
    admission every current member is re-checked, newest first, against
    subsets (size 0..3) of the other members and removed when a test returns
    a p-value above ``alaph``.  If the newly admitted variable itself is
    removed, the re-check for that admission stops immediately.

    Args:
        data: two-dimensional data set; only its column count is read here.
        target: index of the target variable.
        alaph: significance threshold compared against the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(PC, sepset, ci_number)``: the de-duplicated member list, a
        per-variable list with the conditioning set that removed that
        variable (empty otherwise), and the number of tests performed.
    """
    _, kVar = np.shape(data)
    max_k = 3
    sepset = [[] for _ in range(kVar)]
    ci_number = 0

    # Rank the marginally dependent variables, strongest first.
    variDepSet = []
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_gp, dep_gp = cond_indep_test(data, target, x, [], is_discrete)
        if pval_gp <= alaph:
            variDepSet.append([x, dep_gp])
    variDepSet = sorted(variDepSet, key=lambda item: item[1], reverse=True)
    candidate_PC = [item[0] for item in variDepSet]

    PC = []
    for x in candidate_PC:
        PC.append(x)
        # Visit indices len(PC)-1 .. 0 exactly once.  Stopping at index 0
        # keeps the scan from wrapping around to PC[-1], which would re-test
        # the newest variable and inflate ci_number.  At most one element is
        # removed per step, so the index always stays in range.
        for PC_index in range(len(PC) - 1, -1, -1):
            y = PC[PC_index]
            conditions_Set = [i for i in PC if i != y]
            Slength = min(len(conditions_Set), max_k)
            removed = False
            for j in range(Slength + 1):
                for s in subsets(conditions_Set, j):
                    ci_number += 1
                    conditions_test_set = [i for i in s]
                    pval_rm, _ = cond_indep_test(
                        data, target, y, conditions_test_set, is_discrete)
                    if pval_rm > alaph:
                        sepset[y] = [i for i in conditions_test_set]
                        PC.remove(y)
                        removed = True
                        break
                if removed:
                    break
            # The new variable did not survive: the remaining members were
            # already validated before it was added, so skip them.
            if removed and y == x:
                break

    return list(set(PC)), sepset, ci_number
def IAMBnPC(data, target, alaph, is_discrete=True):
    """Grow a candidate Markov blanket greedily, then prune it level by level.

    Growing: repeatedly add the variable with the largest dependence on
    ``target`` given the current candidate set, among those whose p-value is
    at most ``alaph``; stop when none qualifies.

    Shrinking: for conditioning-set sizes 0, 1, 2, ... a member is discarded
    when some subset of that size, drawn from the members still kept, gives
    a p-value of at least ``alaph``.  The size keeps growing while it is at
    most 3 and does not exceed the number of members still kept.

    Returns:
        ``(MB, ci_number)``: the remaining members in ascending index order
        and the number of tests performed.
    """
    _, kVar = np.shape(data)
    ci_number = 0
    CMB = []

    # growing phase
    while True:
        scored = []
        for x in range(kVar):
            if x == target or x in CMB:
                continue
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval <= alaph:
                scored.append([x, dep])
        if scored == []:
            break
        scored = sorted(scored, key=lambda pair: pair[1], reverse=True)
        CMB.append(scored[0][0])

    # shrinking phase
    TestMB = sorted(CMB.copy())   # the ordering is cosmetic only
    p = len(TestMB)
    kept = [True] * p             # kept[i] is False once TestMB[i] is discarded
    max_k = 3                     # largest conditioning-set size tried
    size = 0
    while True:
        for y in range(p):
            if not kept[y]:
                continue
            pool = [i for i in range(p) if i != y and kept[i]]
            for S in subsets(pool, size):
                cond_vars = [TestMB[i] for i in S]
                ci_number += 1
                pval_sp, _ = cond_indep_test(data, target, TestMB[y],
                                             cond_vars, is_discrete)
                if pval_sp >= alaph:
                    kept[y] = False
                    break
        size += 1
        # Continue only while enough members remain for the next size.
        if not (sum(kept) >= size and size <= max_k):
            break

    MB = [TestMB[i] for i in range(p) if kept[i]]
    return MB, ci_number
def MBbyMB(data, target, alpha, is_discrete=True):
    """Orient the edges around ``target`` by expanding Markov blankets breadth-first.

    Variables are taken from a FIFO queue that starts with ``target``.  For
    each dequeued variable ``A`` its blanket is obtained from ``MMMB``, every
    blanket member is queued, blanket members that can be separated from
    ``A`` (conditioning sets of size 0..3 drawn from the rest of the blanket)
    become spouse candidates, v-structures are marked, and ``meek`` is
    applied to the three matrices.

    Matrix encoding used below: ``pdag[i, j] == -1`` with ``pdag[j, i] == 0``
    marks ``i -> j``; ``pdag[i, j] == pdag[j, i] == 1`` marks an undirected
    edge.

    Returns:
        ``(parents, children, PC, undirected)`` of ``target`` read from
        ``pdag``; ``PC`` is the union of the other three lists.

    NOTE(review): ``ci_test`` is accumulated but never returned.
    """
    ci_test = 0
    max_k = 3
    _, kvar = np.shape(data)
    DAG = np.zeros((kvar, kvar))
    pdag = DAG.copy()
    G = DAG.copy()
    mb_calcualted = [True for i in range(kvar)]
    all_pc = [[] for i in range(kvar)]
    all_mb = [[] for i in range(kvar)]
    all_can_spouse = [[] for i in range(kvar)]
    # The inner lists of a row are the same object, but cells are only ever
    # rebound (never mutated in place) below, so rows do not leak into
    # each other.
    all_sepset = [[[]] * kvar for i in range(kvar)]
    Q = [target]
    tmp = []  # variables already dequeued and processed
    num_calculated = 0
    while len(tmp) <= kvar and len(Q) > 0:
        A = Q[0]
        del Q[0]
        if A in tmp:
            continue
        else:
            tmp.append(A)
        # get MB(A)
        if mb_calcualted[A]:
            all_mb[A], ntest = MMMB(data, A, alpha, is_discrete)
            ci_test += ntest
            mb_calcualted[A] = False
        # Start from the whole blanket; separable members are removed below.
        all_pc[A] = all_mb[A].copy()
        for B in all_mb[A]:
            Q.append(B)
            DAG[A, B] = 1
            DAG[B, A] = 1
            # Only mark the edge as undirected if nothing is recorded yet.
            # NOTE(review): nesting of the two G assignments under this
            # condition was reconstructed from collapsed source - confirm.
            if pdag[A, B] == 0 and pdag[B, A] == 0:
                pdag[A, B] = 1
                pdag[B, A] = 1
                G[A, B] = 1
                G[B, A] = 1
            cutSetSize = 0
            break_flag = False
            can_pc = [i for i in all_mb[A] if i != B]
            while len(can_pc) >= cutSetSize and cutSetSize <= max_k:
                SS = subsets(can_pc, cutSetSize)
                for z in SS:
                    ci_test += 1
                    pval, _ = cond_indep_test(data, B, A, z, is_discrete)
                    if pval > alpha:
                        # B is separable from A: drop the edge everywhere and
                        # remember B as a spouse candidate of A.
                        all_sepset[A][B] = [i for i in z]
                        all_sepset[B][A] = [i for i in z]
                        DAG[A, B] = 0
                        DAG[B, A] = 0
                        pdag[A, B] = 0
                        pdag[B, A] = 0
                        G[A, B] = 0
                        G[B, A] = 0
                        all_pc[A] = [i for i in all_pc[A] if i != B]
                        all_can_spouse[A].append(B)
                        break_flag = True
                        break
                if break_flag:
                    break
                cutSetSize += 1
        # find v-structures
        for C in all_can_spouse[A]:
            for B in all_pc[A]:
                # A->B<-C
                # The collider is marked whenever B is absent from the
                # separating set of (A, C); no extra test is run here.
                if B not in all_sepset[A][C]:
                    DAG[A, B] = 1
                    DAG[B, A] = 1
                    pdag[A, B] = -1
                    pdag[B, A] = 0
                    pdag[C, B] = -1
                    pdag[B, C] = 0
                    G[A, B] = 1
                    G[B, A] = 0
                    G[C, B] = 1
                    G[B, C] = 0
        # presumably applies Meek's orientation rules; verify against meek()
        [DAG, pdag, G] = meek(DAG, pdag, G, kvar)
        num_calculated += 1
        # Once more variables than |MB(target)| have been processed, stop as
        # soon as no undirected mark (1) touches the target.
        if num_calculated > len(all_mb[target]):
            if 1 not in pdag[target, :] and 1 not in pdag[:, target]:
                break
    parents = [i for i in range(kvar) if pdag[i, target] == -1]
    children = [i for i in range(kvar) if pdag[target, i] == -1]
    undirected = [i for i in range(kvar) if pdag[target, i] == 1]
    PC = list(set(parents).union(set(children)).union(set(undirected)))
    return parents, children, PC, undirected

# Example usage (kept from the original source):
# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
def BAMB(data, target, alaph, is_discrete=True):
    """Learn a Markov blanket of ``target`` by interleaving PC and spouse search.

    Variables marginally dependent on ``target`` are processed in order of
    decreasing dependence.  For each admitted variable ``A``:

    * step one  - ``A`` joins the candidate PC set (``CPC``) unless a subset
      (size 0..3) of ``CPC`` separates it from ``target``; the older members
      are then re-checked.
    * spouse candidates - every variable outside ``CPC`` that is dependent on
      ``target`` given its recorded separating set plus ``A``.
    * steps 2-1 .. 2-3 - three successive filters over those candidates.
    * step three - members of ``CPC`` are re-checked while conditioning on
      subsets of ``CPC`` enlarged by the spouse candidates of their elements.

    Args:
        data: two-dimensional data set; only its column count is read here.
        target: index of the target variable.
        alaph: significance threshold compared against the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: the union of ``CPC`` and the spouse candidates
        of its members, and the number of tests performed.

    NOTE(review): the nesting of the flag checks in step 2-1 was rebuilt
    from whitespace-collapsed source (a removal ends the scan for the
    current candidate) - confirm against the upstream file.
    """
    ci_number = 0
    _, kVar = np.shape(data)
    max_k = 3
    CPC = []
    sepset = [[] for _ in range(kVar)]
    CSPT = [[] for _ in range(kVar)]   # spouse candidates per PC member
    SP = [[] for _ in range(kVar)]     # working list used by steps 2-1..2-3

    variDepSet = []
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_f, dep_f = cond_indep_test(data, target, x, [], is_discrete)
        if pval_f > alaph:
            sepset[x] = []
        else:
            variDepSet.append([x, dep_f])
    variDepSet = sorted(variDepSet, key=lambda item: item[1], reverse=True)

    for variIndex in variDepSet:
        A = variIndex[0]

        # ---- step one: try to admit A into CPC --------------------------
        Slength = min(len(CPC), max_k)
        breakFlag = False
        for j in range(Slength + 1):
            for Z in subsets(CPC, j):
                ci_number += 1
                convari = [i for i in Z]
                pval_TAZ, _ = cond_indep_test(data, target, A, convari,
                                              is_discrete)
                if pval_TAZ > alaph:
                    sepset[A] = convari
                    breakFlag = True
                    break
            if breakFlag:
                break
        if breakFlag:
            # A is separated from the target: nothing else to do for it.
            continue

        CPC_ReA = CPC.copy()   # members present before A was admitted
        CPC.append(A)
        for B in reversed(CPC_ReA):
            conditionSet = [i for i in CPC_ReA if i != B]
            Clength = min(len(conditionSet), max_k)
            flag1 = False
            for j in range(Clength + 1):
                for Z in subsets(conditionSet, j):
                    ci_number += 1
                    convari = [i for i in Z]
                    pval_TBZ, _ = cond_indep_test(data, target, B, convari,
                                                  is_discrete)
                    if pval_TBZ >= alaph:
                        CPC.remove(B)
                        CSPT[B] = []
                        sepset[B] = convari
                        flag1 = True
                        # Stop at the first separating set; testing further
                        # subsets would attempt to remove B a second time.
                        break
                if flag1:
                    break

        # ---- candidate spouses reached through A ------------------------
        CSPT[A] = []
        pval_CSPT = []
        for C in range(kVar):
            if C == target or C in CPC:
                continue
            conditionSet = [i for i in sepset[C]]
            conditionSet.append(A)
            conditionSet = list(set(conditionSet))
            ci_number += 1
            pval_CAT, _ = cond_indep_test(data, target, C, conditionSet,
                                          is_discrete)
            if pval_CAT <= alaph:
                CSPT[A].append(C)
                pval_CSPT.append([C, pval_CAT])

        # ---- step 2-1: condition on one other candidate plus A ----------
        pval_CSPT = sorted(pval_CSPT, key=lambda item: item[1], reverse=False)
        SP[A] = []
        for entry in pval_CSPT:
            E = entry[0]
            SP[A].append(E)
            # Indices len-1 .. 0, each visited once (no wrap to index -1).
            for index_spa in range(len(SP[A]) - 1, -1, -1):
                x = SP[A][index_spa]
                breakFlag = False
                for Z in [i for i in SP[A] if i != x]:
                    conditionvari = [Z]
                    if A not in conditionvari:
                        conditionvari.append(A)
                    ci_number += 1
                    pval_TXZ, _ = cond_indep_test(data, target, x,
                                                  conditionvari, is_discrete)
                    if pval_TXZ > alaph:
                        SP[A].remove(x)
                        breakFlag = True
                        break
                if breakFlag:
                    break

        # ---- step 2-2: test A against each candidate --------------------
        pval_CSPT_new = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for entry in pval_CSPT_new:
            E = entry[0]
            SP[A].append(E)
            for index_spa in range(len(SP[A]) - 1, -1, -1):
                x = SP[A][index_spa]
                breakFlag = False
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                Zalength = min(len(ZAllSubsets), max_k)
                for j in range(Zalength + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = [i for i in Z]
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(data, A, x, Z,
                                                      is_discrete)
                        if pval_TXZ > alaph:
                            SP[A].remove(x)
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                # The freshly added candidate was rejected: stop this scan.
                if breakFlag and x == E:
                    break

        # ---- step 2-3: test target against each candidate given Z + A ---
        pval_CSPT_fin = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for entry in pval_CSPT_fin:
            E = entry[0]
            SP[A].append(E)
            for index_spa in range(len(SP[A]) - 1, -1, -1):
                x = SP[A][index_spa]
                breakFlag = False
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                Zalength = min(len(ZAllSubsets), max_k)
                for j in range(Zalength + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = [i for i in Z]
                        Z.append(A)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(data, target, x, Z,
                                                      is_discrete)
                        if pval_TXZ >= alaph:
                            SP[A].remove(x)
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                if breakFlag and x == E:
                    break
        CSPT[A] = SP[A]

        # ---- step three: remove false positives from CPC ----------------
        CPC_temp = CPC.copy()
        for x in reversed(CPC_temp):
            flag2 = False
            ZZALLsubsets = [i for i in CPC if i != x]
            Zlength = min(len(ZZALLsubsets), max_k)
            for j in range(Zlength + 1):
                for Z in subsets(ZZALLsubsets, j):
                    # Enlarge Z with the spouse candidates of its elements.
                    conditionSet = [
                        i for y in Z for i in CSPT[y] if i not in CPC
                    ]
                    conditionSet = list(set(conditionSet).union(set(Z)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x, conditionSet,
                                              is_discrete)
                    if pval >= alaph:
                        CPC.remove(x)
                        CSPT[x] = []
                        flag2 = True
                        break
                if flag2:
                    break
            # A itself was rejected: the older members need no re-check.
            if flag2 and x == A:
                break

    spouseT = [j for i in CPC for j in CSPT[i]]
    MB = list(set(CPC).union(set(spouseT)))
    return MB, ci_number
def pc(Data, alpha):
    """Run the three PC stages: skeleton, v-structures, orientation rules.

    Stage 1 starts from the complete undirected graph and removes the edge
    ``x - y`` when some size-``n`` subset of the other neighbours of ``x``
    gives a p-value above ``alpha``; ``n`` grows by one per pass until no
    node has at least ``n`` neighbours.  Stage 2 orients ``x -> y <- z`` for
    non-adjacent ``x``, ``z`` whose recorded separating sets do not contain
    ``y``.  Stage 3 applies three orientation rules until nothing changes.

    Args:
        Data: two-dimensional array (read through ``.shape``); columns are
            variables.
        alpha: significance threshold compared against the p-value.

    Returns:
        ``(pDAG, ind_test)``.  In ``pDAG``, ``pDAG[i, j] == -1`` together
        with ``pDAG[j, i] == 0`` marks ``i -> j``, and
        ``pDAG[i, j] == pDAG[j, i] == 1`` marks an undirected edge.
        ``ind_test`` is the number of tests performed.
    """
    time_start = time.time()
    ind_test = 0  # the number of conditional independence tests
    _, NbVar = Data.shape

    # Every cell needs its own list: ``[[]] * NbVar`` would make all cells
    # of a row share one list and mix separating sets of different pairs.
    sepset = [[[] for _ in range(NbVar)] for _ in range(NbVar)]

    DAG = np.ones((NbVar, NbVar))
    for i in range(NbVar):
        DAG[i, i] = 0

    # stage 1: construct skeleton
    n = 0
    done = False
    while not done:
        done = True
        for x in range(NbVar):
            adjx = [i for i in range(NbVar) if DAG[x, i] == 1]
            if len(adjx) < n:
                continue
            done = False
            for y in adjx:
                cx_y = [i for i in adjx if i != y]
                for S in subsets(cx_y, n):
                    cond = list(map(int, S))
                    pval, _ = cond_indep_test(Data, x, y, cond, True)
                    ind_test += 1
                    if pval > alpha:
                        DAG[x, y] = 0
                        DAG[y, x] = 0
                        if cond not in sepset[x][y]:
                            sepset[x][y].append(cond)
                        if cond not in sepset[y][x]:
                            sepset[y][x].append(list(cond))
                        break
        n += 1

    # stage 2: create v-structures, X - Y - Z  =>  X -> Y <- Z
    print('stage 2')
    pDAG = DAG.copy()
    edges = [(i, j) for i in range(NbVar) for j in range(NbVar)
             if DAG[i, j] == 1]
    for x, y in edges:
        for z in range(NbVar):
            if z == x or DAG[y, z] != 1 or DAG[x, z] != 0:
                continue
            # y must be absent from every separating set recorded for (x, z).
            if any(y in s for s in sepset[x][z]):
                continue
            pDAG[x, y] = -1
            pDAG[y, x] = 0
            pDAG[z, y] = -1
            pDAG[y, z] = 0

    def _undirected(a, b):
        # True while the edge a - b carries no orientation yet.
        return pDAG[a, b] == 1 and pDAG[b, a] == 1

    def _orient(a, b):
        pDAG[a, b] = -1
        pDAG[b, a] = 0

    # stage 3: orient remaining edges.  Only undirected edges are touched,
    # so each change removes an undirected edge and the loop terminates.
    print('stage 3')
    old_pDAG = np.zeros((NbVar, NbVar))
    while not (pDAG == old_pDAG).all():
        # Compare against a copy; aliasing would end the loop after one pass.
        old_pDAG = pDAG.copy()

        # rule 1: a -> b - c, with a and c non-adjacent  =>  b -> c
        for a in range(NbVar):
            for b in range(NbVar):
                if pDAG[a, b] != -1:
                    continue
                for c in range(NbVar):
                    if c != a and DAG[a, c] == 0 and _undirected(b, c):
                        _orient(b, c)

        # rule 2: a -> c -> b and a - b  =>  a -> b
        for a in range(NbVar):
            for b in range(NbVar):
                if _undirected(a, b) and np.any(
                        (pDAG[a, :] == -1) & (pDAG[:, b] == -1)):
                    _orient(a, b)

        # rule 3: a - c -> b, a - d -> b, c and d non-adjacent, a - b => a -> b
        for a in range(NbVar):
            for b in range(NbVar):
                if not _undirected(a, b):
                    continue
                C = [c for c in range(NbVar)
                     if _undirected(a, c) and pDAG[c, b] == -1]
                if any(DAG[c, d] == 0 for c in C for d in C if c != d):
                    _orient(a, b)

    time_end = time.time()
    time_cost = time_end - time_start
    print('running time is:', time_cost, 's')
    return pDAG, ind_test
def getPCD(data, target, alaph, is_discrete):
    """Build a parents/children/descendants candidate set for ``target``.

    Each round has two parts:

    * forward - every variable outside ``PCD`` is tested against ``target``
      given each subset (size 0..3) of ``PCD``.  A p-value above ``alaph``
      discards the variable for this round and records the subset in
      ``sepset``; otherwise its smallest dependence is kept.  The variable
      with the largest such minimum joins ``PCD``.
    * backward - members of ``PCD`` are re-checked, newest first, against
      subsets of the other members and removed on a p-value above
      ``alaph``; if the newest member is removed the scan ends.

    Rounds repeat until the forward part finds no variable to add.

    Returns:
        ``(PCD, sepset, ci_number)``: the de-duplicated candidate list, the
        per-variable separating sets, and the number of tests performed.
    """
    _, kVar = np.shape(data)
    max_k = 3
    PCD = []
    ci_number = 0
    # sepset[v] stores a conditioning set that made v independent of target.
    sepset = [[] for _ in range(kVar)]

    while True:
        variDepSet = []   # entries are [variable, its minimum dependence]
        CanPCD = [i for i in range(kVar) if i != target and i not in PCD]
        for vari in CanPCD.copy():
            breakFlag = False
            dep_gp_min = float("inf")
            vari_min = -1
            Plength = min(len(PCD), max_k)
            for j in range(Plength + 1):
                for S in subsets(PCD, j):
                    ci_number += 1
                    pval_gp, dep_gp = cond_indep_test(data, target, vari, S,
                                                      is_discrete)
                    if pval_gp > alaph:
                        vari_min = -1
                        CanPCD.remove(vari)
                        sepset[vari] = [i for i in S]
                        breakFlag = True
                        break
                    elif dep_gp < dep_gp_min:
                        dep_gp_min = dep_gp
                        vari_min = vari
                if breakFlag:
                    break
            if vari_min in CanPCD:
                variDepSet.append([vari_min, dep_gp_min])

        # No admissible variable left: PCD can no longer change.
        if variDepSet == []:
            break

        variDepSet = sorted(variDepSet, key=lambda item: item[1], reverse=True)
        y = variDepSet[0][0]
        PCD.append(y)

        # Visit indices len(PCD)-1 .. 0 exactly once.  Stopping at index 0
        # keeps the scan from wrapping around to PCD[-1], which would repeat
        # the tests of the newest member and inflate ci_number.
        for pcd_index in range(len(PCD) - 1, -1, -1):
            x = PCD[pcd_index]
            removed = False
            conditionSetALL = [i for i in PCD if i != x]
            Slength = min(len(conditionSetALL), max_k)
            for j in range(Slength + 1):
                for S in subsets(conditionSetALL, j):
                    ci_number += 1
                    pval_sp, _ = cond_indep_test(data, target, x, S,
                                                 is_discrete)
                    if pval_sp > alaph:
                        PCD.remove(x)
                        sepset[x] = [i for i in S]
                        removed = True
                        break
                if removed:
                    break
            if removed and x == y:
                break

    return list(set(PCD)), sepset, ci_number
def STMB(data, target, alaph, is_discrete=True):
    """Learn a Markov blanket of ``target``: PC set first, then spouses.

    1. ``RecognizePC`` prunes all other variables to a candidate PC set
       (``PCT``) and reports separating sets.
    2. For each ``y`` in ``PCT`` and each ``x`` outside it, ``x`` is a spouse
       candidate through ``y`` when ``target`` and ``x`` are dependent given
       ``sepset[x]`` plus ``y``.  Before accepting it, ``y`` is re-tested
       against subsets (size 0..3) of ``PCT`` plus ``x`` without ``y``; if
       one separates ``y`` from ``target``, ``y`` is scheduled for removal
       and the scan for that ``y`` stops.
    3. Spouse candidates, then PC members, are each re-tested once given
       all remaining PC members and spouses.

    Returns:
        ``(MB, ci_number)``: union of the final PC members and spouses, and
        the number of tests performed (including those of ``RecognizePC``).

    NOTE(review): another ``STMB`` is defined further down in this source;
    if both live in one module the later definition is the one in effect.
    """
    _, kVar = np.shape(data)
    ci_number = 0
    PCT = [i for i in range(kVar) if i != target]
    PCT, sepset, ci_num2 = RecognizePC(data, target, PCT, alaph, is_discrete)
    ci_number += ci_num2
    spouse = [[] for _ in range(kVar)]
    remove = []

    for y in PCT:
        X_set = [i for i in range(kVar) if i != target and i not in PCT]
        breakFlag = False
        for x in X_set:
            # Variable indices stay integers, like in every other test of
            # this function; string indices would address different columns.
            conditionsSet = [i for i in sepset[x]]
            conditionsSet.append(y)
            conditionsSet = list(set(conditionsSet))
            ci_number += 1
            pval_xt, _ = cond_indep_test(data, target, x, conditionsSet,
                                         is_discrete)
            if pval_xt <= alaph:
                Zset = [i for i in PCT]
                Zset.append(x)
                Zset = list(set(Zset))
                if y in Zset:
                    Zset.remove(y)
                Zlength = min(len(Zset), 3)
                # range(Zlength + 1): sizes 0..Zlength inclusive are needed.
                for j in range(Zlength + 1):
                    for Z in subsets(Zset, j):
                        ci_number += 1
                        pval_yt, _ = cond_indep_test(data, target, y, Z,
                                                     is_discrete)
                        if pval_yt > alaph:
                            remove.append(y)
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                if breakFlag:
                    break
                else:
                    spouse[y].append(x)

    PCT = [i for i in PCT if i not in remove]

    # Re-test every spouse candidate given PC members and sibling spouses.
    for y in range(len(spouse)):
        if spouse[y] != []:
            for x in spouse[y].copy():
                testSet = [
                    i for i in range(kVar) if i in PCT or i in spouse[y]
                ]
                testSet = list(set(testSet))
                if x in testSet:
                    testSet.remove(x)
                ci_number += 1
                pval_xt_testset, _ = cond_indep_test(data, target, x, testSet,
                                                     is_discrete)
                if pval_xt_testset > alaph:
                    spouse[y].remove(x)

    # Re-test every PC member given all other PC members and all spouses.
    for x in PCT.copy():
        conditionsVariSet = [i for j in range(len(spouse)) for i in spouse[j]]
        conditionsVariSet = list(set(conditionsVariSet).union(set(PCT)))
        if x in conditionsVariSet:
            conditionsVariSet.remove(x)
        ci_number += 1
        pval_final, _ = cond_indep_test(data, target, x, conditionsVariSet,
                                        is_discrete)
        if pval_final > alaph:
            PCT.remove(x)

    spouse = [i for j in range(len(spouse)) for i in spouse[j]]
    MB = list(set(PCT).union(set(spouse)))
    return MB, ci_number

# Example usage and recorded results (kept from the original source):
# import pandas as pd
# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v3.csv")
# print("the file read")
#
# target = 11
# alaph = 0.05
#
# MBs=STMB(data, target, alaph, is_discrete=False)
# print("MBs is: "+str(MBs))
#
# F1 is: 0.7526467421467425
# Precision is: 0.8019166666666667
# Recall is: 0.7789583333333334
# time is: 11.730078125
#
# 5000
# F1 is: 0.86
# Precision is: 0.86
# Recall is: 0.87
# Distance is: 0.21
# ci_number is: 142.295
# time is: 70.24
def LRH(data, target, alaph, is_discrete=True):
    """Learn a Markov blanket of ``target`` with a select/exclude/add loop.

    Each round:

    * selection - collect the variables outside ``M`` that are dependent on
      ``target`` given ``M`` (p-value <= ``alaph``).  None ends the growing
      phase; exactly one is added directly.
    * exclusion - a selected ``x`` is kept only if no subset (size 0..3) of
      the selected variables that are dependent on ``x`` given ``M``,
      united with ``M``, separates ``x`` from ``target``.
    * addition - among the kept variables (or, if none was kept, among all
      selected ones) those sharing the largest dependence join ``M``.

    Afterwards every member of ``M`` is re-tested once given the other
    members and removed on a p-value above ``alaph``.

    Returns:
        ``(M, ci_number)``: the blanket and the number of tests performed.
    """

    def _strongest(pairs):
        # Variables whose dependence equals the maximum in ``pairs``.
        ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        winners = []
        if ranked != []:
            dep_max = ranked[0][1]
            for entry in ranked:
                if entry[1] == dep_max:
                    winners.append(entry[0])
                else:
                    break
        return winners

    _, kVar = np.shape(data)
    max_k = 3
    ci_number = 0
    M = []

    while True:
        # selection
        M1 = []
        x_dep_set = []
        for x in range(kVar):
            if x == target or x in M:
                continue
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, M, is_discrete)
            if pval <= alaph:
                M1.append(x)
                x_dep_set.append([x, dep])

        if M1 == []:
            break
        if len(M1) == 1:
            M.append(M1[0])
            continue

        # exclusion
        M2 = []
        for x in M1:
            NX = []
            for y in M1:
                if y == x:
                    continue
                ci_number += 1
                pval, _ = cond_indep_test(data, x, y, M, is_discrete)
                if pval <= alaph:
                    NX.append(y)

            separated = False
            for j in range(min(len(NX), max_k) + 1):
                for Z in subsets(NX, j):
                    conditionset = list(set(Z).union(set(M)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x, conditionset,
                                              is_discrete)
                    if pval > alaph:
                        separated = True
                        break
                if separated:
                    break
            if not separated:
                M2.append(x)

        # addition
        if M2 != []:
            # Re-score the kept variables; the selection scores are dropped.
            x_dep_set = []
            for x in M2:
                ci_number += 1
                pval, dep = cond_indep_test(data, target, x, M, is_discrete)
                if pval <= alaph:
                    x_dep_set.append([x, dep])
        Y = _strongest(x_dep_set)
        M = list(set(M).union(set(Y)))

    # shrinking
    for x in M.copy():
        conditionset = [i for i in M if i != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, conditionset, is_discrete)
        if pval > alaph:
            M.remove(x)

    return M, ci_number

# Example usage and recorded results (kept from the original source):
# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v1.csv")
# print("the file read")
#
# target = 19
# alaph = 0.01
#
# MB = LRH(data, target, alaph)
# print("MBs is: " + str(MB))
#
# 500
# F1 is: 0.76
# Precision is: 0.85
# Recall is: 0.76
# Distance is: 0.34
# ci_number is: 331.96
# time is: 43.03
#
# 5000
# F1 is: 0.91
# Precision is: 0.90
# Recall is: 0.94
# Distance is: 0.14
# ci_number is: 1.00
# time is: 238.92
def STMB(data, target, alaph, is_discrete=True):
    """Learn a Markov blanket of ``target``: PC set first, then spouses.

    ``RecognizePC`` supplies a candidate PC set and separating sets.  A
    variable ``x`` outside that set becomes a spouse through PC member ``y``
    when it is dependent on ``target`` given ``sepset[x]`` plus ``y``, unless
    ``y`` itself can then be separated from ``target`` by a subset (size
    0..3) of the PC set plus ``x``; in that case ``y`` is dropped from the PC
    set and its scan ends.  Finally spouses and PC members are each
    re-tested once given all remaining PC members and spouses.

    Returns:
        ``(MB, ci_number)``: the blanket and the total number of tests,
        including those reported by ``RecognizePC``.
    """
    _, kVar = np.shape(data)
    everyone_else = [v for v in range(kVar) if v != target]
    PCT, sepset, pc_tests = RecognizePC(data, target, everyone_else, alaph,
                                        is_discrete)
    ci_number = 0
    ci_number += pc_tests

    spouse = [[] for _ in range(kVar)]
    dropped = []
    # PCT is not modified while scanning, so the outside set is fixed.
    outside = [v for v in range(kVar) if v != target and v not in PCT]

    for y in PCT:
        for x in outside:
            cond = list(sepset[x])
            cond.append(y)
            cond = list(set(cond))
            ci_number += 1
            pval_xt, _ = cond_indep_test(data, target, x, cond, is_discrete)
            if pval_xt > alaph:
                continue

            # x looks like a spouse via y; check y is not a false positive.
            pool = list(PCT)
            pool.append(x)
            pool = list(set(pool))
            if y in pool:
                pool.remove(y)

            y_separated = False
            # Sizes 0..min(len(pool), 3) inclusive.
            for size in range(min(len(pool), 3) + 1):
                for Z in subsets(pool, size):
                    ci_number += 1
                    pval_yt, _ = cond_indep_test(data, target, y, Z,
                                                 is_discrete)
                    if pval_yt > alaph:
                        dropped.append(y)
                        y_separated = True
                        break
                if y_separated:
                    break

            if y_separated:
                break
            spouse[y].append(x)

    PCT = [v for v in PCT if v not in dropped]

    # Re-test each spouse given the PC set and the spouses of the same y.
    for y, found in enumerate(spouse):
        if found == []:
            continue
        for x in found.copy():
            testSet = [v for v in range(kVar) if v in PCT or v in found]
            testSet = list(set(testSet))
            if x in testSet:
                testSet.remove(x)
            ci_number += 1
            pval_sp, _ = cond_indep_test(data, target, x, testSet,
                                         is_discrete)
            if pval_sp > alaph:
                found.remove(x)

    # Re-test each PC member given everything else in the blanket.
    for x in PCT.copy():
        all_spouses = [v for group in spouse for v in group]
        cond = list(set(all_spouses).union(set(PCT)))
        if x in cond:
            cond.remove(x)
        ci_number += 1
        pval_final, _ = cond_indep_test(data, target, x, cond, is_discrete)
        if pval_final > alaph:
            PCT.remove(x)

    flat_spouses = [v for group in spouse for v in group]
    MB = list(set(PCT).union(set(flat_spouses)))
    return MB, ci_number
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Learn the local structure around ``target`` one Markov blanket at a time.

    Starting from ``target``, the function repeatedly takes a variable ``x``
    from a wait list, obtains ``MB[x]`` from ``IAMB``, splits it into
    parents/children (``pc``) and the remainder, looks for v-structures
    ``x -> pc[j] <- rest[i]``, records them in three adjacency matrices and
    then calls ``Meek`` to propagate orientations.  It stops as soon as every
    edge attached to ``target`` is oriented, or when the wait list is empty.

    Matrix conventions (from the assignments below):
        * ``DAG``:  1 marks an adjacency, 0 no edge.
        * ``pdag``: ``pdag[a, b] == -1`` marks ``a -> b``; 1 marks an
          undirected edge; 0 no edge.
        * ``G``:    ``G[a, b] == 1`` marks ``a -> b`` (or an undirected edge
          when set in both directions).

    Args:
        data: 2-D data set; only its column count is read directly.
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is read as dependence.
        is_discrete: forwarded unchanged to ``IAMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, undirected)``: indices ``i`` with
        ``pdag[i, target] == -1``, ``pdag[target, i] == -1`` and
        ``pdag[target, i] == 1`` respectively.
    """
    _, p = np.shape(data)
    Donelist = []  # variables whose MB has been computed
    Waitlist = [target]  # variables whose MB is still to be computed
    G = np.zeros((p, p))
    pdag = G.copy()
    DAG = G.copy()
    MB = [[] for _ in range(p)]
    # Entries are only ever rebound (never mutated in place), so sharing the
    # initial empty list between cells is harmless.
    sepset = [[[]] * p for _ in range(p)]
    k = 3  # largest conditioning-set size tried when separating x from c

    while Waitlist != []:
        stop = False
        Waitlist_temp = Waitlist.copy()
        for x in Waitlist_temp:
            # spouse[j] collects the variables forming a v-structure with x
            # through pc[j]; it is indexed by POSITION in pc, not by variable.
            spouse = [[] for _ in range(p)]
            Donelist.append(x)
            Waitlist.remove(x)

            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            for i in MB[x]:
                Waitlist.append(i)

            # Skip x when its MB is strictly contained in another MB, or when
            # every member has already been processed.
            findflag = False
            for i in range(len(MB)):
                if set(MB[x]) < set(MB[i]):
                    findflag = True
                    break
            if set(MB[x]) <= set(Donelist):
                findflag = True
            if findflag:
                continue

            # Split MB[x] into parents/children and the rest.
            pc = MB[x].copy()
            for c in MB[x]:
                cutsetsize = 0
                break_flag = False
                CanPC = [i for i in MB[x] if i != c]
                while len(CanPC) >= cutsetsize and cutsetsize <= k:
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        if pval <= alaph:
                            continue
                        # s separates x from c, so c is not a parent/child.
                        sepset[x][c] = s
                        pc.remove(c)
                        break_flag = True
                        break
                    if break_flag:
                        break
                    cutsetsize += 1

            # Detect v-structures x -> pc[j] <- rest[i].
            rest = [i for i in MB[x] if i not in pc]
            for r in rest:
                for j in range(len(pc)):
                    if pc[j] in sepset[x][r]:
                        continue
                    # Condition on the separating set plus the candidate
                    # collider pc[j].  The previous code added `rest[i]`
                    # (one of the two tested variables) and built the set
                    # with set(str(...)), which splits a multi-digit index
                    # into its individual characters.
                    # NOTE(review): the labels are kept as strings as in the
                    # original, while the other call in this function passes
                    # integer indices -- confirm what cond_indep_test expects.
                    condition = [str(m) for m in sepset[x][r]]
                    condition = list(set(condition).union({str(pc[j])}))
                    pval, _ = cond_indep_test(data, r, x, condition,
                                              is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(r)

            # Record adjacencies and orient the v-structures found above.
            for i in range(len(pc)):
                b = pc[i]
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in spouse[i]:
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0

                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0

                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Propagate orientations.  The original read an unassigned local
            # `pDAG` here, which raises UnboundLocalError.
            # NOTE(review): assumes Meek returns the updated pdag matrix --
            # confirm against its definition.
            pdag = Meek(DAG, pdag, data)

            # Stop once every edge attached to target is oriented.
            stop = True
            connect = [i for i in range(p) if DAG[target, i] == 1]
            for i in connect:
                if pdag[target, i] != -1 and pdag[i, target] != -1:
                    stop = False
                    break
            if stop:
                break

        if stop:
            break

        # De-duplicate the wait list and drop what has already been handled.
        Waitlist = list(set(Waitlist))
        for i in Donelist:
            if i in Waitlist:
                Waitlist.remove(i)

    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]
    return parents, children, undirected
def MBGSL(data, alpha, is_discrete, selected):
    """Learn a partially directed graph from per-variable Markov blankets.

    Steps, as implemented here:

    1. Compute a Markov blanket for every variable with the learner chosen by
       ``selected`` (1: ``MMMB``, 2: ``HITON_MB``, 3: ``semi_HITON_MB``,
       anything else: ``PCMB``) and mark the memberships in a matrix.
    2. Symmetrise the matrix with an OR rule.
    3. Remove links between ``x`` and ``y`` that some subset (size 0..3) of
       ``(MB[x] - {y}) | (MB[y] - {x})`` renders independent.
    4. Orient ``y -> x`` when a neighbour ``z`` of ``x`` that is not adjacent
       to ``y`` stays dependent on ``y`` given ``{x}``.
    5. Hand the matrices to ``meek`` for further orientation.

    Args:
        data: 2-D data set; only its column count is read directly.
        alpha: significance level; ``pval > alpha`` is read as independence.
        is_discrete: forwarded unchanged to the learners and the CI test.
        selected: integer selecting the Markov-blanket learner (see above).

    Returns:
        ``(pdag, num_CI)``: the ``pdag`` matrix returned by ``meek`` (here
        ``pdag[y, x] == -1`` is written for an oriented edge ``y -> x``) and
        the number of CI tests counted by this function and the learners.
    """
    _, kvar = np.shape(data)
    max_k = 3
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    # Step 1: Markov blanket of every variable.
    for i in range(kvar):
        if selected == 1:
            MB, n_c = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            MB, n_c = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            MB, n_c = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            MB, n_c, _ = PCMB(data, i, alpha, is_discrete)
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # Step 2: OR rule -- keep a link if either side reported it.
    for i in range(kvar):
        for j in range(i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    all_MB = [[j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)]

    # Step 3: remove possible spouse links between linked variables x and y.
    for x in range(kvar):
        for y in all_MB[x]:
            varis = list((set(all_MB[x]).difference([y])).union(
                set(all_MB[y]).difference([x])))
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                for s in subsets(varis, k):
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Clear BOTH directions; the original assigned
                        # PP[x, y] twice and left PP[y, x] set.
                        PP[x, y] = 0
                        PP[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    all_neighbor = [
        [j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)
    ]

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Step 4: orient edges.  PP is reused as scratch space from here on:
    # PP[y, x] == -1 means "y -> x is still a valid orientation".
    for x in range(kvar):
        for y in all_neighbor[x]:
            sz = list((set(all_neighbor[x]).difference(
                all_neighbor[y])).difference([y]))
            for z in sz:
                PP[y, x] = -1
                B = list((set(all_MB[y]).difference([z])).union(
                    set(all_MB[z]).difference([y])))
                break_flag = False
                cutSetSize = 0
                # The `== 0` guard restricts the search to the empty subset
                # of B, i.e. a single test of y vs z given {x}.
                while len(B) >= cutSetSize and cutSetSize == 0:
                    for s in subsets(B, cutSetSize):
                        cond_s = list(set(s).union([x]))
                        num_CI += 1
                        pval, _ = cond_indep_test(data, y, z, cond_s,
                                                  is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1
                            break_flag = True
                            break
                    if break_flag:
                        break
                    cutSetSize += 1

                if PP[y, x] == -1:
                    pdag[y, x] = -1
                    pdag[x, y] = 0
                    G[y, x] = 1
                    G[x, y] = 0
                    break

    # Step 5: propagate orientations.
    DAG, pdag, G = meek(DAG, pdag, G, kvar)

    return pdag, num_CI
def MMPC(data, target, alpha, is_discrete):
    """Select candidate parents/children of ``target`` in two phases.

    Forward phase: in every round each unused variable gets its minimum
    association with ``target`` over subsets of the current candidate set
    (via ``getMinDep``).  A variable whose minimum is ``0`` is shelved for
    good and its separating set stored; among the others, the one with the
    largest minimum joins the candidate set.  The phase ends when no
    variable qualifies.

    Backward phase: every candidate is re-tested against ``target`` given
    subsets (size 0..3) of the other candidates and removed at the first
    subset for which ``pval > alpha``.

    Args:
        data: 2-D data set; only its column count is read directly.
        target: column index of the target variable.
        alpha: significance level.
        is_discrete: forwarded unchanged to the helpers.

    Returns:
        ``(CPC, sepset, ci_number)``: the de-duplicated candidate list, the
        per-variable separating sets recorded in the forward phase, and the
        number of CI tests counted.
    """
    _, n_vars = np.shape(data)
    ci_number = 0
    CPC = []
    shelved = []  # variables found independent of target; never retried
    sepset = [[] for _ in range(n_vars)]

    # Phase I: forward (max-min heuristic).
    while True:
        pool = [
            v for v in range(n_vars)
            if v != target and v not in CPC and v not in shelved
        ]
        best_dep = -float("inf")
        best_var = 0

        for x in pool:
            min_dep, separator, used = getMinDep(
                data, target, x, CPC, alpha, is_discrete)
            ci_number += used

            if min_dep == 0:
                # independent of target given some subset: drop it for good
                shelved.append(x)
                sepset[x] = list(separator)
            elif min_dep > best_dep:
                best_var, best_dep = x, min_dep

        if best_dep >= 0:
            CPC.append(best_var)
            continue
        # nothing was eligible this round, so the candidate set is final
        break

    # Phase II: backward.
    max_k = 3
    for a in CPC.copy():
        others = [v for v in CPC if v != a]
        # conditioning sets are capped at max_k variables to bound the cost
        largest = min(len(others), max_k)

        removed = False
        for size in range(largest + 1):
            for S in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, a, S, is_discrete)
                if pval > alpha:
                    CPC.remove(a)
                    removed = True
                    break
            if removed:
                break

    return list(set(CPC)), sepset, ci_number
def GSBN(data, alpha, is_discrete):
    """Learn a partially directed graph from ``GSMB`` Markov blankets.

    Steps, as implemented here:

    1. Compute the Markov blanket of every variable with ``GSMB`` and mark
       the memberships in an adjacency matrix.
    2. Symmetrise the matrix with an OR rule.
    3. Remove links between ``x`` and ``y`` that some subset (size 0..3) of
       ``(MB[x] | MB[y]) - {x, y}`` renders independent.
    4. Orient ``y -> x`` when a neighbour ``z`` of ``x`` that is not adjacent
       to ``y`` stays dependent on ``y`` given every tried subset plus ``x``.
    5. Hand the matrices to ``meek`` for further orientation.

    Args:
        data: 2-D data set; only its column count is read directly.
        alpha: significance level; ``pval > alpha`` is read as independence.
        is_discrete: forwarded unchanged to ``GSMB`` and the CI test.

    Returns:
        The ``pdag`` matrix returned by ``meek``; this function writes
        ``pdag[y, x] = -1`` for an oriented edge ``y -> x``.
    """
    _, kvar = np.shape(data)
    max_k = 3
    DAG = np.zeros((kvar, kvar))

    # Step 1: Markov blanket of every variable.
    for i in range(kvar):
        MB, _ = GSMB(data, i, alpha, is_discrete)
        for j in MB:
            DAG[i, j] = 1

    # Step 2: OR rule -- keep a link if either side reported it.
    for i in range(kvar):
        for j in range(i):
            if DAG[i, j] != DAG[j, i]:
                DAG[i, j] = 1
                DAG[j, i] = 1

    all_MB = [[j for j in range(kvar) if DAG[i, j] == 1] for i in range(kvar)]

    # Step 3: compute the graph skeleton.
    for x in range(kvar):
        for y in all_MB[x]:
            vs = set(all_MB[x]).union(set(all_MB[y]))
            varis = [i for i in vs if i != x and i != y]
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                for s in subsets(varis, k):
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Clear BOTH directions; the original assigned
                        # DAG[x, y] twice and left DAG[y, x] set.
                        DAG[x, y] = 0
                        DAG[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    # Neighbour lists must be read per cell (i, j).  The original tested the
    # stale loop variables DAG[x, y], which made every list either complete
    # or empty (or raised NameError when y was never bound).
    all_neighbor = [
        [j for j in range(kvar) if DAG[i, j] == 1] for i in range(kvar)
    ]

    PP = DAG.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Step 4: orient edges.  PP[y, x] == -1 means "y -> x is still valid".
    for x in range(kvar):
        for y in all_neighbor[x]:
            PP[y, x] = -1
            nz_vars = [
                i for i in all_neighbor[x]
                if i != y and i not in all_neighbor[y]
            ]
            for z in nz_vars:
                vs_vars = set(all_neighbor[y]).union(all_neighbor[z])
                vs = [i for i in vs_vars if i != z and i != y]
                k = 0
                break_flag = False
                while len(vs) > k and k <= max_k:
                    for s in subsets(vs, k):
                        con_set = list(set(list(s) + [x]))
                        pval, _ = cond_indep_test(data, y, z, con_set,
                                                  is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1
                            break_flag = True
                            break
                    if break_flag:
                        break
                    k += 1

                # NOTE(review): the nesting of this check was reconstructed
                # from whitespace-collapsed source; it is placed inside the
                # z loop so that an orientation needs a witness z.
                if PP[y, x] == -1:
                    pdag[y, x] = -1
                    pdag[x, y] = 0
                    G[y, x] = 1
                    G[x, y] = 0

    # Step 5: propagate orientations / remove cycles.
    DAG, pdag, G = meek(DAG, pdag, G, kvar)

    return pdag
def _semi_hiton_find_separator(data, x, target, pool, alaph, is_disrete):
    """Search subsets of ``pool`` (size 0..3) for one separating x and target.

    Returns ``(found, separator, n_tests)``; ``separator`` is only
    meaningful when ``found`` is true.
    """
    n_tests = 0
    largest = min(len(pool), 3)
    for size in range(largest + 1):
        for s in subsets(pool, size):
            n_tests += 1
            pval, _ = cond_indep_test(data, x, target, s, is_disrete)
            if pval > alaph:
                return True, s, n_tests
    return False, None, n_tests


def semi_HITON_PC(data, target, alaph, is_disrete=True):
    """Select parents/children candidates of ``target``.

    1. Every other variable is tested marginally against ``target``; the
       dependent ones (``pval <= alaph``) are ranked by decreasing ``dep``.
    2. Forward pass: variables are admitted in rank order, and each newcomer
       is dropped again if some subset (size 0..3) of the variables already
       admitted separates it from ``target``.
    3. Backward pass: every admitted variable except the last one admitted
       is re-checked in the same way against the others currently kept.

    Args:
        data: 2-D data set; only its column count is read directly.
        target: column index of the target variable.
        alaph: significance level.
        is_disrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(pc, sep, ci_number)``: the de-duplicated list of kept variables,
        the separating set recorded for each removed variable (indexed by
        variable), and the number of CI tests performed.
    """
    _, p = np.shape(data)
    ci_number = 0
    sep = [[] for _ in range(p)]

    # Marginal screening and ranking.
    scored = []
    for x in range(p):
        if x == target:
            continue
        ci_number += 1
        pval, dep = cond_indep_test(data, target, x, [], is_disrete)
        if pval <= alaph:
            scored.append([x, dep])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    candidate_pc = [pair[0] for pair in scored]

    # Forward pass: admit in rank order, checking only the newcomer.
    current_pc = []
    for x in candidate_pc:
        current_pc.append(x)
        earlier = [v for v in current_pc if v != x]
        found, s, used = _semi_hiton_find_separator(
            data, x, target, earlier, alaph, is_disrete)
        ci_number += used
        if found:
            sep[x] = list(s)
            current_pc.remove(x)

    # Backward pass over everything except the last variable admitted.
    Last_added = current_pc[-1] if len(current_pc) > 0 else None
    for x in current_pc.copy():
        if x == Last_added:
            continue
        others = [v for v in current_pc if v != x]
        found, s, used = _semi_hiton_find_separator(
            data, x, target, others, alaph, is_disrete)
        ci_number += used
        if found:
            current_pc.remove(x)
            sep[x] = list(s)

    return list(set(current_pc)), sep, ci_number
def MBOR(data, target, alaph, is_discrete=True):
    """Estimate a Markov blanket of ``target`` and count the CI tests used.

    Steps, as implemented here:

    1. ``PCSuperSet`` and ``SPSuperSet`` produce supersets of the
       parents/children and of the spouses; their union is ``MBS``.
    2. Columns outside ``MBS`` (other than ``target``) are dropped and
       ``MBtoPC`` extracts a parents/children set from the reduced data.
    3. OR condition: a variable ``x`` of the PC superset that was not
       selected is added back when ``target`` appears in ``MBtoPC`` of ``x``.
    4. For each parent/child ``x`` and each ``y`` in ``MBtoPC`` of ``x``
       (not ``target``, not already in PC), the smallest subset ``Z`` of
       ``MBS - {target, y}`` (size 0..3) that separates ``target`` from
       ``y`` is located; ``y`` is kept as a spouse when adding ``x`` to
       ``Z`` makes them dependent again.

    Args:
        data: data set exposing ``.drop(labels, axis=1)`` with string column
            labels ``str(i)`` -- presumably a pandas DataFrame.
        target: column index of the target variable.
        alaph: significance level.
        is_discrete: forwarded unchanged to the helpers.

    Returns:
        ``(MB, ci_number)``: the de-duplicated union of parents/children and
        spouses, and the number of CI tests counted.
    """
    _, kVar = np.shape(data)
    max_k = 3
    ci_number = 0

    # Step 1: supersets.
    PCS, d_sep, ci_num = PCSuperSet(data, target, alaph, is_discrete)
    ci_number += ci_num
    SPS, ci_num = SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete)
    ci_number += ci_num
    MBS = list(set(PCS).union(set(SPS)))

    # Step 2: restrict the data to target + MBS and extract PC.
    drop_data_attribute = [
        str(i) for i in range(kVar) if i != target and i not in MBS
    ]
    data_new = data.drop(drop_data_attribute, axis=1)
    data_attribute = [i for i in range(kVar) if i == target or i in MBS]

    PC, ci_num = MBtoPC(data_new, target, alaph, data_attribute, is_discrete)
    ci_number += ci_num

    # Step 3: OR condition on the variables that were not selected.
    PCS_rmPC = [i for i in PCS if i not in PC]
    for x in PCS_rmPC:
        x_pcset, ci_num = MBtoPC(data_new, x, alaph, data_attribute,
                                 is_discrete)
        ci_number += ci_num
        if target in x_pcset:
            PC.append(x)

    # Step 4: spouses.  The attribute list does not depend on x, so it is
    # built once instead of on every iteration.
    SP = []
    all_but_target = [i for i in range(kVar) if i != target]
    for x in PC:
        x_pcset, ci_num = MBtoPC(data, x, alaph, all_but_target, is_discrete)
        ci_number += ci_num
        vari_set = [i for i in x_pcset if i != target and i not in PC]

        for y in vari_set:
            condition_all_set = [i for i in MBS if i != target and i != y]
            clength = min(len(condition_all_set), max_k)

            found_separator = False
            for j in range(clength + 1):
                for Z in subsets(condition_all_set, j):
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, y, Z, is_discrete)
                    if pval > alaph:
                        # Z is the minimal subset of MBS - {target, y} with
                        # target independent of y given Z.  Test whether x
                        # re-opens the dependence, then stop: the original
                        # kept testing until a second separating set
                        # appeared, which only inflated the test count.
                        found_separator = True
                        condition_varis = list(set(list(Z) + [x]))
                        ci_number += 1
                        pval, _ = cond_indep_test(
                            data, target, y, condition_varis, is_discrete)
                        if pval <= alaph:
                            SP.append(y)
                        break
                if found_separator:
                    break

    MB = list(set(PC).union(set(SP)))
    return MB, ci_number
def interIAMBnPC(data, target, alaph):
    """Estimate a Markov blanket of ``target`` with interleaved grow/shrink.

    Phase 1 repeats until the blanket size stops changing: the variable with
    the largest ``dep`` given the current blanket is added when its p-value
    is at most ``alaph``, then the blanket is scanned from the newest entry
    to the oldest and members that test independent of ``target`` given the
    others are removed.  A variable removed more than 10 times is excluded
    from further growing.

    Phase 2 prunes the result further: for conditioning-set sizes 0..3, each
    surviving member is tested against ``target`` given subsets of the other
    survivors and discarded at the first ``pval > alaph``.

    Args:
        data: 2-D data set; only its column count is read directly, the rest
            is passed through to ``chi_square_test``.
        target: column index of the target variable.
        alaph: significance level.

    Returns:
        ``(MB, ntest)``: the surviving variables and the number of tests run.
    """
    _, p = np.shape(data)
    BT = []
    ntest = 0
    length = -1
    removeSet = []  # variables permanently barred from re-entering
    rmNumberSet = [0] * p  # how many times each variable has been removed

    while len(BT) != length:
        depmax = -float("inf")
        feature = -1
        pval_temp = 1.0
        length = len(BT)

        # Growing phase: pick the candidate most associated with target.
        candidates = [
            i for i in range(p)
            if i != target and i not in BT and i not in removeSet
        ]
        for X in candidates:
            ntest += 1
            pval, dep = chi_square_test(data, X, target, BT)
            if dep > depmax:
                depmax = dep
                feature = X
                pval_temp = pval

        if pval_temp <= alaph:
            BT.append(feature)

            # Shrinking phase, newest to oldest.  Iterating a reversed
            # snapshot visits each member exactly once; the original index
            # loop (`while mb_index >= 0`) ran one step too far and
            # re-tested BT[-1], i.e. the last element, via negative
            # indexing.
            for x in reversed(BT.copy()):
                ntest += 1
                conditionvars = [i for i in BT if i != x]
                pval_sp, _ = chi_square_test(data, target, x, conditionvars)
                if pval_sp > alaph:
                    BT.remove(x)
                    # The variable just added was thrown out again: stop
                    # here so the outer loop can terminate.
                    if x == feature:
                        break
                    rmNumberSet[x] += 1
                    if rmNumberSet[x] > 10:
                        removeSet.append(x)

    # Second shrinking phase over the blanket found above.
    TestMB = BT.copy()
    n_members = len(TestMB)
    keep = np.ones((1, n_members))  # 1 = still in the blanket, 0 = removed
    size = 0
    max_k = 3  # largest conditioning-set size tried
    continueFlag = True
    while continueFlag:
        for y in range(n_members):
            if keep[0, y] == 0:
                continue
            conditionSet = [
                i for i in range(n_members) if i != y and keep[0, i] == 1
            ]
            for S in subsets(conditionSet, size):
                condtionVari = [TestMB[i] for i in S]
                ntest += 1
                pval, _ = chi_square_test(data, target, TestMB[y],
                                          condtionVari)
                if pval > alaph:
                    keep[0, y] = 0
                    break
        size += 1
        # Go on only while enough members remain to form a set of the next
        # size and the size cap has not been exceeded.
        continueFlag = bool(
            np.sum(keep[0, :] == 1) >= size and size <= max_k)

    MB = [TestMB[i] for i in range(n_members) if keep[0, i] == 1]
    return MB, ntest
def TIE(data, target, alaph, is_discrete=True):
    """Search for alternative Markov blankets of ``target``.

    A first blanket is obtained from ``IAMB`` on all variables.  The
    function then keeps a queue of variable sets to exclude, built from
    non-empty subsets (size 1..3) of the most recently accepted blanket
    combined with the exclusion set that produced it.  For each queued set,
    ``IAMB`` is re-run without those variables.  The new blanket is accepted
    when every variable of the first blanket that it lacks tests independent
    of ``target`` given the new blanket.  Otherwise the exclusion set is
    blacklisted and its queued supersets are discarded.

    Args:
        data: 2-D data set; only its column count is read directly.
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is read as dependence.
        is_discrete: forwarded unchanged to ``IAMB`` and ``cond_indep_test``.

    Returns:
        The list of accepted alternative blankets (the first blanket itself
        is not included).
    """
    _, kVar = np.shape(data)
    max_k = 3

    MB, _ = IAMB(data, target, alaph, list(range(kVar)), is_discrete)
    M = [MB]  # accepted blankets, the first one included
    G = [[]]  # exclusion set that produced each entry of M
    index = 0  # position in M/G of the most recently accepted blanket

    not_in_set = []  # exclusion sets that failed verification
    possible_subests = []  # queue of exclusion sets to try
    s_index = 0  # next queue position to process
    MB_new_set = []

    while True:
        # Enqueue exclusion sets derived from the latest accepted blanket.
        largest = min(len(M[index]), max_k)
        for size in range(1, largest + 1):
            for combo in subsets(M[index], size):
                # skip anything containing a set already known to fail
                if any(set(combo).issuperset(set(bad)) for bad in not_in_set):
                    continue
                vari_one = list(set(combo).union(set(G[index])))
                if vari_one not in possible_subests:
                    possible_subests.append(vari_one)

        if s_index >= len(possible_subests):
            break
        excpet_varis_set = possible_subests[s_index]
        s_index += 1

        variable_new = [i for i in range(kVar) if i not in excpet_varis_set]
        MB_new, _ = IAMB(data, target, alaph, variable_new, is_discrete)

        different_set = list(set(MB).difference(set(MB_new)))
        if different_set == [] or MB_new == []:
            continue

        # The new blanket must make every dropped variable independent.
        rejected = False
        for x in different_set:
            pval, _ = cond_indep_test(data, target, x, MB_new, is_discrete)
            if pval <= alaph:
                rejected = True
                not_in_set.append(excpet_varis_set)
                for queued in possible_subests.copy():
                    if (excpet_varis_set != queued
                            and set(queued).issuperset(excpet_varis_set)):
                        possible_subests.remove(queued)
                break

        if not rejected:
            MB_new_set.append(MB_new)
            M.append(MB_new)
            G.append(excpet_varis_set)
            index += 1

    return MB_new_set