def CausalSearch(Data, T, PCT, Z, IDT, alaph, idT3, idT3_count, idT4, idT4_count, is_discrete):
    """Update the identity table ``IDT`` for the PC members of target ``T``.

    ``IDT[T, x]`` holds an integer code for the role of ``x`` with respect
    to ``T``.  This function writes the codes 1, 2, 3 and 4; going by the
    in-file comments, 1 marks a known parent and 3/4 mark members of
    still-unresolved pairs (presumably 2 marks a child -- confirm against
    the caller).

    Args:
        Data: data set forwarded unchanged to ``cond_indep_test``.
        T: index of the target variable.
        PCT: sequence with the parents/children candidates of ``T``.
        Z: conditioning set; pairs touching a member of ``Z`` are skipped.
        IDT: table indexed as ``IDT[T, x]``; modified in place.
        alaph: significance threshold compared against the p-values.
        idT3: list of ``[x, y]`` pairs tagged 3; appended to in place.
        idT3_count: number of entries of ``idT3`` to consider.
        idT4: list of ``[x, y]`` pairs tagged 4; appended to in place.
        idT4_count: running count kept alongside ``idT4``.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        The tuple ``(IDT, idT3, idT3_count, idT4, idT4_count)``.
    """
    # Step 1: a single PC member is tagged 3 straight away.
    if len(PCT) == 1:
        IDT[T, PCT[0]] = 3

    # Step 2: check every ordered pair of distinct PC members, once given Z
    # and once given Z plus the target.
    for pos_x, x in enumerate(PCT):
        for pos_y, y in enumerate(PCT):
            if pos_x == pos_y or x in Z or y in Z:
                continue

            pval, _ = cond_indep_test(Data, x, y, Z, is_discrete)
            z_with_target = sorted(set(Z) | {T})
            pval2, _ = cond_indep_test(Data, x, y, z_with_target, is_discrete)

            if pval > alaph and pval2 <= alaph:
                # Independent given Z, dependent once T is added.
                IDT[T, x] = 1
                IDT[T, y] = 1
            elif pval <= alaph and pval2 > alaph:
                # Dependent given Z, independent once T is added.
                if IDT[T, x] == 1:
                    IDT[T, y] = 2
                elif IDT[T, y] != 2:
                    IDT[T, y] = 3

                if IDT[T, y] == 1:
                    IDT[T, x] = 2
                elif IDT[T, x] != 2:
                    IDT[T, x] = 3

                # Record (x, y) among the pairs with idT = 3.
                idT3_count += 1
                idT3.append([x, y])
            else:
                both_unset = IDT[T, x] == 0 and IDT[T, y] == 0
                both_four = IDT[T, x] == 4 and IDT[T, y] == 4
                if both_unset or both_four:
                    IDT[T, x] = 4
                    IDT[T, y] = 4
                    # Record (x, y) among the pairs with idT = 4.
                    idT4_count += 1
                    idT4.append([x, y])

    # Step 3: resolve idT = 3 pairs whose partner is tagged 1.
    for x in PCT:
        if IDT[T, x] != 1:
            continue
        for k in range(idT3_count):
            pair = idT3[k]
            if pair[0] == x:
                IDT[T, pair[1]] = 2
            elif pair[1] == x:
                IDT[T, pair[0]] = 2

    return IDT, idT3, idT3_count, idT4, idT4_count
def HITON_PC(data, target, alaph, is_discrete):
    """Search for the parents-and-children (PC) set of ``target``.

    The search alternates two phases until no candidate is left:

    1. every remaining candidate is tested against ``target`` with an empty
       conditioning set; independent candidates are discarded and the
       candidate with the largest dependence score is admitted to ``PC``;
    2. every member of ``PC`` is re-tested against ``target`` conditioned on
       subsets (size 0..3) of the other members and dropped as soon as one
       subset renders it independent.

    Args:
        data: 2-D data set; ``np.shape(data)[1]`` is the number of variables.
        target: index of the target variable.
        alaph: significance threshold; ``pval > alaph`` means independent.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(PC, sepset, ntest)`` where ``PC`` is the list of retained
        variable indices, ``sepset[Y]`` is the conditioning set that removed
        ``Y`` from ``PC`` (empty if it was never removed) and ``ntest`` is
        the test counter (it is bumped once per pruned variable and once
        per conditional test, as in the original bookkeeping).
    """
    _, p = np.shape(data)
    PC = []
    sepset = [[] for _ in range(p)]
    CanPC = [i for i in range(p) if i != target]
    ntest = 0
    max_k = 3

    while CanPC:
        # The running maximum must live OUTSIDE the candidate loop: resetting
        # it per candidate would make "best" mean "last dependent candidate".
        dep_max = -float("inf")
        attribute = None  # None, not 0: index 0 is a legitimate variable
        pval_temp = 1.0

        # Add the best candidate to PC.
        for X in CanPC.copy():
            ntest += 1
            pval, dep = cond_indep_test(data, X, target, [], is_discrete)
            if pval > alaph:
                CanPC.remove(X)
            elif dep > dep_max:
                dep_max = dep
                attribute = X
                pval_temp = pval

        # Nothing admissible (all candidates discarded, or the statistics are
        # not comparable, e.g. NaN): stop instead of looping forever.
        if attribute is None or not pval_temp <= alaph:
            break

        PC.append(attribute)
        CanPC.remove(attribute)

        # Remove false positives from PC.
        for Y in PC.copy():
            ntest += 1
            nbrs = [i for i in PC if i != Y]
            removed = False
            k = 0
            while not removed and k <= len(nbrs) and k <= max_k:
                # subsets(nbrs, k) presumably yields the size-k subsets of nbrs.
                for S in subsets(nbrs, k):
                    ntest += 1
                    pval, _ = cond_indep_test(data, target, Y, S, is_discrete)
                    if pval > alaph:
                        sepset[Y] = list(S)
                        PC.remove(Y)
                        removed = True
                        break
                k += 1

    return PC, sepset, ntest
def HITON_MB(data, target, alaph, is_discrete):
    """Build the Markov blanket of ``target`` from HITON-PC neighbourhoods.

    The blanket starts as ``PC(target)``.  For every ``X`` in that set, each
    ``Y`` in ``PC(X)`` that is neither the target nor already in the blanket
    is tested against the target conditioned on ``sepset[Y]`` plus ``X``;
    if the two are dependent, ``Y`` is added as a spouse.

    Args:
        data: 2-D data set forwarded to ``HITON_PC`` / ``cond_indep_test``.
        target: index of the target variable.
        alaph: significance threshold; ``pval <= alaph`` means dependent.
        is_discrete: forwarded unchanged to the test routines.

    Returns:
        ``(MB, PC)``: the Markov blanket (no duplicates) and the PC set of
        ``target``.  The number of tests is not part of the return value.
    """
    PC, sepset, _ = HITON_PC(data, target, alaph, is_discrete)
    MB = PC.copy()

    for X in PC:
        pcofPC, _, _ = HITON_PC(data, X, alaph, is_discrete)
        for Y in pcofPC:
            # MB already contains PC, so one membership test covers both
            # "Y is a PC member" and "Y was already found as a spouse".
            if Y == target or Y in MB:
                continue
            # Integer indices, like every conditioning set HITON_PC builds.
            condition_vars = sorted(set(sepset[Y]) | {X})
            pval, _ = cond_indep_test(data, Y, target, condition_vars, is_discrete)
            if pval <= alaph:
                MB.append(Y)

    return MB, PC


# data = pd.read_csv("E:/python/pycharm/algorithm/data/Child_s500_v1.csv")
# MB ,ntest= HITON_MB(data, 1, 0.01)
# print(MB)
# print(ntest)
def MBbyMB(data, target, alpha, is_discrete=True):
    """Orient the edges around ``target`` by exploring Markov blankets breadth-first.

    Starting from ``target``, each dequeued variable gets its Markov blanket
    from ``MMMB``; blanket members that can be separated from the variable
    by a subset (size 0..3) of the remaining members become spouse
    candidates, the rest stay in its PC set.  Spouse candidates and PC
    members are then combined into v-structures, ``meek`` is applied, and
    the loop stops once enough variables were processed and no entry equal
    to 1 remains in the target's row or column of ``pdag``.

    Args:
        data: 2-D data set; its second dimension is the number of variables.
        target: index of the target variable.
        alpha: significance threshold; ``pval > alpha`` means independent.
        is_discrete: forwarded unchanged to ``MMMB`` / ``cond_indep_test``.

    Returns:
        ``(parents, children, PC, undirected)`` read from ``pdag``:
        ``parents`` are the ``i`` with ``pdag[i, target] == -1``,
        ``children`` the ``i`` with ``pdag[target, i] == -1``,
        ``undirected`` the ``i`` with ``pdag[target, i] == 1`` and ``PC``
        the union of the three.
    """
    max_cond_size = 3
    n_tests = 0  # running number of CI tests (kept for parity, not returned)
    _rows, kvar = np.shape(data)

    DAG = np.zeros((kvar, kvar))
    pdag = np.zeros((kvar, kvar))
    G = np.zeros((kvar, kvar))

    needs_mb = [True] * kvar
    pc_of = [[] for _ in range(kvar)]
    mb_of = [[] for _ in range(kvar)]
    spouse_cands = [[] for _ in range(kvar)]
    sepset_of = [[[] for _ in range(kvar)] for _ in range(kvar)]

    queue = [target]
    visited = []
    processed = 0

    while len(visited) <= kvar and queue:
        node = queue.pop(0)
        if node in visited:
            continue
        visited.append(node)

        # Get MB(node) the first time the node is reached.
        if needs_mb[node]:
            mb_of[node], used = MMMB(data, node, alpha, is_discrete)
            n_tests += used
            needs_mb[node] = False
        pc_of[node] = mb_of[node].copy()

        for nbr in mb_of[node]:
            queue.append(nbr)
            DAG[node, nbr] = 1
            DAG[nbr, node] = 1
            if pdag[node, nbr] == 0 and pdag[nbr, node] == 0:
                pdag[node, nbr] = 1
                pdag[nbr, node] = 1
                G[node, nbr] = 1
                G[nbr, node] = 1

            # Look for a subset of the other blanket members separating the pair.
            others = [v for v in mb_of[node] if v != nbr]
            size = 0
            separated = False
            while not separated and len(others) >= size and size <= max_cond_size:
                for cond in subsets(others, size):
                    n_tests += 1
                    pval, _ = cond_indep_test(data, nbr, node, cond, is_discrete)
                    if pval > alpha:
                        sepset_of[node][nbr] = list(cond)
                        sepset_of[nbr][node] = list(cond)
                        for mat in (DAG, pdag, G):
                            mat[node, nbr] = 0
                            mat[nbr, node] = 0
                        pc_of[node] = [v for v in pc_of[node] if v != nbr]
                        spouse_cands[node].append(nbr)
                        separated = True
                        break
                size += 1

        # Find v-structures node -> child <- spouse.
        for spouse in spouse_cands[node]:
            for child in pc_of[node]:
                if child in sepset_of[node][spouse]:
                    continue
                DAG[node, child] = 1
                DAG[child, node] = 1

                pdag[node, child] = -1
                pdag[child, node] = 0
                pdag[spouse, child] = -1
                pdag[child, spouse] = 0

                G[node, child] = 1
                G[child, node] = 0
                G[spouse, child] = 1
                G[child, spouse] = 0

        [DAG, pdag, G] = meek(DAG, pdag, G, kvar)

        processed += 1
        if processed > len(mb_of[target]):
            target_resolved = 1 not in pdag[target, :] and 1 not in pdag[:, target]
            if target_resolved:
                break

    all_vars = range(kvar)
    parents = [i for i in all_vars if pdag[i, target] == -1]
    children = [i for i in all_vars if pdag[target, i] == -1]
    undirected = [i for i in all_vars if pdag[target, i] == 1]
    PC = list(set(parents) | set(children) | set(undirected))
    return parents, children, PC, undirected


# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Orient the edges around ``target`` by learning Markov blankets one by one.

    Variables are taken from a wait list seeded with ``target``.  For each
    one, ``IAMB`` supplies the Markov blanket, which is split into a PC set
    and the remaining members; remaining members that become dependent on
    the variable once a PC member is added to their separating set are
    recorded as spouses through that PC member.  The resulting v-structures
    are written to the graphs, ``Meek`` is applied, and the search stops as
    soon as every edge attached to ``target`` is directed.

    Graph encodings (from the original annotations):
        ``DAG``  -- 1 marks an (undirected) adjacency, 0 no edge;
        ``pdag`` -- ``pdag[a, b] == -1`` marks ``a -> b``, 1 an undirected
                    edge, 0 no edge;
        ``G``    -- ``G[a, b] == 1`` marks ``a -> b``, 0 no edge.

    Args:
        data: 2-D data set; its second dimension is the number of variables.
        target: index of the target variable.
        alaph: significance threshold; ``pval <= alaph`` means dependent.
        is_discrete: forwarded unchanged to ``IAMB`` / ``cond_indep_test``.

    Returns:
        ``(parents, children, undirected)`` read from ``pdag``: the ``i``
        with ``pdag[i, target] == -1``, with ``pdag[target, i] == -1`` and
        with ``pdag[target, i] == 1`` respectively.
    """
    _, p = np.shape(data)
    Donelist = []  # variables whose MB has been found
    Waitlist = [target]  # variables whose MB is still to be found
    G = np.zeros((p, p))
    pdag = G.copy()
    DAG = G.copy()
    MB = [[] for _ in range(p)]
    sepset = [[[] for _ in range(p)] for _ in range(p)]
    k = 3  # largest conditioning-set size tried in the PC search

    while Waitlist:
        stop = False
        for x in Waitlist.copy():
            spouse = [[] for _ in range(p)]  # spouse[j]: spouses via pc[j]
            Donelist.append(x)
            Waitlist.remove(x)
            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            Waitlist.extend(MB[x])

            # Skip x when its blanket is strictly contained in another known
            # blanket, or when every member has already been processed.
            mb_x = set(MB[x])
            if any(mb_x < set(other) for other in MB) or mb_x <= set(Donelist):
                continue

            # Find the PC set: drop members separable from x by a subset
            # (size 0..k) of the other blanket members.
            pc = MB[x].copy()
            for c in MB[x]:
                CanPC = [v for v in MB[x] if v != c]
                cutsetsize = 0
                separated = False
                while not separated and len(CanPC) >= cutsetsize and cutsetsize <= k:
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        if pval <= alaph:
                            continue
                        sepset[x][c] = list(s)
                        pc.remove(c)
                        separated = True
                        break
                    cutsetsize += 1

            # Find spouses: c is a spouse of x through b = pc[j] when c and x
            # are dependent given sepset(x, c) plus b.  The conditioning set
            # uses integer indices (as the PC search above does) and adds the
            # common child b, never the tested variable c itself.
            rest = [v for v in MB[x] if v not in pc]
            for c in rest:
                for j, b in enumerate(pc):
                    if b in sepset[x][c]:
                        continue
                    condition = sorted(set(sepset[x][c]) | {b})
                    pval, _ = cond_indep_test(data, c, x, condition, is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(c)

            # Construct v-structures x -> b <- c.
            for j, b in enumerate(pc):
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in spouse[j]:
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0

                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0

                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Orient further edges with the Meek rules.
            # NOTE(review): assumes Meek returns the updated pdag -- confirm
            # against its definition.
            pdag = Meek(DAG, pdag, data)

            # Stop once every edge connected to the target is oriented.
            connect = [i for i in range(p) if DAG[target, i] == 1]
            stop = all(
                pdag[target, i] == -1 or pdag[i, target] == -1 for i in connect
            )
            if stop:
                break
        if stop:
            break

        # Deduplicate the wait list and drop variables already processed.
        Waitlist = [v for v in set(Waitlist) if v not in Donelist]

    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]
    return parents, children, undirected


#
# data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# data = pd.read_csv("F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.csv")
# # path = "F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.txt"
# # data = np.loadtxt(path, dtype=None, delimiter= ' ')
# target = 0
# Graph, p, c = MB_by_MB(data,target,0.01)
# print("\nin the last -------------------------------------")
# print(Graph)
# print("target " + str(target) + " parents are " + str(p))
# print("target " + str(target) + " children are " + str(c))