示例#1
0
def CausalSearch(Data, T, PCT, Z, IDT, alaph, idT3, idT3_count, idT4,
                 idT4_count, is_discrete):
    """Update the labels ``IDT[T, v]`` of the members of ``PCT`` for target ``T``.

    For every ordered pair ``(x, y)`` taken from distinct positions of
    ``PCT`` (pairs with a member in ``Z`` are skipped) two tests are run
    through ``cond_indep_test``: ``x`` vs ``y`` given ``Z`` and ``x`` vs ``y``
    given ``Z`` plus ``T``.  With ``p1``/``p2`` the two p-values:

    * ``p1 > alaph`` and ``p2 <= alaph``: both variables are labelled 1;
    * ``p1 <= alaph`` and ``p2 > alaph``: the pair is appended to ``idT3``
      and each member becomes 2 if its partner is labelled 1, otherwise 3
      unless it is already 2;
    * otherwise: the pair is appended to ``idT4`` and both members become 4
      when both are currently 0 or both are currently 4.

    Afterwards every variable that shares an ``idT3`` pair with a variable
    labelled 1 is relabelled 2.

    NOTE(review): the labels presumably stand for 1 = parent, 2 = child,
    3 / 4 = not yet decided -- confirm against the caller.

    Args:
        Data: data set handed unchanged to ``cond_indep_test``.
        T: target variable; first index into ``IDT``.
        PCT: candidate variables of ``T`` to label.
        Z: conditioning set used for the first test.
        IDT: label table indexed as ``IDT[T, v]``; modified in place.
        alaph: significance threshold compared against the p-values.
        idT3, idT4: lists of recorded pairs; extended in place.
        idT3_count, idT4_count: running counts of the recorded pairs.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        ``(IDT, idT3, idT3_count, idT4, idT4_count)`` after the update.
    """
    # Step 1: a single candidate is labelled 3 right away.
    if len(PCT) == 1:
        IDT[T, PCT[0]] = 3

    # Step 2: check C2 & C3 on every ordered pair of distinct positions.
    for pos_x, x in enumerate(PCT):
        for pos_y, y in enumerate(PCT):
            if pos_x == pos_y or x in Z or y in Z:
                continue

            p_given_z, _ = cond_indep_test(Data, x, y, Z, is_discrete)
            z_with_target = sorted(set(Z) | {T})
            p_given_zt, _ = cond_indep_test(Data, x, y, z_with_target,
                                            is_discrete)

            if p_given_z > alaph and p_given_zt <= alaph:
                IDT[T, x] = 1
                IDT[T, y] = 1
            elif p_given_z <= alaph and p_given_zt > alaph:
                if IDT[T, x] == 1:
                    IDT[T, y] = 2
                elif IDT[T, y] != 2:
                    IDT[T, y] = 3
                if IDT[T, y] == 1:
                    IDT[T, x] = 2
                elif IDT[T, x] != 2:
                    IDT[T, x] = 3
                # Remember (x, y) among the pairs with idT = 3.
                idT3.append([x, y])
                idT3_count += 1
            else:
                both_unset = IDT[T, x] == 0 and IDT[T, y] == 0
                both_four = IDT[T, x] == 4 and IDT[T, y] == 4
                if both_unset or both_four:
                    IDT[T, x] = 4
                    IDT[T, y] = 4
                # Remember (x, y) among the pairs with idT = 4.
                idT4.append([x, y])
                idT4_count += 1

    # Step 3: resolve idT = 3 pairs whose partner is labelled 1.
    for x in PCT:
        if IDT[T, x] != 1:
            continue
        for k in range(idT3_count):
            first, second = idT3[k][0], idT3[k][1]
            if first == x:
                IDT[T, second] = 2
            elif second == x:
                IDT[T, first] = 2

    return IDT, idT3, idT3_count, idT4, idT4_count
示例#2
0
def HITON_PC(data, target, alaph, is_discrete):
    """Estimate the parents-and-children (PC) set of ``target``.

    The procedure alternates two phases until no candidate is left:

    1. Forward: every remaining candidate is tested against ``target`` with
       an empty conditioning set.  Candidates whose p-value is not
       ``<= alaph`` are discarded; among the others the one with the largest
       dependence score moves into ``PC``.
    2. Backward: every member ``Y`` of ``PC`` is re-tested against ``target``
       conditioned on subsets (size 0 .. 3) of the other members.  The first
       subset giving ``pval > alaph`` removes ``Y`` and is stored as its
       separating set.

    Args:
        data: two-dimensional data set (``np.shape(data)`` must unpack into
            two values); forwarded to ``cond_indep_test``.
        target: column index of the target variable.
        alaph: significance threshold for the p-values.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        ``(PC, sepset, ntest)`` where ``PC`` is the list of selected
        variables, ``sepset[v]`` is the separating set that removed ``v``
        in the backward phase (empty list otherwise) and ``ntest`` is the
        running test counter.
    """
    _, p = np.shape(data)
    max_k = 3  # largest conditioning-set size tried in the backward phase
    PC = []
    sepset = [[] for _ in range(p)]
    CanPC = [i for i in range(p) if i != target]
    ntest = 0

    while CanPC:
        # Forward phase: add the best remaining candidate to PC.  The best
        # score must be tracked across the whole pass, so it is initialised
        # once here rather than per candidate.
        best_var = None
        best_dep = -float("inf")
        for X in CanPC.copy():
            ntest += 1
            pval, dep = cond_indep_test(data, X, target, [], is_discrete)
            if not pval <= alaph:
                # Independent of the target, or a p-value that cannot be
                # compared (NaN): drop it so the outer loop always ends.
                CanPC.remove(X)
            elif best_var is None or dep > best_dep:
                # The first dependent candidate is always accepted so that
                # a non-comparable score cannot stall the search.
                best_var = X
                best_dep = dep
        if best_var is not None:
            PC.append(best_var)
            CanPC.remove(best_var)

        # Backward phase: remove false positives from PC.
        for Y in PC.copy():
            # NOTE(review): the counter is bumped once per examined member
            # although no test runs here; kept so reported counts stay
            # comparable with earlier runs.
            ntest += 1
            nbrs = [v for v in PC if v != Y]
            removed = False
            for k in range(min(len(nbrs), max_k) + 1):
                for S in subsets(nbrs, k):
                    ntest += 1
                    pval, _ = cond_indep_test(data, target, Y, S, is_discrete)
                    if pval > alaph:
                        sepset[Y] = list(S)
                        PC.remove(Y)
                        removed = True
                        break
                if removed:
                    break

    return PC, sepset, ntest
示例#3
0
def HITON_MB(data, target, alaph, is_discrete):
    """Estimate the Markov blanket of ``target`` from HITON_PC results.

    The blanket starts as the PC set of ``target``.  For every PC member
    ``X`` the PC set of ``X`` is computed; each of its members ``Y`` that is
    neither the target nor already in the blanket is tested against the
    target conditioned on ``sepset[Y]`` plus ``X``.  ``Y`` joins the blanket
    when that test gives ``pval <= alaph``.

    Args:
        data: data set forwarded to ``HITON_PC`` and ``cond_indep_test``.
        target: column index of the target variable.
        alaph: significance threshold for the p-values.
        is_discrete: forwarded to ``HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(MB, PC)``: the blanket (PC members first, then the accepted
        extra variables, each listed once) and the PC set of ``target``.
    """
    PC, sepset, _ = HITON_PC(data, target, alaph, is_discrete)
    MB = PC.copy()
    for X in PC:
        pc_of_x, _, _ = HITON_PC(data, X, alaph, is_discrete)
        for Y in pc_of_x:
            # MB always contains PC, so one membership test covers both
            # "already a PC member" and "already accepted via another X".
            if Y == target or Y in MB:
                continue
            # Integer indices, like every other cond_indep_test call in
            # this module; sorted only to make the order deterministic.
            condition_vars = sorted(set(sepset[Y]) | {X})
            pval, _ = cond_indep_test(data, Y, target, condition_vars,
                                      is_discrete)
            if pval <= alaph:
                MB.append(Y)

    return MB, PC

# Example usage (HITON_MB needs is_discrete and returns the pair (MB, PC)):
# data = pd.read_csv("E:/python/pycharm/algorithm/data/Child_s500_v1.csv")
# MB, PC = HITON_MB(data, 1, 0.01, True)
# print(MB)
# print(PC)
示例#4
0
def MBbyMB(data, target, alpha, is_discrete=True):
    """Orient the edges around ``target`` by exploring Markov blankets in turn.

    Starting from ``target``, variables are taken from a FIFO queue.  For
    each one (``A``) its blanket is obtained from ``MMMB``; every blanket
    member ``B`` is queued, linked to ``A`` and then tested against ``A``
    conditioned on subsets (size 0 .. 3) of the remaining blanket members.
    A subset with ``pval > alpha`` removes the link, is stored as the
    separating set of the pair and marks ``B`` as a candidate spouse of
    ``A``.  Each candidate spouse ``C`` and remaining neighbour ``B`` of
    ``A`` with ``B`` outside the separating set of ``(A, C)`` is written as
    ``A -> B <- C``, after which ``meek`` is applied.  The search stops when
    the queue is empty, or once more variables than ``len(MB(target))`` have
    been processed and no entry equal to 1 is left in the target's row and
    column of ``pdag``.

    Args:
        data: two-dimensional data set; forwarded to the helpers.
        target: column index of the target variable.
        alpha: significance threshold for the p-values.
        is_discrete: forwarded to ``MMMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, PC, undirected)`` read from ``pdag``:
        ``parents`` are the ``i`` with ``pdag[i, target] == -1``,
        ``children`` those with ``pdag[target, i] == -1``, ``undirected``
        those with ``pdag[target, i] == 1`` and ``PC`` the union of the
        three.
    """
    max_k = 3
    ci_test = 0
    num_calculated = 0

    _, kvar = np.shape(data)
    DAG = np.zeros((kvar, kvar))
    pdag = np.zeros((kvar, kvar))
    G = np.zeros((kvar, kvar))

    mb_pending = [True] * kvar  # True until the blanket has been computed
    all_pc = [[] for _ in range(kvar)]
    all_mb = [[] for _ in range(kvar)]
    all_can_spouse = [[] for _ in range(kvar)]
    all_sepset = [[[]] * kvar for _ in range(kvar)]

    queue = [target]
    visited = []

    while len(visited) <= kvar and queue:
        A = queue.pop(0)
        if A in visited:
            continue
        visited.append(A)

        # Get MB(A) the first time A is processed.
        if mb_pending[A]:
            all_mb[A], ntest = MMMB(data, A, alpha, is_discrete)
            ci_test += ntest
            mb_pending[A] = False

        all_pc[A] = all_mb[A].copy()

        for B in all_mb[A]:
            queue.append(B)
            DAG[A, B] = DAG[B, A] = 1
            if pdag[A, B] == 0 and pdag[B, A] == 0:
                pdag[A, B] = pdag[B, A] = 1
                G[A, B] = G[B, A] = 1

            others = [v for v in all_mb[A] if v != B]
            for cut_size in range(min(len(others), max_k) + 1):
                for z in subsets(others, cut_size):
                    ci_test += 1
                    pval, _ = cond_indep_test(data, B, A, z, is_discrete)
                    if pval > alpha:
                        all_sepset[A][B] = list(z)
                        all_sepset[B][A] = list(z)

                        DAG[A, B] = DAG[B, A] = 0
                        pdag[A, B] = pdag[B, A] = 0
                        G[A, B] = G[B, A] = 0

                        all_pc[A] = [v for v in all_pc[A] if v != B]
                        all_can_spouse[A].append(B)
                        break
                else:
                    # Nothing of this size separates A and B; try a larger set.
                    continue
                break

        # Find v-structures A -> B <- C.
        for C in all_can_spouse[A]:
            for B in all_pc[A]:
                if B in all_sepset[A][C]:
                    continue

                DAG[A, B] = 1
                DAG[B, A] = 1

                pdag[A, B] = -1
                pdag[B, A] = 0
                pdag[C, B] = -1
                pdag[B, C] = 0

                G[A, B] = 1
                G[B, A] = 0
                G[C, B] = 1
                G[B, C] = 0

        DAG, pdag, G = meek(DAG, pdag, G, kvar)

        num_calculated += 1
        if (num_calculated > len(all_mb[target])
                and 1 not in pdag[target, :]
                and 1 not in pdag[:, target]):
            break

    parents = [i for i in range(kvar) if pdag[i, target] == -1]
    children = [i for i in range(kvar) if pdag[target, i] == -1]
    undirected = [i for i in range(kvar) if pdag[target, i] == 1]
    PC = list(set(parents) | set(children) | set(undirected))

    return parents, children, PC, undirected


# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
示例#5
0
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Find parents, children and undirected neighbours of ``target``.

    Variables are processed from a wait list that starts with ``target``.
    For each variable ``x`` its Markov blanket comes from ``IAMB`` and the
    blanket members are queued.  Unless the blanket is a proper subset of
    another stored blanket or is fully contained in the done list, the
    blanket is split into

    * ``pc``: members that no subset (size 0 .. 3) of the other members
      separates from ``x``;
    * spouses: the remaining members that are dependent on ``x`` (or give a
      NaN p-value) once a ``pc`` member outside their separating set is
      added to that separating set.

    ``x - b`` edges are written for every ``pc`` member ``b`` and
    ``x -> b <- c`` for every spouse ``c`` found through ``b``; ``Meek`` is
    then applied.  The search ends when every edge between ``target`` and
    its ``DAG`` neighbours is oriented in ``pdag`` or the wait list is
    empty.

    Matrix conventions used below: ``DAG[i, j] == 1`` marks an adjacency,
    ``pdag[i, j] == -1`` marks ``i -> j`` and ``pdag[i, j] == 1`` an
    undirected edge, ``G[i, j] == 1`` marks ``i -> j``.

    Args:
        data: two-dimensional data set; forwarded to the helpers.
        target: column index of the target variable.
        alaph: significance threshold for the p-values.
        is_discrete: forwarded to ``IAMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, undirected)`` read from ``pdag``.
    """
    _, p = np.shape(data)
    Donelist = []  # variables whose MB has been found
    Waitlist = [target]  # variables whose MB is still to be found
    G = np.zeros((p, p))
    pdag = G.copy()
    DAG = G.copy()
    MB = [[] for _ in range(p)]
    sepset = [[[] for _ in range(p)] for _ in range(p)]
    k = 3  # largest conditioning-set size tried when separating x and c

    while Waitlist:
        stop = False
        for x in Waitlist.copy():
            spouse = [[] for _ in range(p)]  # spouse[j]: spouses via pc[j]
            Donelist.append(x)
            Waitlist.remove(x)
            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            Waitlist.extend(MB[x])

            mb_x = set(MB[x])
            if any(mb_x < set(other) for other in MB) or mb_x <= set(Donelist):
                continue

            # Split MB(x) into pc members and separable members.
            pc = MB[x].copy()
            for c in MB[x]:
                CanPC = [v for v in MB[x] if v != c]
                separated = False
                for cutsetsize in range(min(len(CanPC), k) + 1):
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        if pval <= alaph:
                            continue
                        sepset[x][c] = list(s)
                        pc.remove(c)
                        separated = True
                        break
                    if separated:
                        break

            # Spouse test: a separable member is a spouse through pc[j] when
            # it becomes dependent on x after pc[j] joins its separating set.
            rest = [v for v in MB[x] if v not in pc]
            for cand in rest:
                for j, common in enumerate(pc):
                    if common in sepset[x][cand]:
                        continue
                    # Condition on the common neighbour, never on the tested
                    # variable itself, and keep integer indices as in the
                    # separation test above.
                    condition = sorted(set(sepset[x][cand]) | {common})
                    pval, _ = cond_indep_test(data, cand, x, condition,
                                              is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(cand)

            # Write the adjacencies and the v-structures x -> b <- c.
            for j, b in enumerate(pc):
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in spouse[j]:
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0
                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0
                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Orient further edges with the Meek rules.
            # NOTE(review): assumes Meek returns the updated pdag -- confirm.
            pdag = Meek(DAG, pdag, data)

            # Stop once every edge connected to the target is oriented.
            connect = [i for i in range(p) if DAG[target, i] == 1]
            stop = all(pdag[target, i] == -1 or pdag[i, target] == -1
                       for i in connect)
            if stop:
                break
        if stop:
            break

        # Drop duplicates and variables that were already processed.
        Waitlist = [v for v in set(Waitlist) if v not in Donelist]

    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]
    return parents, children, undirected


# Example usage (MB_by_MB returns parents, children and undirected neighbours):
# # data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# data = pd.read_csv("F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.csv")
# # path = "F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.txt"
# # data = np.loadtxt(path, dtype=None, delimiter= ' ')
# target = 0
# p, c, und = MB_by_MB(data, target, 0.01)
# print("\nin the last -------------------------------------")
# print("target " + str(target) + " parents are " + str(p))
# print("target " + str(target) + " children are " + str(c))
# print("target " + str(target) + " undirected neighbours are " + str(und))