Example no. 1
def testMI2(seed, k=3, c=0.1, ntests=10, p=float('inf')):

    np.random.seed(seed)
    samples = [100, 200, 400, 800, 1600, 3200]

    knn = KnnEstimator(k=k, p=p)  # pass the norm parameter p, as in Examples 6-7
    trueMI = -1 / 2 * np.log(1 - c**2)  # analytic MI of a bivariate Gaussian with correlation c

    errors = np.zeros((2, len(samples)))

    for jj in range(ntests):

        for ii, n in enumerate(samples):
            cov_m = [[1.0, c], [c, 1.0]]
            data = np.random.multivariate_normal([0, 0], cov_m, n)
            # accumulate the average error of both kNN-based MI estimators
            errors[0, ii] += (trueMI -
                              knn._mi1(data[:, [0]], data[:, [1]])) / ntests
            errors[1, ii] += (trueMI -
                              knn._mi2(data[:, [0]], data[:, [1]])) / ntests

    plt.figure()
    plt.xlim(0.9, len(samples) + 0.1)
    x = list(range(1, len(samples) + 1, 1))
    for ii in range(2):
        lab = "MI_" + str(ii + 1)  # MI_1 = _mi1, MI_2 = _mi2
        plt.plot(x, errors[ii, :], label=lab, marker='o')
    plt.xticks(x, samples)

    #plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.legend(loc='best')
    plt.show()

    return errors
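A minimal call sketch (the seed and correlation values are placeholders; numpy, matplotlib and KnnEstimator are assumed to be imported as in the snippet):

# hypothetical usage: row 0 of the result holds the averaged _mi1 errors,
# row 1 the averaged _mi2 errors, one column per sample size
errors = testMI2(seed=2018, k=3, c=0.5, ntests=10)
print(errors[0, :])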
Example no. 2
    def __init__(self,
                 X,
                 MBalgorithm="IAMB",
                 ci_estimator=None,
                 symmetryRule="AND",
                 MBresolve="colliders",
                 mode="UG"):

        self.X = X
        self.n, self.d = X.shape
        self.aMat = np.zeros((self.d, self.d), dtype=int)  # np.int was removed in NumPy 1.24
        self.MBs = dict()
        self.symmetryRule = symmetryRule
        self.MBresolve = MBresolve
        self.mode = mode

        if ci_estimator is None:
            self.estimator = KnnEstimator()
        else:
            self.estimator = ci_estimator

        if MBalgorithm == "IAMB":
            self.MBalgorithm = IAMB(self.X,
                                    estimator=self.estimator,
                                    mode=self.mode)
        elif MBalgorithm == "GS":
            self.MBalgorithm = GS(self.X, estimator=self.estimator)
        elif MBalgorithm == "interIAMB":
            self.MBalgorithm = interIAMB(self.X, estimator=self.estimator)
        else:
            print("Warning: MBalgorithm '",
                  MBalgorithm,
                  "' is not defined. Using the IAMB algorithm instead.",
                  sep="")
            self.MBalgorithm = IAMB(self.X,
                                    estimator=self.estimator,
                                    mode=self.mode)
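Judging by Examples 12 and 13, which call StructLearn(X, ci_estimator=...), this constructor appears to belong to the StructLearn class; a minimal sketch under that assumption:

import numpy as np

X = np.random.randn(500, 4)  # 500 samples of 4 variables (placeholder data)
sl = StructLearn(X, MBalgorithm="IAMB", symmetryRule="AND", mode="UG")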
Example no. 3
def visualizeMeanderCI(samples,
                       k=5,
                       permutations=200,
                       seed=123,
                       sig=0.05,
                       k_perm=5,
                       corrCheck=False,
                       data=1):

    np.random.seed(seed)  # 'seed' was accepted but never used; mirror testMeander
    knn = KnnEstimator(k=k,
                       permutations=permutations,
                       sig=sig,
                       corrCheck=corrCheck,
                       k_perm=None)
    knn_local = KnnEstimator(k=k,
                             permutations=permutations,
                             sig=sig,
                             corrCheck=corrCheck,
                             k_perm=k_perm)

    if data == 1:
        X, Y, Z = createMeanderData(samples)
    elif data == 2:
        X, Y, Z = creteMeanderDataVstructure(samples)  # sic: helper name kept as-is
    else:
        raise ValueError("data must be 1 (meander) or 2 (v-structure variant)")

    indep, estMI, _, _, MIs = knn._permutationTest(X, Y, Z)
    indep_l, estMI_l, _, _, MIs_l = knn_local._permutationTest(X, Y, Z)

    plt.scatter(X, Y)

    plt.figure(2)

    sns.kdeplot(np.array(MIs), label="knn", fill=True)  # 'shade' was renamed 'fill' in newer seaborn
    ax = sns.kdeplot(np.array(MIs_l), label="knn_local", fill=True)
    ax.axvline(x=estMI,
               ymin=0,
               ymax=1,
               c="red",
               label="estimated MI",
               linestyle="--")
    ax.legend()
    print("knn naive permutation: ", indep, "\n", "knn local permutation: ",
          indep_l)

    return (indep, indep_l, estMI, estMI_l, ax, MIs, MIs_l)
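A hedged usage sketch (the sample size is a placeholder): data=1 draws from createMeanderData, data=2 from the v-structure variant.

indep, indep_l, estMI, estMI_l, ax, MIs, MIs_l = visualizeMeanderCI(
    500, k=5, permutations=200, data=1)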
Example no. 4
    def __init__(self, X, algorithm, estimator=None):
        self.cache = dict()
        self.nTests = 0
        self.X = X

        if estimator is None:
            self.estimator = KnnEstimator()
        else:
            self.estimator = estimator
        self.algorithm = algorithm
Example no. 5
def testMeander(samples,
                k=5,
                permutations=200,
                seed=123,
                tests=100,
                sig=0.05,
                k_perm=None,
                corrCheck=False):
    np.random.seed(seed)

    ff = FisherCI()
    knn = KnnEstimator(k=k,
                       permutations=permutations,
                       sig=sig,
                       corrCheck=corrCheck,
                       k_perm=k_perm)
    #knn = KnnEstimator(k = k, permutations = permutations, sig = sig)
    #maxS = np.max(samples)

    XIIYf = 0
    XIIYZf = 0

    XIIYknn = 0
    XIIYZknn = 0

    for ii in range(tests):

        X, Y, Z = createMeanderData(samples)
        #X= scale(X)
        #Y = scale(Y)
        #Z = scale(Z)

        # marginal test: count how often X _||_ Y is rejected
        indepf, _ = ff.independent(X, Y)
        indepk, _ = knn.independent(X, Y)

        if not indepf:
            XIIYf += 1
        if not indepk:
            XIIYknn += 1

        # conditional test: count how often X _||_ Y | Z is accepted
        indepf, _ = ff.independent(X, Y, Z)
        indepk, _ = knn.independent(X, Y, Z)

        if indepf:
            XIIYZf += 1
        if indepk:
            XIIYZknn += 1
    print("Sample size:", samples)
    print("        Reject X || Y      Accept X || Y | Z")
    print("Fisher    ", XIIYf / tests, "           ", XIIYZf / tests)
    print("kNN       ", XIIYknn / tests, "           ", XIIYZknn / tests)
Example no. 6
def compErrors(data, samples, ks, trueMI, p):
    nsamples = len(samples)
    nks = len(ks)
    res1 = np.zeros((nks, nsamples))
    for ni, n in enumerate(samples):
        X = data[:n, :]  # the slice does not depend on k, so take it once per n
        for ki, k in enumerate(ks):
            aa = KnnEstimator(k=k, p=p)
            res1[ki, ni] = trueMI - aa._entropy(X)

    return res1
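Despite its name, the trueMI argument is compared against an entropy estimate, so the reference value should be a differential entropy. For Gaussian data it has the closed form H(X) = d/2 * ln(2*pi*e) + 1/2 * ln|Sigma|; a sketch with an assumed covariance:

import numpy as np

cov = np.array([[1.0, 0.5], [0.5, 1.0]])  # assumed 2-d covariance
d = cov.shape[0]
trueH = 0.5 * d * np.log(2 * np.pi * np.e) + 0.5 * np.log(np.linalg.det(cov))

data = np.random.multivariate_normal(d * [0], cov, 3200)
errs = compErrors(data, samples=[100, 400, 1600], ks=[3, 5],
                  trueMI=trueH, p=float('inf'))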
Example no. 7
def compErrors2(x, y, samples, ks, trueMI, p, z=None):
    nsamples = len(samples)
    nks = len(ks)
    res1 = np.zeros((nks, nsamples))
    for ni, n in enumerate(samples):
        # slice the conditioning set only when one is given
        z_sub = z[:n, :] if z is not None else None
        for ki, k in enumerate(ks):
            aa = KnnEstimator(k=k, p=p)
            res1[ki, ni] = trueMI - aa._cmi1(x[:n, :], y[:n, :], z_sub)

    return res1
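For the unconditional case (z=None) the analytic bivariate-Gaussian value from Example 1 can serve as the reference; a sketch with an assumed correlation:

c = 0.6
cov = [[1.0, c], [c, 1.0]]
xy = np.random.multivariate_normal([0, 0], cov, 3200)
trueMI = -0.5 * np.log(1 - c**2)  # analytic MI, as in Example 1
errs = compErrors2(xy[:, [0]], xy[:, [1]], [100, 400, 1600], [3, 5],
                   trueMI, p=float('inf'))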
Example no. 8
    def __init__(self, X, estimator=None):
        if estimator is None:
            estimator = KnnEstimator()
        MBAlgorithms.__init__(self, X, "Grow-Shrink", estimator)
Example no. 9
    def __init__(self, X, estimator=None, mode='DAG'):
        if estimator is None:
            estimator = KnnEstimator()

        MBAlgorithms.__init__(self, X, "interIAMB", estimator)
        self.mode = mode
Example no. 10
def mvnormal_cmi_null(samples=100,
                      t=10000,
                      k=3,
                      sig=0.05,
                      permutations=200,
                      k_perm=None):

    icmat = np.array([[1, 0, 0.2], [0, 1, 0.8], [0.2, 0.8, 1]])
    c_mat = np.linalg.inv(icmat)

    meann = c_mat.shape[0] * [0]

    true_cmi_dep = mvnCMI(c_mat, [1], [2], [0])
    true_cmi_indep = mvnCMI(c_mat, [0], [1], [2])

    knn = KnnEstimator(k=k,
                       sig=sig,
                       permutations=permutations,
                       corrCheck=False,
                       k_perm=k_perm)

    cmi_dep = []
    cmi_indep = []

    for ii in range(0, t):
        X = np.random.multivariate_normal(meann, c_mat, samples)
        c_mat_est = np.cov(X, rowvar=False)

        cmi_dep.append(mvnCMI(c_mat_est, [0], [2], [1]))
        cmi_indep.append(mvnCMI(c_mat_est, [0], [1], [2]))

    sns.kdeplot(cmi_dep, label="null_dep")  # distplot(hist=False) is deprecated; kdeplot replaces it
    sns.kdeplot(cmi_indep, label="null_indep")

    indep_dep, estMI_dep, _, estPVal_dep, MIs_dep = knn._permutationTest(
        X[:, [0]], X[:, [2]], X[:, [1]])

    # CMI is non-negative, so clip negative permutation estimates to zero
    MIdep_2 = [max(mi, 0) for mi in MIs_dep]

    indep_indep, estMI_indep, _, estPVal_indep, MIs_indep = knn._permutationTest(
        X[:, [0]], X[:, [1]], X[:, [2]])

    MIindep_2 = [max(mi, 0) for mi in MIs_indep]

    #print("P-val from permutation test (dependent case): ",estPVal_dep)
    #p_null_dep = np.sum(np.array(cmi_dep) >= estMI_dep )/len(cmi_dep)
    p_null_indep = np.sum(np.array(cmi_indep) >= estMI_indep) / len(cmi_indep)
    #print("P-val from the null-distribution: ", p_null_dep)
    #print("------------------")
    print("P-val from permutation test (independent case): ", estPVal_indep)
    print("P-val from the null-distribution: ", p_null_indep)

    print(MIindep_2)

    sns.kdeplot(MIdep_2, label="permutation_dep")
    sns.kdeplot(MIindep_2, label="permutation_indep")

    plt.legend()

    return (cmi_dep, cmi_indep, true_cmi_dep)
Example no. 11
def testCMI2(seed, k=3, tests=10):
    np.random.seed(seed)
    samples = [100, 25000]

    ic = np.array([[1.0, -0.2, 0], [-0.2, 1.0, 0.6], [0, 0.6, 1.0]])
    c = np.linalg.inv(ic)
    c[0, 0] = 10 * c[0, 0]

    n = np.max(samples)
    nsamples = len(samples)
    res1 = np.zeros((2, nsamples))
    res2 = np.zeros((2, nsamples))
    res3 = np.zeros((2, nsamples))

    knn = KnnEstimator(k=k)
    for tt in range(tests):

        data = np.random.multivariate_normal([0, 0, 0], c, n)
        for jj, ss in enumerate(samples):
            # ic[0, 2] == 0, so the triple (0, 2 | 1) is conditionally
            # independent; the other two triples are conditionally dependent
            for res, (x, y, z) in zip((res1, res2, res3),
                                      ((0, 2, 1), (1, 2, 0), (0, 1, 2))):
                cmixy_zT = mvnCMI(c, [x], [y], [z])
                res[0, jj] += (cmixy_zT - knn._cmi1(
                    data[:ss, [x]], data[:ss, [y]], data[:ss, [z]])) / tests
                res[1, jj] += (cmixy_zT - knn._cmi2(
                    data[:ss, [x]], data[:ss, [y]], data[:ss, [z]])) / tests

    # one figure per (x, y | z) configuration
    for res in (res1, res2, res3):
        plt.figure()
        plt.xlim(0.9, len(samples) + 0.1)
        x = list(range(1, len(samples) + 1))
        for ii in range(2):
            lab = "CMI_" + str(ii + 1)  # CMI_1 = _cmi1, CMI_2 = _cmi2
            plt.plot(x, res[ii, :], label=lab, marker='o')
        plt.xticks(x, samples)

        plt.legend(loc='best')
        plt.show()
Example no. 12
def compareKnns(testName,
                seed=123432,
                folderName="knn_est_test",
                ntests=25,
                ns=None,
                SAVE=True):
    
    if ns is None:
        ns = [125,250,500,1000, 2000]
    
    # different random number generators for data generation and for the knnMI method (permutation tests) 
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1) 
    
    # global rng is also used..
    np.random.seed(seed)
 
    cores = mp.cpu_count()
    
    k_values = [0.01,0.1,0.2,3,5]
    local_perm = [True,False]
    graph_rules = ["AND","OR"]

    methods = list(product(k_values,local_perm))
    method_names = [__method2str(method) for method in product(k_values,local_perm,graph_rules)]
     
    # initialize dictionaries for results
    res = {"HD": [], "UG": []}  # measured quantities are keys
    Nres = {n : copy.deepcopy(res) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in method_names}
              
    # used parameters          
    parameters = {"seed": seed, 
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : method_names,
                  "trueUGs" : []}    
                
    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName
    
    if not os.path.exists(directory):
        os.makedirs(directory)            
        
    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"
    
    # create object for generating data and run the tests
    dd = DataGenerator(testName,rng = rrData)
    
    for tt in range(0,ntests):
        
        print("test ",tt + 1,"/", ntests, sep="")
        Xall,G = dd.createData(np.max(ns))
        
        for n in ns:          
            
            X = Xall[:n,:]
            X = scale(X) # zero mean, sd one for all the features
            
            print("............sample size: ",n)
            
            for method in methods:
                
                kk,local = method
                
                if kk < 1:
                    k = max(3,int(np.ceil(kk*n)))
                else:
                    k = kk
                
                if local:
                    k_perm = 5
                else:
                    k_perm = None
                    
                knnest = KnnEstimator(k=k, k_perm=k_perm, rng=rr1, parallel=cores)
                    
                knn_sl = StructLearn(X, ci_estimator= knnest)    
                knn_sl.findMoralGraph()
                          
                for graph_rule in graph_rules:
                    
                    est_ug = knn_sl.getMoralGraph(graph_rule)
                    method_name = __method2str( (method[0],method[1],graph_rule) )
                
                    # compute Hamming distance                
                    hd = HD(G,est_ug)    
                        
                    # save stuff
                    print(method_name,hd)
                    allRes[method_name][n]["HD"].append(hd)
                    allRes[method_name][n]["UG"].append(est_ug)
               
        # save the true UG (this differs between tests only in the random-graph cases)
        parameters["trueUGs"].append(G)
               
        # save results after every 5 tests    
        if (tt + 1) % 5 == 0 and SAVE:
            parameters["ntests"] = tt + 1
            res = (allRes,parameters) 
            saveResults(res,filename)
    
    # final results
    parameters["ntests"] = tt + 1
    res = (allRes,parameters) 
    if SAVE:
        saveResults(res,filename)
    
    return res
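A hedged driver call; the test name is a placeholder for whatever DataGenerator accepts, and SAVE=False skips writing pickles under tests/:

allRes, parameters = compareKnns("some_test", ntests=5, ns=[125, 250],
                                 SAVE=False)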
Example no. 13
def doTests(testName, 
            folderName = None, 
            seed = 123456, 
            ntests = 25, 
            ns = None, 
            k = 3,  
            k_perm = None,
            methods = None,
            lambdaRatio = 0.01,
            useTransformation = False,
            SAVE = True):
    
    if methods is None:
        
        methods = ["knnMI_AND",
                   "knnMI_OR",
                   "fisherZ_AND",
                   "fisherZ_OR",
                   "mb_RIC",
                   "glasso_RIC",
                   "mb_STARS",
                   "glasso_STARS",
                   "mb_auto"]

    if ns is None:
        ns = [125,250,500,1000,2000]
    
    
    # different random number generators for data generation and for the knnMI method (permutation tests) 
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1) 
    
    # global rng is also used..
    np.random.seed(seed)
     
    cores = mp.cpu_count()
    
    # conditional independence tests
    knnEst1 = KnnEstimator(k=k, rng=rr1, parallel=cores, k_perm=k_perm)
    fEst = FisherCI()
    
    if "KCIT_OR" in methods or "KCIT_AND" in methods: 
        k_cit = KCIT(seed = seed + 2)
    if "RCIT_OR" in methods or "RCIT_AND" in methods:
        r_cit = RCIT(seed = seed + 3)
        
       
    # initialize dictionaries for results
    res = {"HD": [], "UG": [], "sparsity": []}  # measured quantities are keys
    Nres = {n : copy.deepcopy(res) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in methods}
              
    # used parameters          
    parameters = {"seed": seed, 
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : methods,
                  "k" : k,
                  "lambdaRatio" : lambdaRatio,
                  "trueUGs" : []}    
                
    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName
    
    if not os.path.exists(directory):
        os.makedirs(directory)            
        
    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"
     
    # create object for generating data and run the tests
    dd = DataGenerator(testName,rng = rrData)
    
    nonPara = True # use non-paranormal transformation for glasso and mb
    
    #DEBUG
    errorCount = 0     
    
    for tt in range(0,ntests):
        
        print("test ",tt + 1,"/", ntests, sep="")
        Xall,G = dd.createData(np.max(ns))
        
        for n in ns:          
            
            X = Xall[:n,:]
            X = scale(X) # zero mean, sd one for all the features
            
            if useTransformation:
                X = transform(X) # non-paranormal transformation for every method
                print("Transformation used.")
                nonPara = False # no need to perform the transformation twice when glasso/mb is called

            print("............sample size: ",n)
                  
            # kernel methods
            if "KCIT_OR" in methods or "KCIT_AND" in methods:
                kcitSl = StructLearn(X,ci_estimator = k_cit)
                kcitSl.findMoralGraph()
                
            if "RCIT_OR" in methods or "RCIT_AND" in methods: 
                rcitSl = StructLearn(X,ci_estimator = r_cit)
                rcitSl.findMoralGraph()
                
            # find Markov blankets for knnMI method
            if "knnMI_AND" in methods or "knnMI_OR" in methods:
                if k < 1:
                    knnEst1.k = max(3,int(np.ceil(k*n)))
                                        
                knnSl = StructLearn(X, ci_estimator= knnEst1)    
                knnSl.findMoralGraph()
                
            # same for the Fisher-Z based method
            if "fisherZ_AND" in methods or "fisherZ_OR" in methods:
                fishSl = StructLearn(X, ci_estimator= fEst)
                fishSl.findMoralGraph()
    
            for method in methods:
                
                # record the sparsity of the estimated graph for glasso/mb; for other
                # methods use just nan (graphs are saved, so sparsity is easy to compute)
                sp = np.nan
                
                # DEBUG
                seeeds = np.random.RandomState
                
                if method == "knnMI_AND":
                    estUG = knnSl.getMoralGraph("AND")
                elif method == "knnMI_OR":
                    estUG = knnSl.getMoralGraph("OR")                
                elif method == "fisherZ_AND":
                    estUG = fishSl.getMoralGraph("AND")
                elif method == "fisherZ_OR":
                    estUG = fishSl.getMoralGraph("OR")                    
                elif method == "glasso_RIC":
                    estUG, sp = hugeLearnGraph(X,method = "glasso", modelSelectCrit= "ric", nonPara=nonPara, lambdaRatio= lambdaRatio) 
                elif method == "glasso_BIC":
                    estUG, sp = hugeLearnGraph(X,method = "glasso", modelSelectCrit= "ebic", nonPara=nonPara, ebicTuning= 0.0,lambdaRatio= lambdaRatio)
                elif method == "glasso_EBIC":
                    estUG, sp = hugeLearnGraph(X,method = "glasso", modelSelectCrit= "ebic", nonPara=nonPara, ebicTuning= 0.5,lambdaRatio= lambdaRatio)
                elif method == "mb_RIC":
                    estUG,sp = hugeLearnGraph(X,method = "mb", modelSelectCrit= "ric", nonPara=nonPara,lambdaRatio= lambdaRatio) 
                elif method == "mb_auto":
                    estUG,sp = hugeLearnGraph(X,method = "mb", modelSelectCrit= "mbDefault", nonPara=nonPara)     
                elif method == "mb_STARS":
                    estUG,sp = hugeLearnGraph(X,method = "mb", modelSelectCrit= "stars", nonPara=nonPara,lambdaRatio= lambdaRatio)    
                elif method == "glasso_STARS":
                    estUG,sp = hugeLearnGraph(X,method = "glasso", modelSelectCrit= "stars", nonPara=nonPara,lambdaRatio= lambdaRatio)
                elif method == "KCIT_AND":
                    estUG = kcitSl.getMoralGraph("AND")
                elif method == "KCIT_OR":
                    estUG = kcitSl.getMoralGraph("OR")
                elif method == "RCIT_AND":
                    estUG = rcitSl.getMoralGraph("AND")
                elif method == "RCIT_OR":
                    estUG = rcitSl.getMoralGraph("OR")
                else:
                    # unknown method: skip it, otherwise estUG below would be stale or undefined
                    print("unspecified method!!")
                    continue
              
                # DEBUG: a moral graph should be symmetric; log and repair it if not
                if not (estUG == estUG.T).all():
                    errors = {"testName": testName, "data": X,
                              "method": method, "currentSeed": seeeds,
                              "estUG": estUG, "trueUG": G,
                              "testNumber": tt + 1}
                    errorCount += 1

                    path = directory + "/errors_" + test_str + "_" + str(errorCount) + ".p"
                    saveResults(errors, path)

                    # force symmetry on the UG
                    estUG = 1 * (estUG + estUG.T == 2)
                                        
                # compute Hamming distance
                hd = HD(G,estUG)    
                    
                # save stuff
                print(method,hd)
                allRes[method][n]["HD"].append(hd)
                allRes[method][n]["UG"].append(estUG)
                allRes[method][n]["sparsity"].append(sp)
                
        # save the true UG (this differs between tests only in the random-graph cases)
        parameters["trueUGs"].append(G)
               
        # save results after every 5 tests    
        if (tt + 1) % 5 == 0 and SAVE:
            parameters["ntests"] = tt + 1
            res = (allRes,parameters) 
            saveResults(res,filename)
    
    # final results
    parameters["ntests"] = tt + 1
    res = (allRes,parameters) 
    if SAVE:
        saveResults(res,filename)
    
    return res
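Similarly for doTests, restricted here to the two knnMI variants so the glasso/mb and kernel-test branches are skipped; the test name is again a placeholder:

allRes, parameters = doTests("some_test", ntests=5, ns=[125, 250],
                             methods=["knnMI_AND", "knnMI_OR"], SAVE=False)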