Exemplo n.º 1
0
def test5():
    """Learn an SPN on a synthetic two-column dataset and plot the result.

    The dataset has 5000 rows. Column 1 is a binary label set to 1 on a
    random half of the rows; column 0 is drawn from N(100, 30) where the
    label is 0 and from N(200, 30) where it is 1.

    After structure learning, the exponentiated output of
    ``spn.root.eval`` is evaluated on a grid x in [0, 300) for each label
    value and drawn as one filled polygon per label in a 3D plot. Finally
    the summed ``eval`` output over the training data is printed
    (presumably log-likelihoods, given the ``exp`` and the "Sum LL"
    label -- confirm against the SPN implementation).
    """
    numpy.random.seed(42)

    data = numpy.zeros((5000, 2))

    # Flag a random half of the rows (sampled without replacement) as label 1.
    idx = numpy.random.choice(data.shape[0],
                              data.shape[0] // 2,
                              replace=False)
    data[idx, 1] = 1

    idx0 = data[:, 1] == 0
    idx1 = data[:, 1] == 1

    # One Gaussian per label value; the draw order is kept fixed so the
    # seeded data stays reproducible.
    data[idx0, 0] = numpy.random.normal(100, 30, numpy.sum(idx0))
    data[idx1, 0] = numpy.random.normal(200, 30, numpy.sum(idx1))

    print(data)

    featureNames = ["Gaussian", "Categorical"]
    featureTypes = ["continuous", "discrete"]

    # Column-split alternatives that were tried here before:
    # Splitting.IndependenceTest(alpha=0.01) and Splitting.RDCTest().
    spn = SPN.LearnStructure(data,
                             featureTypes=featureTypes,
                             featureNames=featureNames,
                             row_split_method=Splitting.KmeansRows(),
                             col_split_method=Splitting.RDCTestOHEpy(),
                             min_instances_slice=500,
                             cluster_first=True)

    spn.root.validate()

    # Axes3D is imported for its side effect only: older Matplotlib
    # releases register the '3d' projection when this module is loaded.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    from matplotlib.collections import PolyCollection
    from matplotlib.colors import colorConverter
    import matplotlib.pyplot as plt

    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments on current
    # Matplotlib; add_subplot(111, projection=...) works on old and new.
    ax = fig.add_subplot(111, projection='3d')

    def cc(arg):
        """Return *arg* as an RGBA tuple with alpha 0.6."""
        return colorConverter.to_rgba(arg, alpha=0.6)

    xs = numpy.arange(0, 300, 0.5)
    verts = []
    zs = [0, 1]

    maxys = 0
    for z in zs:
        # One probe row per grid point, with the label column held at z.
        # The width follows the training data rather than len(zs), which
        # only happened to be the same number.
        testdata = numpy.zeros((len(xs), data.shape[1]))
        testdata[:, 0] = xs
        testdata[:, 1] = z

        ys = numpy.zeros_like(xs)
        ys[:] = numpy.exp(spn.root.eval(testdata))

        maxys = max(maxys, numpy.max(ys))

        # Pin both ends to zero so each polygon closes on the baseline.
        ys[0], ys[-1] = 0, 0
        verts.append(list(zip(xs, ys)))

    poly = PolyCollection(verts, facecolors=[cc('r'), cc('g')])
    poly.set_alpha(0.7)
    ax.add_collection3d(poly, zs=zs, zdir='y')

    ax.set_xlabel('X')
    ax.set_xlim3d(0, 300)
    ax.set_ylabel('Y')
    ax.set_ylim3d(-1, 1)
    ax.set_zlabel('Z')
    ax.set_zlim3d(0, maxys)

    plt.show()

    ll = spn.root.eval(data)

    print("Sum LL", numpy.sum(ll))
Exemplo n.º 2
0
def test6():
    """Learn an SPN on a synthetic three-column dataset and plot each cell.

    For every (x, y) pair with x in {0, 1} and y in {0, 1, 2, 3} a block of
    2000 rows is generated: column 1 holds x, column 2 holds y, and column 0
    is drawn from N(200, 30) when x + y is odd and from N(100, 30) otherwise.
    The blocks are stacked into one dataset and an SPN is learned on it.

    For each (x, y) pair, the exponentiated ``spn.root.eval`` output over a
    grid of column-0 values in [0, 300) is drawn in its own subplot of a
    2 x 4 grid. The summed ``eval`` output on the training data is printed
    last.
    """
    numpy.random.seed(42)

    yd = [0, 1, 2, 3]
    xd = [0, 1]
    # All (x, y) combinations in row-major order: x outer, y inner.
    cells = [(x, y) for x in xd for y in yd]

    datablocks = []
    for x, y in cells:
        block = numpy.zeros((2000, 3))
        block[:, 1] = x
        block[:, 2] = y
        # The high-mean cells (1,0), (0,1), (1,2), (0,3) are exactly the
        # ones where x + y is odd.
        mean = 200 if (x + y) % 2 == 1 else 100
        block[:, 0] = numpy.random.normal(mean, 30, block.shape[0])
        datablocks.append(block)

    data = numpy.vstack(datablocks)

    print(data.shape)

    featureNames = ["Gaussian", "Categorical", "Discrete"]
    featureTypes = ["continuous", "categorical", "discrete"]

    # Column-split alternatives that were tried here before:
    # Splitting.IndependenceTest(alpha=0.01) and Splitting.RDCTest().
    spn = SPN.LearnStructure(data,
                             featureTypes=featureTypes,
                             featureNames=featureNames,
                             row_split_method=Splitting.KmeansRows(),
                             col_split_method=Splitting.RDCTestOHEpy(),
                             min_instances_slice=50,
                             cluster_first=True)

    spn.root.validate()

    from matplotlib.collections import PolyCollection
    from matplotlib.colors import colorConverter
    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec

    gs = gridspec.GridSpec(len(xd), len(yd))

    fig = plt.figure(figsize=(8, 8))

    xall = numpy.arange(0, 300, 0.5)
    for cell_no, (x, y) in enumerate(cells):
        # Probe rows: sweep column 0 over the grid with x and y held fixed.
        testdata = numpy.zeros((len(xall), 3))
        testdata[:, 0] = xall
        testdata[:, 1] = x
        testdata[:, 2] = y

        pbs = numpy.zeros_like(xall)
        pbs[:] = numpy.exp(spn.root.eval(testdata))

        # The flat grid index walks the 2 x 4 layout in the same
        # row-major order as the cell list.
        ax = plt.Subplot(fig, gs[cell_no])
        ax.set_title('%s %s' % (x, y))
        ax.plot(xall, pbs, 'r--')
        fig.add_subplot(ax)

    plt.show()

    ll = spn.root.eval(data)

    print("Sum LL", numpy.sum(ll))
Exemplo n.º 3
0
def test4():
    """Compare empirical and SPN-estimated probabilities on getAdult() data.

    Columns 1-4 of the dataset returned by ``getAdult()`` are kept, along
    with the matching feature types, names and domains. Two values are
    looked up in the domains: "State-gov" in the first kept column and
    "Doctorate" in the second. The function then:

    1. computes the empirical P(D), P(S, D), P(S) and P(S | D) by counting
       rows;
    2. learns an SPN on the four kept columns;
    3. marginalizes the SPN down to columns {0, 1} and to column {1},
       evaluates both on a single query row, and prints the resulting
       estimates next to the empirical ones.

    ``doms`` entries are assumed to be NumPy arrays of category labels
    (``doms[i] == "..."`` must yield a boolean array) -- confirm against
    ``getAdult``.
    """
    numpy.random.seed(42)
    dsname, data, featureNames, featureTypes, doms = getAdult()

    # Keep only four columns and the metadata that belongs to them.
    cols = [1, 2, 3, 4]
    data = data[:, cols]
    featureTypes = [featureTypes[c] for c in cols]
    featureNames = [featureNames[c] for c in cols]
    doms = [doms[c] for c in cols]

    doctorateVal = numpy.where(doms[1] == "Doctorate")[0][0]
    stategovVal = numpy.where(doms[0] == "State-gov")[0][0]

    print(featureNames)

    print(data[0, :])
    print(doctorateVal, stategovVal)

    # Empirical probabilities from row counts.
    n_rows = data.shape[0]
    is_doctorate = data[:, 1] == doctorateVal
    is_stategov = data[:, 0] == stategovVal

    pD = numpy.sum(is_doctorate) / n_rows
    pSD = numpy.sum(numpy.logical_and(is_doctorate, is_stategov)) / n_rows
    pS = numpy.sum(is_stategov) / n_rows
    pS_D = pSD / pD

    print("pD", pD)
    print("pSD", pSD)
    print("pS|D", pS_D)

    # Column-split alternatives that were tried here before:
    # Splitting.IndependenceTest(alpha=0.01) and Splitting.RDCTest().
    spn = SPN.LearnStructure(data,
                             featureTypes=featureTypes,
                             featureNames=featureNames,
                             row_split_method=Splitting.KmeansRows(),
                             col_split_method=Splitting.RDCTestOHEpy(),
                             min_instances_slice=100,
                             cluster_first=True)

    spn.root.validate()

    print("SPN Learned")

    # The SPN is learned on the sliced data, so the features to drop are
    # derived from its actual width rather than from a hard-coded list
    # that still referred to columns of the unsliced dataset.
    n_features = data.shape[1]

    # Joint over columns 0 and 1: marginalize out everything else.
    margSPN_SD = spn.root.marginalizeOut(
        [f for f in range(n_features) if f not in (0, 1)])
    margSPN_SD.Prune()

    print(margSPN_SD)

    dataSD = numpy.zeros_like(data[0, :]).reshape(1, data.shape[1])
    dataSD[0, 0] = stategovVal
    dataSD[0, 1] = doctorateVal
    print(dataSD)

    spnSD = numpy.exp(margSPN_SD.eval(dataSD))

    # Marginal over column 1 only.
    margSPN_D = spn.root.marginalizeOut(
        [f for f in range(n_features) if f != 1])
    margSPN_D.Prune()

    print(margSPN_D)

    dataD = numpy.zeros_like(data[0, :]).reshape(1, data.shape[1])
    dataD[0, 1] = doctorateVal
    print(dataD)

    spnD = numpy.exp(margSPN_D.eval(dataD))

    print("pD", pD)
    print("pS", pS)
    print("pSD", pSD)
    print("pS_D", pS_D)

    print("spn pD", spnD)
    print("spn pSD", spnSD)
    spnS_D = spnSD / spnD
    print("spn pS_D", spnS_D)

    print("doctorateVal", doctorateVal)
    print("stategovVal", stategovVal)

    ll = spn.root.eval(data)

    print("Sum LL", numpy.sum(ll))