Exemplo n.º 1
0
def computeKMoutliers(synthD):
    """Run ``kmo.kmeansOutliers`` five times on every dataset in *synthD*.

    Each entry of ``synthD`` is loaded with ``readSynthetic``.  For every one
    of the five runs the summed ``kmo.cost`` is appended to ``sd.costs``.  The
    dataset is then printed with ``printSD`` and folded into the result with
    ``addAnswer``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic`` (presumably
            file paths -- confirm against the caller).

    Returns:
        The value produced by chaining ``addAnswer(stats, sd)`` over all
        datasets, starting from an empty list.
    """
    stats = []
    for num, f in enumerate(synthD):
        # Load and parse the dataset for this entry.
        sd = readSynthetic(f)
        print("Iteration:", num)

        # The phi* estimate only depends on the dataset, so compute it once
        # per dataset instead of once per run.  Kept in a local so that no new
        # attribute is attached to ``sd``.
        phi_star = compute_phi_star(sd)

        for _ in range(5):
            # NOTE(review): three values are unpacked from kmeansOutliers
            # here -- confirm the arity against the kmo module in use.
            kcent, cid, _dist = kmo.kmeansOutliers(sd.data, phi_star, sd.z,
                                                   sd.k)
            kmo_cost, _index_list = kmo.cost(sd.data, cid, kcent, sd.z)
            # Record the total cost of this run.
            sd.costs.append(np.sum(kmo_cost))

        # example for adding extra stats, i.e. time. For headers, go to top
        # ``processed`` is the 1-based count of datasets handled so far.
        processed = num + 1
        sd.extrastats = [0, processed, processed * processed]

        printSD(sd)

        stats = addAnswer(stats, sd)

    return stats
Exemplo n.º 2
0
def computeKMOutliersLloyd(synthD):
    """Seed with ``kmo.kmeansOutliers``, refine with ``lloyd.LloydOut``.

    For every dataset in *synthD* and every multiplier in the module-level
    ``phistarprop``, ten trials are run.  Each trial seeds centers with
    ``kmo.kmeansOutliers`` (using ``sd.phistar * multiplier``), passes them to
    ``lloyd.LloydOut``, and records ``kmo.cost2`` of the result together with
    the precision/recall values returned by ``LloydOut``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic`` (presumably
            file paths -- confirm against the caller).

    Returns:
        The value produced by chaining ``addAnswer(stats, sd)`` once per
        (dataset, multiplier) pair, starting from an empty list.
    """
    stats = []
    for num, f in enumerate(synthD):
        # Load and parse the dataset for this entry.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("-------------------------\nIteration:", num,
              "\n-------------------------")

        for rp in phistarprop:
            # Single-step loop kept as a knob for trying k, k+1, ... centers.
            for j in range(1):
                sd.runphi = rp
                sd.runk = sd.k + j
                precs = []
                recs = []
                for _ in range(10):
                    # NOTE(review): four values are unpacked from
                    # kmeansOutliers and five from LloydOut -- confirm the
                    # arity against the kmo / lloyd modules in use.
                    centers, _cid, _dist, _wins = kmo.kmeansOutliers(
                        sd.data, sd.phistar * sd.runphi, sd.z, sd.runk)

                    # Row indices k .. k+z-1, handed to LloydOut as ``zind``.
                    # A fresh list is built for every call, as before.
                    zind = list(range(sd.k, sd.k + sd.z))
                    ans, _cid, _wins, prec, rec = lloyd.LloydOut(
                        sd.data, centers, sd.runk, sd.z, 1, 100, zind)

                    # Cost of the refined centers.
                    cost2 = kmo.cost2(sd.data, ans, int(sd.z))

                    sd.costs.append(cost2)
                    precs.append(prec)
                    recs.append(rec)

                sd.precs = precs
                sd.recs = recs

                # example for adding extra stats, i.e. time. For headers, go to top
                sd.extrastats = [
                    sd.phistar,
                    mean(np.array(precs)),
                    max(precs),
                    mean(np.array(recs)),
                    max(recs)
                ]

                print(sd.phistar)
                printSD(sd)

                stats = addAnswer(stats, sd)
                # Start the next configuration with an empty cost list.
                sd.costs = []

    return stats
Exemplo n.º 3
0
def computeKMLS(synthD):
    """Evaluate ``ls.lsOut`` on a ``kmpp.kmeanspp`` sample of each dataset.

    For every dataset in *synthD* and every ``extra`` in
    ``range(int(sd.k / 2))`` the run uses ``sd.k + extra`` centers.  One trial
    is performed per configuration: ``2 * (sd.k + sd.z)`` points are drawn with
    ``kmpp.kmeanspp``, ``ls.lsOut`` is run on that sample, and the resulting
    centers are scored on the full data with ``kmo.cost2`` and
    ``km.kMPrecRecallVar2``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic`` (presumably
            file paths -- confirm against the caller).

    Returns:
        The value produced by chaining ``addAnswer`` once per
        (dataset, extra) pair, starting from an empty list.
    """
    results = []
    for file_idx, path in enumerate(synthD):
        sd = readSynthetic(path)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", file_idx)

        extra_limit = int(sd.k / 2)
        for extra in range(extra_limit):
            print("TrueCost:", sd.phistar)
            sd.runphi = 1
            sd.runk = sd.k + extra
            precisions, recalls = [], []

            for _trial in range(1):
                # Draw the working sample with kmeanspp.
                sample_size = 2 * (sd.k + sd.z)
                sample = kmpp.kmeanspp(sd.data, sample_size)

                # lsOut runs on the sample only.
                centers, empz = ls.lsOut(sample, sd.runk, sd.z, eps)

                # Score the centers against the full dataset.
                sd.costs.append(kmo.cost2(sd.data, centers, int(sd.z)))

                trial_prec, trial_rec = km.kMPrecRecallVar2(sd, centers, empz)
                precisions.append(trial_prec)
                recalls.append(trial_rec)

            sd.precs = precisions
            sd.recs = recalls

            # Extra columns for the report; the headers are defined elsewhere.
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precisions)),
                max(precisions),
                mean(np.array(recalls)),
                max(recalls),
            ]

            printSD(sd)
            results = addAnswer(results, sd)

            # Costs are tracked per configuration, so clear them here.
            sd.costs = []

    return results
Exemplo n.º 4
0
def computeKMPP(synthD):
    """Evaluate plain ``kmpp.kmeanspp`` output as centers on each dataset.

    For every dataset in *synthD* and every ``extra`` in
    ``range(int(sd.k / 2))``, ten trials call
    ``kmpp.kmeanspp(sd.data, sd.k + extra)``.  Each result is scored with
    ``kmo.cost2`` and ``km.kMPrecRecall``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic`` (presumably
            file paths -- confirm against the caller).

    Returns:
        The value produced by chaining ``addAnswer`` once per
        (dataset, extra) pair, starting from an empty list.
    """
    results = []
    for file_idx, path in enumerate(synthD):
        sd = readSynthetic(path)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", file_idx)

        extra_limit = int(sd.k / 2)
        for extra in range(extra_limit):
            print("TrueCost:", sd.phistar)
            sd.runphi = 1
            sd.runk = sd.k + extra
            precisions, recalls = [], []

            for _trial in range(10):
                centers = kmpp.kmeanspp(sd.data, sd.runk)

                # Score the chosen centers against the full dataset.
                sd.costs.append(kmo.cost2(sd.data, centers, int(sd.z)))

                trial_prec, trial_rec = km.kMPrecRecall(sd, centers)
                precisions.append(trial_prec)
                recalls.append(trial_rec)

            sd.precs = precisions
            sd.recs = recalls

            # Extra columns for the report; the headers are defined elsewhere.
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precisions)),
                max(precisions),
                mean(np.array(recalls)),
                max(recalls),
            ]

            printSD(sd)
            results = addAnswer(results, sd)

            # Costs are tracked per configuration, so clear them here.
            sd.costs = []

    return results
Exemplo n.º 5
0
def computeKMLSCoreset(synthD):
    """Run ``ls.lsOutCor`` then ``lloyd.LloydOut`` on each dataset, verbosely.

    For every dataset in *synthD* and every ``j`` in
    ``range(int(sd.k / 2))`` one trial is run with ``sd.k + j`` centers:
    ``ls.lsOutCor`` produces centers, which are scored and then passed to
    ``lloyd.LloydOut``.  The cost after ``LloydOut`` is appended to
    ``sd.costs``, and the precision/recall it returns are recorded.

    This is an interactive debugging driver: it prints intermediate values
    and waits on ``input()`` after every trial.

    Args:
        synthD: iterable of items accepted by ``readSynthetic`` (presumably
            file paths -- confirm against the caller).

    Returns:
        The value produced by chaining ``addAnswer(stats, sd)`` once per
        (dataset, j) pair, starting from an empty list.
    """
    stats = []
    for num, f in enumerate(synthD):
        # Load and parse the dataset for this entry.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", num)

        for j in range(int(sd.k / 2)):
            print("TrueCost:", sd.phistar)
            # Pairwise distances between the first k rows of the data.
            print(distance.cdist(sd.data[0:sd.k], sd.data[0:sd.k]))
            sd.runphi = 1
            sd.runk = sd.k + j
            precs = []
            recs = []
            for _ in range(1):
                numpts = 2 * (sd.k + sd.z)
                print("numpts", numpts)
                ans, empz = ls.lsOutCor(sd.data,
                                        sd.runk,
                                        sd.z,
                                        eps,
                                        numpts,
                                        debug=True)

                cost2 = kmo.cost2(sd.data, ans, int(sd.z))

                print("------------\nBefore Lloyds:", cost2, empz)
                print(find_inds(sd.data, ans))
                # These values are only printed; LloydOut's values below
                # replace them before they are recorded.
                prec, rec = km.kMPrecRecallVar2(sd, ans, int(sd.z))
                print("Prec, rec", prec, rec)
                # The return value was never used; the call is kept in case
                # cr1 reports something itself (not visible from here).
                km.cr1(sd, ans)

                # Row indices k .. k+z-1, handed to LloydOut as ``zind``.
                zind = list(range(sd.k, sd.k + sd.z))

                # NOTE(review): six values are unpacked from LloydOut here --
                # confirm the arity against the lloyd module in use.
                ans, _cid, _wins, prec, rec, _unused = lloyd.LloydOut(
                    sd.data, ans, sd.runk, sd.z, 1, 100, zind)

                cost2 = kmo.cost2(sd.data, ans, int(sd.z))

                # Record the cost after the LloydOut step.
                sd.costs.append(cost2)

                print("After Lloyds:", cost2)
                print("Prec, rec", prec, rec)
                km.cr1(sd, ans)
                # NOTE(review): blocks until Enter is pressed (debugging
                # pause); remove before running unattended.
                input()
                precs.append(prec)
                recs.append(rec)

            sd.precs = precs
            sd.recs = recs

            # example for adding extra stats, i.e. time. For headers, go to top
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precs)),
                max(precs),
                mean(np.array(recs)),
                max(recs)
            ]

            printSD(sd)

            stats = addAnswer(stats, sd)
            # Start the next configuration with an empty cost list.
            sd.costs = []

    return stats
Exemplo n.º 6
0
def compute_phi_star(sd):
    """Return the ``kmo.cost2`` value used as the phi* estimate for *sd*.

    The rows of ``sd.data`` after the first ``sd.k + sd.z`` are scored against
    the first ``sd.k`` rows, with ``int(sd.z)`` as the third argument.

    NOTE(review): presumably the first k rows are the reference centers and
    the next z rows the outliers -- confirm against ``readSynthetic``.
    """
    skip = sd.k + sd.z
    remaining_rows = sd.data[skip:]
    leading_rows = sd.data[:sd.k]
    return kmo.cost2(remaining_rows, leading_rows, int(sd.z))