Exemplo n.º 1
0
def main():
    """Run the jump test on a two-component synthetic data set and plot it.

    The data set is the concatenation of the outputs of
    ``sgt.genAnnulusCluster`` and ``sgt.genCluster``, both centred on
    (0.5, 0.5). The jump-test plot is produced with
    ``fname='../media/jumpRing.png'``.
    """
    n_annulus = 200   # point count passed to genAnnulusCluster
    max_clusters = 10  # first JumpTest argument (K)
    n_reals = 5        # second JumpTest argument (B)

    # generate fake data (the annulus component is generated first)
    centre = (0.5, 0.5)
    annulus_pts = sgt.genAnnulusCluster(centre, 0.4, 0.3, n_annulus)
    blob_pts = sgt.genCluster(centre, 0.1, 100)
    points = np.concatenate((annulus_pts, blob_pts), axis=0)

    # pairwise distance matrix
    separations = snt.getDistanceMatrix(points)

    # Single-linkage clustering into two groups. The fitted estimator is only
    # needed by the optional spt.plotData call, which is currently disabled.
    estimator = AgglomerativeClustering(n_clusters=2,
                                        linkage='single',
                                        affinity='precomputed')
    clustering = estimator.fit(separations)

    # jump test (alternative: sst.ElbowTest(K).fit(dist, linkage='single'))
    jump_test = sst.JumpTest(max_clusters, n_reals)
    jump_result = jump_test.fit(separations, linkageMethod='single')

    # plot it; alternatives kept for reference:
    #   spt.plotData(points, labels=clustering.labels_,
    #                fname='../media/ring.singleLink1.png')
    #   spt.plotElbowTest(points, elbow_result)
    spt.plotJumpTest(points, jump_result, fname='../media/jumpRing.png')
Exemplo n.º 2
0
def main():
    """Run the dendrogram test on a generated point cloud and plot the result.

    NOTE(review): the four-cluster data set assembled first is immediately
    replaced by the output of ``sgt.genPoisson``, so only the latter reaches
    the test. The cluster generation is kept because it still executes (and
    presumably consumes random numbers) -- confirm whether the override is
    intentional.
    """
    # program constants
    max_clusters = 10  # max number of clusters
    n_reals = 100      # number of null realisations; not used in this script
    n_points = 100     # number of points in the input data cloud

    # cluster sizes: the last cluster takes whatever is left of n_points
    sizes = [20, 20, 10]
    sizes.append(n_points - sum(sizes))
    centres = [(0.3, 0.7), (0.1, 0.3), (0.7, 0.5), (0.5, 0.1)]

    # generate the clusters in the listed order
    blobs = [
        sgt.genCluster(centre, 0.1, size)
        for centre, size in zip(centres, sizes)
    ]

    # create the data set ...
    points = np.concatenate(tuple(blobs), axis=0)
    # ... and replace it with a uniform point cloud
    points = sgt.genPoisson(n_points)

    # convert data to network
    separations = snt.getDistanceMatrix(points)

    dendro_test = sst.DendroTest(max_clusters)
    dendro_result = dendro_test.fit(separations)

    # plot the top levels of the dendrogram
    spt.plotDendroTest(points, dendro_result, fname='../media/dendro3.png')
Exemplo n.º 3
0
def main():
    """Run the gap test on three generated clusters and plot the result.

    The plot is produced by ``spt.plotGapTest`` with
    ``fname='../media/gap.png'``.
    """
    # program constants
    max_clusters = 10  # max number of clusters
    n_reals = 100      # number of null realisations created by gap test
    n_points = 100     # number of points in the input data cloud

    # cluster sizes: the third cluster takes the remainder of n_points
    size_a, size_b = 30, 20
    size_c = n_points - size_a - size_b

    # generate clusters (order of generation matters for reproducibility)
    blob_a = sgt.genCluster((0.3, 0.7), 0.1, size_a)
    blob_b = sgt.genCluster((0.1, 0.3), 0.1, size_b)
    blob_c = sgt.genCluster((0.7, 0.5), 0.1, size_c)

    # create data set
    # (alternative: sgt.genPoisson(n_points) for a uniform point cloud)
    points = np.concatenate((blob_a, blob_b, blob_c), axis=0)

    # convert data to network
    separations = snt.getDistanceMatrix(points)

    # perform gap test and plot it
    gap_test = sst.GapTest(max_clusters, n_reals)
    gap_result = gap_test.fit(separations)
    spt.plotGapTest(points, gap_result, fname='../media/gap.png')
Exemplo n.º 4
0
def main():
    """Generate an S-shaped data set, print its stats and plot a filter histogram.

    Filter values come from ``mst.filterDensity``; the plot is produced by
    ``mpt.plotPointsFilterHist`` with
    ``fname='../media/s.filterHist.den.png'``.
    """
    n_points = 200        # number of points in each cluster
    centre = (0.5, 0.5)   # center of cluster

    # generate the data; other generators that can be swapped in:
    #   mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3*np.pi)
    #   mgt.genDisk(0.3, centre, n_points)
    #   mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   mgt.genRect(0.2, 0.5, centre, n_points)
    #   mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=7.)
    #   mgt.genEight(0.6, 0.1, centre, n_points)
    #   mgt.genGauss(centre, 0.1, 0.02, n_points, rot_angle=10.)
    points = mgt.genS(0.6, 0.1, centre, n_points)

    # pairwise separation matrix
    separations = snt.getDistanceMatrix(points)

    # compute filter values; alternatives:
    #   mst.filterEccentric(separations, 3)
    #   mst.filterLinfinity(separations)
    filter_values = mst.filterDensity(separations, 0.1)

    # print stats
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # plot the data; alternatives:
    #   mpt.plotPoints(points, labels=filter_values,
    #                  fname='../media/cigar.ecc.png')
    #   mpt.plotShapeHist(points, separations,
    #                     fname='../media/cigar.shapeHist.png')
    mpt.plotPointsFilterHist(
        points, filter_values, fname='../media/s.filterHist.den.png')
Exemplo n.º 5
0
def main():
    """Cluster three small generated point groups and plot clusters with graph.

    Requires one command-line argument, which is forwarded as ``flabel`` to
    ``spt.plotClustersAndGraph`` (together with ``ffolder='../media/'``).

    Raises:
        SystemExit: with a usage message when the argument is missing. The
            check runs before any data is generated, so a forgotten argument
            no longer surfaces as a bare ``IndexError`` after all the work
            has been done.
    """
    # validate the command line up front
    if len(sys.argv) < 2:
        script = sys.argv[0] if sys.argv else "<script>"
        raise SystemExit(f"usage: {script} <flabel>")
    flabel = sys.argv[1]

    # generate clusters:
    data_mat = np.concatenate(
        (
            sgt.genCluster((0.2, 0.5), 0.15, 6),
            sgt.genCluster((0.6, 0.7), 0.15, 5),
            sgt.genCluster((0.7, 0.2), 0.15, 3),
        ),
        axis=0)

    # convert data to network:
    dist_mat = snt.getDistanceMatrix(data_mat)

    # perform average-linkage clustering on the precomputed distance matrix:
    clustering = AgglomerativeClustering(n_clusters=3,
                                         linkage='average',
                                         affinity='precomputed').fit(dist_mat)

    # plot it (other spt helpers such as plotData / plotGraph /
    # plotClustersGraph / plotPointsGraph can be called here instead):
    spt.plotClustersAndGraph(data_mat,
                             clustering.labels_,
                             ffolder='../media/',
                             flabel=flabel)
Exemplo n.º 6
0
def main():
    """Build a mapper graph for a cigar-shaped data set and plot the results.

    Steps: generate the data with ``mgt.genCigar``, compute the pairwise
    distance matrix, print the stats table, compute filter values with
    ``mst.filterLinfinity``, fit an ``mst.Mapper`` and produce two plots
    (the filter histogram and the mapper cluster graph).
    """
    n_points = 200       # number of points in each cluster
    n_cover_sets = 3     # number of sets in the cover
    max_clusters = 5     # max number of clusters in each cover member set
    n_reals = 100        # number of null realisations created each gap test
    centre = (0.5, 0.5)  # center of cluster

    # generate the data; other generators that can be swapped in:
    #   mgt.genDisk(0.3, centre, n_points)
    #   mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   mgt.genRect(0.2, 0.5, centre, n_points)
    #   mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=0.)
    #   mgt.genS(0.6, 0.1, centre, n_points)
    #   mgt.genEight(0.6, 0.1, centre, n_points)
    #   mgt.genGauss(centre, 0.1, 0.02, n_points, rot_angle=10.)
    points = mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3 * np.pi)

    # pairwise separation matrix
    separations = snt.getDistanceMatrix(points)

    # print stats
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # compute filter values; alternatives:
    #   mst.filterDensity(separations, 0.1)
    #   mst.filterEccentric(separations, 3)
    filter_values = mst.filterLinfinity(separations)

    eps = 0.4  # fourth Mapper argument

    # initialise the mapper object over the observed filter range and fit it
    filter_low = min(filter_values)
    filter_high = max(filter_values)
    mapper = mst.Mapper(filter_low, filter_high, n_cover_sets, eps,
                        max_clusters, n_reals)
    mapper.fit(points, separations, filter_values)

    # plot the data; alternatives:
    #   mpt.plotPoints(points, labels=filter_values,
    #                  fname='../media/cigar.ecc.png')
    #   mpt.plotShapeHist(points, separations,
    #                     fname='../media/cigar.shapeHist.png')
    mpt.plotPointsFilterHist(
        points, filter_values, fname='../media/cigar.filterHist.lin.png')
    mpt.plotGraph(
        mapper.clusterGraph,
        '../media/cigar.mapperGraph.png',
        colorAtt='avfvalue')
Exemplo n.º 7
0
    def fit(self, dist_mat, linkageMethod='average'):
        """Fit the test to a precomputed pairwise distance matrix.

        Builds the agglomerative dendrogram of ``dist_mat``, takes log10 of
        its ``self.maxclusters`` largest merge distances, and compares them
        with the same quantity computed on ``self.nreals`` reference point
        clouds generated by ``sgt.genPoisson`` with the same number of points.

        Parameters
        ----------
        dist_mat : 2-D array
            Pairwise distance matrix; its first dimension is taken as the
            number of data points.
        linkageMethod : str, default 'average'
            Forwarded as ``linkage`` to ``AgglomerativeClustering``.

        Returns
        -------
        self
            With these attributes set: ``linkage_mat``, ``w_log_vec``,
            ``null_linkage_mat`` (last reference realisation only),
            ``wnull_log_average_vec``, ``wnull_log_err_vec`` and ``gap``.

        Notes
        -----
        The error term is the standard deviation over the reference
        realisations scaled by ``sqrt(1 + 1/nreals)``. An earlier version
        used the variance here, which mis-scaled the tolerance applied in
        the optimal-k rule below.
        """
        n_points = dist_mat.shape[0]
        print(n_points)  # diagnostic output kept from the original code

        # Slicing column 2 with [:-n_top:-1] yields its last
        # ``self.maxclusters`` entries in reverse order.
        n_top = self.maxclusters + 1

        def _dendrogram(matrix):
            """Return (linkage matrix, log10 of the top merge distances)."""
            # distance_threshold=0 with n_clusters=None is the documented
            # scikit-learn setting for computing the full merge tree.
            model = AgglomerativeClustering(linkage=linkageMethod,
                                            affinity='precomputed',
                                            distance_threshold=0,
                                            n_clusters=None).fit(matrix)
            linkage = get_linkage_matrix(model)
            return linkage, np.log10(linkage[:, 2][:-n_top:-1])

        # data dendrogram:
        self.linkage_mat, self.w_log_vec = _dendrogram(dist_mat)

        # reference realisations, one column per realisation:
        wnull_log_mat = np.empty([self.maxclusters, self.nreals])
        for b in range(self.nreals):
            distnull_mat = snt.getDistanceMatrix(sgt.genPoisson(n_points))
            self.null_linkage_mat, wnull_log_mat[:, b] = \
                _dendrogram(distnull_mat)

        self.wnull_log_average_vec = wnull_log_mat.mean(1)
        # standard deviation (not variance) times sqrt(1 + 1/B)
        self.wnull_log_err_vec = wnull_log_mat.std(1) \
            * (1 + 1 / self.nreals)**0.5

        # gap: reference average minus the observed value
        self.gap = self.wnull_log_average_vec - self.w_log_vec

        # estimate optimal k: first index i whose gap is at least the next
        # gap minus the next error term; reported as i + 1
        for i in range(self.maxclusters - 1):
            if self.gap[i] >= self.gap[i + 1] - self.wnull_log_err_vec[i + 1]:
                print(f"optimal k: {i+1}")
                break

        return self
Exemplo n.º 8
0
def main():
    """Cluster three generated point groups and plot per-cluster shape histograms.

    The data are clustered into three groups with average linkage on the
    precomputed distance matrix; the labels are passed to
    ``mpt.plotShapeHist``.
    """
    # generate the data: three groups of 20 points, generated in this order
    # (alternative: sgt.genPoisson(60); quick look: spt.plotData(points))
    centres = [(0.2, 0.2), (0.6, 0.7), (0.8, 0.2)]
    groups = [sgt.genCluster(centre, 0.1, 20) for centre in centres]
    points = np.concatenate(tuple(groups), axis=0)

    # compute distance matrix
    separations = snt.getDistanceMatrix(points)

    # cluster the data
    estimator = AgglomerativeClustering(linkage='average',
                                        affinity='precomputed',
                                        n_clusters=3)
    fitted = estimator.fit(separations)

    # shape histogram with the cluster labels
    # (alternative: spt.plotData(points, labels=fitted.labels_))
    mpt.plotShapeHist(points, separations, labels=fitted.labels_)
Exemplo n.º 9
0
def main():
    """Run the elbow test on three generated clusters and plot the result.

    The plot is produced by ``spt.plotElbowTest`` with
    ``fname='../media/elbow.png'``.
    """
    max_clusters = 10  # max number of clusters

    # fake data parameters: (centre, max radius, number of points) per cluster
    sizes = (30, 20, 15)
    radii = (0.3, 0.1, 0.1)
    centres = ((0.3, 0.7), (0.1, 0.3), (0.7, 0.5))

    # generate clusters in the listed order
    blobs = [
        sgt.genCluster(centre, radius, size)
        for centre, radius, size in zip(centres, radii, sizes)
    ]

    # create data set
    # total point count; only needed by the alternative uniform point cloud,
    # sgt.genPoisson(n_total)
    n_total = sizes[0] + sizes[1] + sizes[2]
    points = np.concatenate(tuple(blobs), axis=0)

    # convert data to network
    separations = snt.getDistanceMatrix(points)

    # perform elbow test and plot it
    elbow_test = sst.ElbowTest(max_clusters)
    elbow_result = elbow_test.fit(separations)
    spt.plotElbowTest(points, elbow_result, fname='../media/elbow.png')
Exemplo n.º 10
0
def main():
    """Generate a disk data set, print its stats table and plot a shape histogram.

    The plot is produced by ``mpt.plotShapeHist`` with
    ``fname='../media/disk.hist.png'``.
    """
    n_points = 200       # number of points in each cluster
    centre = (0.5, 0.5)  # center of cluster

    # generate the data; other generators that can be swapped in:
    #   mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3*np.pi)
    #   mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   mgt.genRect(0.2, 0.5, centre, n_points)
    #   mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=7.)
    #   mgt.genS(0.6, 0.1, centre, n_points)
    #   mgt.genEight(0.6, 0.1, centre, n_points)
    points = mgt.genDisk(0.3, centre, n_points)

    # pairwise separation matrix
    separations = snt.getDistanceMatrix(points)

    # print stats
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # plot the data
    # (alternative: mpt.plotPoints(points, fname='../media/cigar.png'))
    mpt.plotShapeHist(points, separations, fname='../media/disk.hist.png')