Example #1
import itertools as it

import numpy as np

# ftur is an external module expected to provide rmsd() for two coordinate
# arrays; its import is not shown in the original snippet.


def fast_clustering(coords):
    MAX_RMSD = 30  # Max RMSD for two structures to share a cluster.
    # Try a range of cutoffs and report the resulting cluster counts; only the
    # last value (30) determines the clustering that is actually returned.
    for MAX_RMSD in [
            10, 15, 20, 25, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 30
    ]:
        clusters = []
        i_to_clust = []
        for i, coord in enumerate(coords):
            for j, cluster in enumerate(clusters):
                # Join the first cluster whose representative (first member) is close enough.
                if ftur.rmsd(coord, coords[cluster[0]]) < MAX_RMSD:
                    cluster.append(i)
                    i_to_clust.append(j)
                    break
            else:
                # No existing cluster is close enough; start a new one.
                clusters.append([i])
                i_to_clust.append(len(clusters) - 1)
        print("{} maxRMSD, {} clusters".format(MAX_RMSD, len(clusters)))
    print("Starting dists")
    # Pairwise distances: exact RMSD within a cluster, infinity across clusters.
    dists = np.zeros((len(coords), len(coords)))
    for i, j in it.combinations(range(len(coords)), 2):
        if i_to_clust[i] == i_to_clust[j]:
            dists[i, j] = ftur.rmsd(coords[i], coords[j])
        else:
            dists[i, j] = float('inf')
    return i_to_clust, dists
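The snippet relies on an external ftur.rmsd; assuming it returns a root-mean-square deviation between two equal-shape coordinate arrays, a naive stand-in (no optimal superposition) for experimenting with fast_clustering could be:

import numpy as np

def naive_rmsd(a, b):
    # Hypothetical stand-in for ftur.rmsd: plain RMSD without superposition.
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return np.sqrt(np.mean(np.sum((a - b) ** 2, axis=1)))

# Example: coords = [np.random.rand(50, 3) * 100 for _ in range(20)]
#          i_to_clust, dists = fast_clustering(coords)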
def topN_closest_cluster_nodes(self, n, img_index):
    # Assumes `from operator import itemgetter` and
    # `from scipy.spatial import distance` at module level.
    # Collect every other sample that shares the query image's cluster label.
    query_cluster = self.labels[img_index]
    cluster = [i for i, label in enumerate(self.labels)
               if label == query_cluster and i != img_index]
    # Euclidean distance from the query image to each member of its cluster.
    dist = {x: distance.euclidean(self.data[img_index], self.data[x])
            for x in cluster}
    # Return the n nearest (index, distance) pairs.
    tops = sorted(dist.items(), key=itemgetter(1))[:n]
    return tops
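A minimal usage sketch, assuming the method sits on an object that exposes labels (one cluster label per sample) and data (the corresponding feature vectors); the container class and the sample variables below are hypothetical:

import numpy as np
from operator import itemgetter
from scipy.spatial import distance


class ClusteredImages:
    def __init__(self, data, labels):
        self.data = np.asarray(data)   # (n_samples, n_features) feature vectors
        self.labels = list(labels)     # one cluster label per sample

    # Reuse the module-level function above as a bound method.
    topN_closest_cluster_nodes = topN_closest_cluster_nodes

# ci = ClusteredImages(features, kmeans_labels)
# print(ci.topN_closest_cluster_nodes(5, img_index=0))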
Example #3
def outwrite(vocab, labels, outpath):
    # Flatten labels that arrive wrapped in a sequence (e.g. [3]) into plain scalars.
    labels1 = []
    for i in labels:
        try:
            labels1.append(i[0])
        except (TypeError, IndexError):
            labels1.append(i)
    # Write one line per cluster: "Cluster <id>:" followed by a tab and its words.
    with open(outpath, 'w') as outfile:
        for i in set(labels1):
            words = [word for word, label in zip(vocab, labels1) if label == i]
            outfile.write("Cluster " + str(i) + ":\t" + ' '.join(words) + '\n')
Example #4
import random

import numpy as np


def find_boundarieskmeans(binarylist, numclusters, clusterimportance):
    '''
    :param binarylist: list of clustering labels
    :param numclusters: number of clusters to identify
    :param clusterimportance: mu as discussed in the thesis, normally expected segment size * 0.5
    :return: boundaries detected using k-boundaries
    '''
    def customdifference(point1, point2):
        # Distance along the sequence, plus a fixed penalty when the labels differ.
        if point1[0] == point2[0]:
            modifier = 0
        else:
            modifier = clusterimportance
        return abs(point1[1] - point2[1]) + modifier

    size = len(binarylist) / numclusters
    # Initial guess: evenly spread centroids, which should match our expectation.
    centers = [(k, size * k + size / 2) for k in range(numclusters)]
    for _ in range(25):  # Fixed number of k-means-style iterations.
        clusters = [([], centers[k]) for k in range(numclusters)]
        for j in range(len(binarylist)):
            value = binarylist[j]
            point = [value, j]
            # Allocation step: assign the point to its closest center.
            bestcenter = min(centers, key=lambda x: customdifference(point, x))
            for cluster, center in clusters:
                if center == bestcenter:
                    cluster.append(point)
        # Update step: recompute each center from its members.
        centers = []
        for cluster, center in clusters:
            if len(cluster) > 0:
                clustervalue = np.mean([x[0] for x in cluster])
                numericcenter = np.mean([x[1] for x in cluster])
            else:
                # Re-seed empty clusters at a random label and position.
                clustervalue = int(round(random.random() * numclusters))
                numericcenter = random.random() * len(binarylist)
            centers.append([int(round(clustervalue)), numericcenter])
        # Derive boundaries from the current clusters (external helper, not shown here).
        boundaries = boundariesfromcenters(clusters)
    return boundaries
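boundariesfromcenters is an external helper not shown in the snippet; a purely illustrative, hypothetical sketch (assuming a boundary is the last sequence position of each cluster, which may differ from the thesis implementation):

def boundariesfromcenters(clusters):
    # Hypothetical: report the last position of every non-empty cluster,
    # sorted along the sequence; the end of the final segment is not a boundary.
    ends = sorted(max(p[1] for p in members) for members, _center in clusters if members)
    return ends[:-1]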
Example #6
import numpy as np


def filter_isolated_idxs_new(idxs=[], maxdist=3.0):
    # Keep only indices that belong to a run of two or more consecutive True values;
    # isolated single True entries are filtered out. (maxdist is unused here.)
    clusters = []
    cluster = []

    newidxs = np.zeros_like(idxs)

    for i in range(len(idxs)):
        if idxs[i]:
            cluster.append(i)

        # Close the current run at a False entry or at the end of the array.
        if (i == len(idxs) - 1 or not idxs[i]) and len(cluster):
            clusters.append(cluster)
            cluster = []

    for i in range(len(clusters)):
        # A run of length one is an isolated index and is dropped.
        valid = len(clusters[i]) > 1
        if valid:
            newidxs[clusters[i]] = True
            # print(clusters[i], m.lcs[0][clusters[i]])

    return newidxs
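A small usage example with a hand-made boolean mask (data is made up); the lone True at index 5 is dropped while the longer runs are kept:

mask = np.array([True, True, False, False, False, True, False, True, True, True])
print(filter_isolated_idxs_new(mask))
# -> [ True  True False False False False False  True  True  True]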
def _expand_cluster(self, sample_i, neighbors):
    # Assumes `import numpy as np` at module level.
    cluster = [sample_i]
    # Iterate through neighbors
    for neighbor_i in neighbors:
        if neighbor_i not in self.visited_samples:
            self.visited_samples.append(neighbor_i)
            # Fetch the sample's distant neighbors
            self.neighbors[neighbor_i] = self._get_neighbors(neighbor_i)
            # Make sure the neighbor's neighbors are more than min_samples
            if len(self.neighbors[neighbor_i]) >= self.min_samples:
                # Choose neighbors of neighbor except for sample
                distant_neighbors = self.neighbors[neighbor_i][np.where(
                    self.neighbors[neighbor_i] != sample_i)]
                # Add the neighbor's neighbors as neighbors of sample
                self.neighbors[sample_i] = np.concatenate(
                    (self.neighbors[sample_i], distant_neighbors))
                # Recursively expand the cluster from the neighbor
                expanded_cluster = self._expand_cluster(
                    neighbor_i, self.neighbors[neighbor_i])
                # Add expanded cluster to this cluster
                cluster = cluster + expanded_cluster
        # Only add the neighbor if it is not already assigned to a cluster
        if not any(neighbor_i in c for c in self.clusters):
            cluster.append(neighbor_i)
    return cluster
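The method above assumes a DBSCAN-style class that provides visited_samples, neighbors, min_samples, clusters and a _get_neighbors helper; a minimal, hypothetical sketch of that helper (self.X, self.eps and the Euclidean distance are assumptions, not taken from the snippet):

def _get_neighbors(self, sample_i):
    # Hypothetical: indices of all other samples within radius self.eps of sample_i.
    neighbors = []
    for i, sample in enumerate(self.X):
        if i != sample_i and np.linalg.norm(sample - self.X[sample_i]) < self.eps:
            neighbors.append(i)
    return np.array(neighbors)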
# Which data point forms each cluster centre
centers_indices = aff_prop_cluster.cluster_centers_indices_
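For context, aff_prop_cluster and labels presumably come from scikit-learn's AffinityPropagation; a minimal, assumed setup with placeholder data:

import numpy as np
from sklearn.cluster import AffinityPropagation

X = np.random.rand(30, 4)                      # placeholder feature matrix
aff_prop_cluster = AffinityPropagation(random_state=0).fit(X)
labels = aff_prop_cluster.labels_              # one cluster label per sample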


print "--labels_---------------"
print [ lab for lab in labels ]

print "-- cluster_centers_indices_ -------"
print [i for i in centers_indices]



cluster = []

for c in centers_indices:
    cluster.append([])

for i, label in enumerate(labels):
    # print(names[i], " : ", label)
    cluster[label].append( [ i, symbols[i], names[i]] )


print "cluster"
print cluster


for j, cl in enumerate(cluster):

    # Better view
    # plt.figure()