Exemplo n.º 1
0
 def execute(self, docs):
     """
     Split the corpus into ``self.k`` clusters by repeated bisection.

     Every document in ``self.bible.docs`` starts out in one cluster.
     While fewer than ``self.k`` clusters exist, the cluster returned by
     ``self.select_cluster`` is removed and split in two with a 2-way
     K-Means. The split is attempted ``self.iter`` times and the attempt
     with the highest ``kmeans.similarity()`` replaces the removed
     cluster. The resulting list is stored on ``self.clusters``.

     Key arguments:
     docs -- not read by this method (@see parent for the interface);
             the documents that get clustered come from
             ``self.bible.docs``.

     Raises:
     ValueError -- if a split is required but ``self.iter`` allows no
                   attempt at all.
     """
     clusters = []

     # First cluster contains all of the docs.
     cluster = dotdict()
     cluster.docs = []

     for doc in self.bible.docs:
         doc.cluster = cluster
         cluster.docs.append(doc)

     cluster.centroid = self.centroid(cluster.docs)

     # Append the first cluster to the cluster list.
     clusters.append(cluster)

     # "<" rather than "!=": the list only ever grows, so a target below
     # the current size could never be reached by splitting further.
     while len(clusters) < self.k:
         # Use the abstract select cluster method.
         cluster = self.select_cluster(clusters)

         # Remove this cluster from the current set because it will be split.
         clusters.remove(cluster)

         max_sim = float("-inf")
         max_bicluster = None

         for _ in range(self.iter):
             # Free the docs from whatever cluster they are in.
             for doc in cluster.docs:
                 doc.cluster = None

             kmeans = KMeans(2)
             kmeans.bible = self.bible

             kmeans.execute(cluster.docs)
             bicluster = kmeans.clusters

             sim = kmeans.similarity()
             # Always keep the first attempt so a NaN / -inf similarity
             # cannot leave the split without any candidate.
             if max_bicluster is None or sim > max_sim:
                 max_sim = sim
                 max_bicluster = bicluster

         if max_bicluster is None:
             raise ValueError(
                 "self.iter must allow at least one split attempt")

         # Re-assign the documents to their respective max bicluster.
         # The best attempt is used here, not merely the last one tried;
         # the docs currently point at the last attempt's clusters.
         for half in max_bicluster:
             for doc in half.docs:
                 doc.cluster = half

         # Add the new max bicluster to the current cluster set.
         clusters.extend(max_bicluster)

     self.clusters = clusters
Exemplo n.º 2
0
    def execute(self, docs):
        """
        Cluster with the parent algorithm, then refine with K-Means.

        The parent class's execute (UPGMA, per the original docstring)
        runs on docs first. A KMeans instance built with self.k and
        sharing self.bible is then executed on the same docs, receiving
        self.clusters as its second argument.

        Key arguments:
        docs -- the docs to cluster.
        """
        super(AgglomerativeUPGMAKMeans, self).execute(docs)

        # The refinement pass works over the same bible as this instance.
        refiner = KMeans(self.k)
        refiner.bible = self.bible

        # self.clusters is handed over as-is; K-Means is expected to
        # change these clusters from within its own execute.
        refiner.execute(docs, self.clusters)
Exemplo n.º 3
0
 def execute(self, docs):
     """
     Cluster with the parent algorithm, then refine with K-Means.

     The parent class's execute (UPGMA, per the original docstring)
     runs on docs first. A KMeans instance built with self.k and
     sharing self.bible is then executed on the same docs, receiving
     self.clusters as its second argument.

     Key arguments:
     docs -- the docs to cluster.
     """
     super(AgglomerativeUPGMAKMeans, self).execute(docs)

     # The refinement pass works over the same bible as this instance.
     refiner = KMeans(self.k)
     refiner.bible = self.bible

     # self.clusters is handed over as-is; K-Means is expected to
     # change these clusters from within its own execute.
     refiner.execute(docs, self.clusters)
Exemplo n.º 4
0
    def execute(self, docs):
        """
        Split the corpus into ``self.k`` clusters by repeated bisection.

        Every document in ``self.bible.docs`` starts out in one cluster.
        While fewer than ``self.k`` clusters exist, the cluster returned by
        ``self.select_cluster`` is removed and split in two with a 2-way
        K-Means. The split is attempted ``self.iter`` times and the attempt
        with the highest ``kmeans.similarity()`` replaces the removed
        cluster. The resulting list is stored on ``self.clusters``.

        Key arguments:
        docs -- not read by this method (@see parent for the interface);
                the documents that get clustered come from
                ``self.bible.docs``.

        Raises:
        ValueError -- if a split is required but ``self.iter`` allows no
                      attempt at all.
        """
        clusters = []

        # First cluster contains all of the docs.
        cluster = dotdict()
        cluster.docs = []

        for doc in self.bible.docs:
            doc.cluster = cluster
            cluster.docs.append(doc)

        cluster.centroid = self.centroid(cluster.docs)

        # Append the first cluster to the cluster list.
        clusters.append(cluster)

        # "<" rather than "!=": the list only ever grows, so a target below
        # the current size could never be reached by splitting further.
        while len(clusters) < self.k:
            # Use the abstract select cluster method.
            cluster = self.select_cluster(clusters)

            # Remove this cluster from the current set because it will be split.
            clusters.remove(cluster)

            max_sim = float("-inf")
            max_bicluster = None

            for _ in range(self.iter):
                # Free the docs from whatever cluster they are in.
                for doc in cluster.docs:
                    doc.cluster = None

                kmeans = KMeans(2)
                kmeans.bible = self.bible

                kmeans.execute(cluster.docs)
                bicluster = kmeans.clusters

                sim = kmeans.similarity()
                # Always keep the first attempt so a NaN / -inf similarity
                # cannot leave the split without any candidate.
                if max_bicluster is None or sim > max_sim:
                    max_sim = sim
                    max_bicluster = bicluster

            if max_bicluster is None:
                raise ValueError(
                    "self.iter must allow at least one split attempt")

            # Re-assign the documents to their respective max bicluster.
            # The best attempt is used here, not merely the last one tried;
            # the docs currently point at the last attempt's clusters.
            for half in max_bicluster:
                for doc in half.docs:
                    doc.cluster = half

            # Add the new max bicluster to the current cluster set.
            clusters.extend(max_bicluster)

        self.clusters = clusters