def main_cluster(rootpath, folderpath, vectorizer, numclusters):
    """Run the clustering steps against the ``final`` subfolder of *folderpath*.

    The steps run in order: obtain the k-means model, build the doc-to-label
    mapping, build the label-to-doc mapping, then store the tweets for the
    clusters. Progress messages and the k-means wall-clock time are printed.

    Parameters
    ----------
    rootpath
        Passed to ``Utility.PreprocessData`` and ``Clustering.GetCluster``.
    folderpath
        Parent folder; ``os.path.join(folderpath, 'final')`` is handed to
        every step.
    vectorizer
        Forwarded unchanged to ``Clustering.GetCluster``.
    numclusters
        Forwarded unchanged to ``getKmeans`` as the cluster count.

    Returns
    -------
    None
    """
    final_dir = os.path.join(folderpath, 'final')

    # Both helpers are built before any work starts, preprocessor first.
    tweet_store = Utility.PreprocessData(rootpath)
    clusterer = Clustering.GetCluster(vectorizer, rootpath)

    # Step 1: obtain the k-means model and report how long it took.
    print("Getting the k-means model...")
    began = time.time()
    model = clusterer.getKmeans(final_dir, numclusters)
    elapsed = time.time() - began
    print("---------- K-means: {} seconds ----------".format(elapsed))

    # Step 2: document -> label mapping derived from the model.
    print("Getting doc to label...")
    clusterer.getDoc2Label(final_dir, model)

    # Step 3: label -> document mapping derived from the model.
    print("Getting label to doc...")
    clusterer.getLabel2Doc(final_dir, model)

    # Step 4: store the tweets belonging to each cluster.
    print("Storing tweets for clusters...")
    tweet_store.storeTweets4Clusters(final_dir)
def getCluster(self, vectorizer, numclusters):
    """Run the clustering steps against ``<self.folderpath>/final``.

    The steps run in order: obtain the k-means model, build the doc-to-label
    mapping, build the label-to-doc mapping, then store the tweets for the
    clusters. Progress messages and the k-means wall-clock time are printed.

    Parameters
    ----------
    vectorizer : str
        the vectorizer used in addressing word2vec
        options: 'mean', 'tfidf'
    numclusters : int
        the number of clusters

    Returns
    -------
    None
    """
    target_dir = os.path.join(self.folderpath, 'final')

    # Both helpers are built before any work starts, preprocessor first.
    tweet_writer = Utility.PreprocessData(self.rootpath)
    cluster_tool = Clustering.GetCluster(vectorizer, self.rootpath)

    # Obtain the k-means model, timing the call with wall-clock time.
    print("Getting the k-means model...")
    t0 = time.time()
    kmeans_model = cluster_tool.getKmeans(target_dir, numclusters)
    duration = time.time() - t0
    print("---------- K-means: {} seconds ----------".format(duration))

    # Build the document -> label mapping from the model.
    print("Getting doc to label...")
    cluster_tool.getDoc2Label(target_dir, kmeans_model)

    # Build the label -> document mapping from the model.
    print("Getting label to doc...")
    cluster_tool.getLabel2Doc(target_dir, kmeans_model)

    # Store the tweets belonging to each cluster.
    print("Storing tweets for clusters...")
    tweet_writer.storeTweets4Clusters(target_dir)