def performanceForCDAAt(noOfTweets, fileName, **stream_settings):
    """Cluster the tweets in ``fileName`` and return evaluation metrics.

    An ``HDStreaminClustering`` instance is built from ``stream_settings`` and
    fed the documents yielded by ``TwitterIterators.iterateFromFile(fileName)``.
    Clusters holding fewer documents than
    ``stream_settings['cluster_filter_threshold']`` are dropped before
    evaluation.

    Args:
        noOfTweets: Passed through unchanged to
            ``Evaluation.getEvaluationMetrics``.
        fileName: Path handed to ``TwitterIterators.iterateFromFile``.
        **stream_settings: Forwarded to ``HDStreaminClustering``; must also
            contain ``'cluster_filter_threshold'``.

    Returns:
        Whatever ``Evaluation.getEvaluationMetrics`` returns for the retained
        clusters and the measured clustering time in seconds.
    """
    clustering = HDStreaminClustering(**stream_settings)

    # Only the clustering pass is timed; building the clustering object and
    # filtering the resulting clusters are deliberately left outside the timer.
    ts = time.time()
    clustering.cluster(TwitterIterators.iterateFromFile(fileName))
    te = time.time()

    documentClusters = []
    for _, cluster in clustering.clusters.iteritems():
        # Fetch the document ids once and reuse them for both the size check
        # and the stored result, instead of calling keys() twice per cluster.
        documentIds = cluster.documentsInCluster.keys()
        if len(documentIds) >= stream_settings['cluster_filter_threshold']:
            documentClusters.append(documentIds)

    return Evaluation.getEvaluationMetrics(noOfTweets, documentClusters, te - ts)
def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
    """Aggregate similar streams from ``fileName`` and return evaluation metrics.

    The pairs produced by ``iterateTweetUsersAfterCombiningTweets`` are turned
    into a dict and handed to ``SimilarStreamAggregation`` together with
    ``stream_settings['ssa_threshold']``. The timer covers building that input,
    running ``estimate()`` and collecting the clusters.

    Args:
        noOfTweets: Passed through unchanged to
            ``Evaluation.getEvaluationMetrics``.
        fileName: Path handed to ``iterateTweetUsersAfterCombiningTweets``.
        **stream_settings: Forwarded to
            ``iterateTweetUsersAfterCombiningTweets``; must also contain
            ``'ssa_threshold'``.

    Returns:
        Whatever ``Evaluation.getEvaluationMetrics`` returns for the collected
        clusters and the elapsed time in seconds.
    """
    startTime = time.time()

    combinedStreams = dict(
        iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings)
    )
    ssaThreshold = stream_settings['ssa_threshold']
    aggregator = SimilarStreamAggregation(combinedStreams, ssaThreshold)
    aggregator.estimate()
    documentClusters = list(aggregator.iterateClusters())

    elapsed = time.time() - startTime
    return Evaluation.getEvaluationMetrics(noOfTweets, documentClusters, elapsed)
def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
    """Time a SimilarStreamAggregation run over ``fileName`` and evaluate it.

    Everything from reading the combined tweet/user pairs through to listing
    the resulting clusters happens inside the timed region.

    Args:
        noOfTweets: Forwarded as-is to ``Evaluation.getEvaluationMetrics``.
        fileName: Input path given to ``iterateTweetUsersAfterCombiningTweets``.
        **stream_settings: Passed to ``iterateTweetUsersAfterCombiningTweets``;
            the ``'ssa_threshold'`` entry is also given to
            ``SimilarStreamAggregation``.

    Returns:
        The result of ``Evaluation.getEvaluationMetrics`` for the clusters
        found and the elapsed wall-clock seconds.
    """
    began = time.time()

    # dict() consumes the iterator of pairs up front, before the aggregation
    # object is constructed.
    streamsByKey = dict(
        iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings))
    sstObject = SimilarStreamAggregation(
        streamsByKey, stream_settings['ssa_threshold'])
    sstObject.estimate()
    documentClusters = [found for found in sstObject.iterateClusters()]

    finished = time.time()
    return Evaluation.getEvaluationMetrics(
        noOfTweets, documentClusters, finished - began)
def performanceForCDAAt(noOfTweets, fileName, **stream_settings):
    """Time an HDStreaminClustering run over ``fileName`` and evaluate it.

    Documents come from ``TwitterIterators.iterateFromFile(fileName)``. After
    clustering, only clusters with at least
    ``stream_settings['cluster_filter_threshold']`` documents are kept, and
    each kept cluster is represented by the keys of its
    ``documentsInCluster`` mapping.

    Args:
        noOfTweets: Forwarded as-is to ``Evaluation.getEvaluationMetrics``.
        fileName: Input path given to ``TwitterIterators.iterateFromFile``.
        **stream_settings: Passed to ``HDStreaminClustering``; the
            ``'cluster_filter_threshold'`` entry is also read here.

    Returns:
        The result of ``Evaluation.getEvaluationMetrics`` for the retained
        clusters and the wall-clock seconds spent in ``clustering.cluster``.
    """
    clustering = HDStreaminClustering(**stream_settings)

    # The timer brackets the cluster() call only.
    ts = time.time()
    clustering.cluster(TwitterIterators.iterateFromFile(fileName))
    te = time.time()

    # Pull each cluster's document ids exactly once; the original asked for
    # keys() twice per cluster (once to measure, once to keep).
    idsPerCluster = (
        cluster.documentsInCluster.keys()
        for _, cluster in clustering.clusters.iteritems()
    )
    documentClusters = [
        documentIds for documentIds in idsPerCluster
        if len(documentIds) >= stream_settings['cluster_filter_threshold']
    ]

    return Evaluation.getEvaluationMetrics(noOfTweets, documentClusters,
                                           te - ts)