def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
    # Time an SSA clustering pass over the combined tweets in fileName and
    # return the resulting evaluation metrics.
    ts = time.time()
    sstObject = SimilarStreamAggregation(
        dict(iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings)),
        stream_settings['ssa_threshold'])
    sstObject.estimate()
    documentClusters = list(sstObject.iterateClusters())
    te = time.time()
    return Evaluation.getEvaluationMetrics(noOfTweets, documentClusters, te - ts)

def getStatsForSSA(self):
    # Cluster this stream's user documents with SSA and report evaluation
    # metrics along with the elapsed clustering time.
    print 'SSA'
    ts = time.time()
    sstObject = SimilarStreamAggregation(
        dict(self._iterateUserDocuments()),
        self.stream_settings['ssa_threshold'])
    sstObject.estimate()
    documentClusters = list(sstObject.iterateClusters())
    te = time.time()
    return self.getEvaluationMetrics(documentClusters, te - ts)

def getStatsForSSA():
    # Measure SSA running time over successive 10k-message batches and append
    # one JSON record per batch to ssa_stats_file.
    batchSize = 10000
    default_experts_twitter_stream_settings['ssa_threshold'] = 0.75
    for id in range(21, 50):
        fileName = time_to_process_points + '%s/%s' % (batchSize, id)
        ts = time.time()
        sstObject = SimilarStreamAggregation(
            dict(iterateUserDocuments(fileName)),
            default_experts_twitter_stream_settings['ssa_threshold'])
        sstObject.estimate()
        # documentClusters = list(sstObject.iterateClusters())
        iteration_data = {
            'iteration_time': time.time() - ts,
            'type': 'ssa',
            'number_of_messages': batchSize * (id + 1),
            'batch_size': batchSize
        }
        FileIO.writeToFileAsJson(iteration_data, ssa_stats_file)

def test_estimate(self):
    # 'vectors' is a test fixture presumably defined elsewhere in the test
    # module; estimate() at threshold 0.99 should group it into two clusters.
    nn = SimilarStreamAggregation(vectors, 0.99)
    nn.estimate()
    self.assertEqual([['1', '3', '2'], ['5', '7']],
                     list(nn.iterateClusters()))
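
Every example above follows the same three-step pattern: construct SimilarStreamAggregation from a dict mapping document ids to documents plus a similarity threshold, call estimate(), then materialize iterateClusters(). The sketch below distills that pattern; it is a minimal illustration, not project code. The documents argument stands in for the project-specific iterators used above (iterateUserDocuments, _iterateUserDocuments, iterateTweetUsersAfterCombiningTweets), and the SimilarStreamAggregation import path is assumed to come from the project's own modules, which these examples do not show.

import time

def timedSSAClusters(documents, ssa_threshold):
    # documents: dict of document id -> document, as built by the dict(...)
    # calls above; ssa_threshold: similarity cutoff (0.75-0.99 in these examples).
    ts = time.time()
    ssa = SimilarStreamAggregation(documents, ssa_threshold)
    ssa.estimate()                          # run the aggregation pass
    clusters = list(ssa.iterateClusters())  # each cluster is a list of document ids
    return clusters, time.time() - ts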