Пример #1
0
	def update(self, data):
		"""Replace the cluster's tweets and report how far the centroid moved.

		Stores ``data`` as the new tweet collection, recomputes ``tf`` and
		``dt`` from it via ``computeTF_DT``, and picks a new centroid with
		``calculateCentroid``.

		Returns the ``tweet_distance`` between the previous and the new
		centroid, as a float, divided by the smaller of the two centroids'
		token counts (as produced by ``tokenise``).

		Note: the division raises ZeroDivisionError when either centroid
		yields zero tokens.
		"""
		# Keep the outgoing centroid so the shift can be measured afterwards.
		previous = self.centroid

		self.tweets = data
		tf_value, dt_value = computeTF_DT(self.tweets)
		self.tf, self.dt = tf_value, dt_value
		self.centroid = self.calculateCentroid()

		# Normalise the raw distance by the shorter centroid's token count.
		shift = float(tweet_distance(previous, self.centroid))
		shortest = min(len(tokenise(previous)), len(tokenise(self.centroid)))
		return shift / shortest
Пример #2
0
	def __init__(self, data):
		"""Create a cluster seeded with a single tweet.

		``data`` becomes both the only member of ``self.tweets`` and the
		initial centroid; ``tf`` and ``dt`` are then computed from that
		one-element list via ``computeTF_DT``.

		Raises Exception("ILLEGAL: empty cluster") when ``len(data)`` is 0.
		"""
		if len(data) < 1:
			raise Exception("ILLEGAL: empty cluster")

		# The cluster starts out holding exactly the seed tweet.
		members = [data]
		self.tweets = members
		# With a single member, that member is the centroid.
		self.centroid = data
		# Term frequency and dominant term for the current members.
		tf_value, dt_value = computeTF_DT(self.tweets)
		self.tf, self.dt = tf_value, dt_value