Пример #1
0
	def __init__(self, docs, num_clu):
		"""Run the sentence pipeline on ``docs`` up to the clustering step.

		Every intermediate result is stored on the instance in the order it
		is produced: loaded sentences, stop-word-filtered sentences, unique
		terms, sentence weights, the similarity of the weights against
		themselves, and finally the clusters.

		Args:
			docs: Input handed unchanged to ``preprocessing.load_duc_xml``
				(presumably DUC-format XML -- confirm against that loader).
			num_clu: Cluster count forwarded to ``cluster.kmedoid``.
		"""
		self.no_clusters = num_clu

		# Alternative loader kept for reference: preprocessing.load_sentences(docs).
		loaded = preprocessing.load_duc_xml(docs)
		self.sentences = loaded

		filtered = preprocessing.remove_stopwords(loaded)
		self.sent_no_swords = filtered

		# Disabled experiment: appending helper.fulldoc(self.sentences) to the
		# filtered sentences before the unique terms are collected.
		vocabulary = helper.uniqueterms(filtered)
		self.unique_terms = vocabulary

		# Alternative weighting kept for reference: helper.word_vector with
		# the same two arguments.
		weights = helper.tfisf(filtered, vocabulary)
		self.sent_weight = weights

		# The weights are compared with themselves, i.e. sentence vs. sentence.
		pairwise = helper.similarity(weights, weights)
		self.sent_similarity = pairwise

		self.clusters = cluster.kmedoid(pairwise, num_clu)
Пример #2
0
	def __init__(self, docs, num_clu):
		"""Load ``docs``, preprocess the sentences and cluster them.

		Progress messages are printed to standard output before each stage.
		Every intermediate result is stored on the instance.

		Args:
			docs: Input handed unchanged to ``preprocessing.load_sentences``.
			num_clu: Cluster count forwarded to ``cluster.kmedoid``.
		"""
		self.no_clusters = num_clu

		# Single-argument, parenthesized print emits the same text on
		# Python 2 and is also valid syntax on Python 3.
		print("Loading Sentences...")
		self.sentences = preprocessing.load_sentences(docs)

		print("Preprocessing...")
		self.sent_no_swords = preprocessing.remove_stopwords(self.sentences)
		self.unique_terms = helper.uniqueterms(self.sent_no_swords)
		# Alternative weighting kept for reference: helper.word_vector with
		# the same two arguments.
		self.sent_weight = helper.tfisf(self.sent_no_swords, self.unique_terms)

		print("Finding Similarity Graph...")
		# The weights are compared with themselves, i.e. sentence vs. sentence.
		self.sent_similarity = helper.similarity(self.sent_weight, self.sent_weight)

		print("Clustering...")
		self.clusters = cluster.kmedoid(self.sent_similarity, self.no_clusters)
		'''
Пример #3
0
 def __init__(self, docs, num_clu):
     """Load ``docs``, preprocess the sentences and cluster them.

     Progress messages are printed to standard output before each stage.
     Every intermediate result is stored on the instance.

     Args:
         docs: Input handed unchanged to ``preprocessing.load_sentences``.
         num_clu: Cluster count forwarded to ``cluster.kmedoid``.
     """
     self.no_clusters = num_clu

     # Single-argument, parenthesized print emits the same text on
     # Python 2 and is also valid syntax on Python 3.
     print("Loading Sentences...")
     self.sentences = preprocessing.load_sentences(docs)

     print("Preprocessing...")
     self.sent_no_swords = preprocessing.remove_stopwords(self.sentences)
     self.unique_terms = helper.uniqueterms(self.sent_no_swords)
     # Alternative weighting kept for reference: helper.word_vector with
     # the same two arguments.
     self.sent_weight = helper.tfisf(self.sent_no_swords, self.unique_terms)

     print("Finding Similarity Graph...")
     # The weights are compared with themselves, i.e. sentence vs. sentence.
     self.sent_similarity = helper.similarity(self.sent_weight,
                                              self.sent_weight)

     print("Clustering...")
     self.clusters = cluster.kmedoid(self.sent_similarity, self.no_clusters)
     '''