def __init__(self, docs, num_clu):
    """Build every pipeline stage from ``docs`` and keep each result on ``self``.

    Stages, in order: load sentences from DUC XML, remove stop words,
    collect the unique terms, weight the sentences, compare the weights
    against themselves, and cluster the resulting similarity structure.

    Args:
        docs: Forwarded unchanged to ``preprocessing.load_duc_xml``.
            NOTE(review): presumably a path or collection of DUC XML
            documents -- confirm against that loader.
        num_clu: Stored as ``self.no_clusters`` and handed to
            ``cluster.kmedoid`` as its second argument (presumably the
            number of clusters to produce).
    """
    self.no_clusters = num_clu

    # Active loader is the DUC XML one; the plain loader
    # ``preprocessing.load_sentences(docs)`` was the earlier alternative.
    loaded = preprocessing.load_duc_xml(docs)
    self.sentences = loaded

    filtered = preprocessing.remove_stopwords(loaded)
    self.sent_no_swords = filtered
    # Disabled experiment kept for reference: append the whole document
    # (``helper.fulldoc(self.sentences)``) to ``self.sent_no_swords``.

    vocabulary = helper.uniqueterms(filtered)
    self.unique_terms = vocabulary

    # Alternative weighting kept for reference:
    #   helper.word_vector(self.sent_no_swords, self.unique_terms)
    weights = helper.tfisf(filtered, vocabulary)
    self.sent_weight = weights

    # Both arguments are the same weights, i.e. every sentence is compared
    # with every sentence of the same set.
    pairwise = helper.similarity(weights, weights)
    self.sent_similarity = pairwise

    self.clusters = cluster.kmedoid(pairwise, self.no_clusters)
def __init__(self,docs,num_clu): self.no_clusters = num_clu print "Loading Sentences..." self.sentences = preprocessing.load_sentences(docs) print "Preprocessing..." self.sent_no_swords = preprocessing.remove_stopwords(self.sentences) self.unique_terms = helper.uniqueterms(self.sent_no_swords) self.sent_weight = helper.tfisf(self.sent_no_swords,self.unique_terms) #self.sent_weight = helper.word_vector(self.sent_no_swords,self.unique_terms) print "Finding Similarity Graph..." self.sent_similarity = helper.similarity(self.sent_weight,self.sent_weight) print "Clustering..." self.clusters = cluster.kmedoid(self.sent_similarity,self.no_clusters) '''
def __init__(self, docs, num_clu): self.no_clusters = num_clu print "Loading Sentences..." self.sentences = preprocessing.load_sentences(docs) print "Preprocessing..." self.sent_no_swords = preprocessing.remove_stopwords(self.sentences) self.unique_terms = helper.uniqueterms(self.sent_no_swords) self.sent_weight = helper.tfisf(self.sent_no_swords, self.unique_terms) #self.sent_weight = helper.word_vector(self.sent_no_swords,self.unique_terms) print "Finding Similarity Graph..." self.sent_similarity = helper.similarity(self.sent_weight, self.sent_weight) print "Clustering..." self.clusters = cluster.kmedoid(self.sent_similarity, self.no_clusters) '''