def top_n_terms(self, n=100):
    ''' Returns the names of the first n terms from terms_sorted().

        The ordering (highest MI first, per the original docstring) is
        whatever terms_sorted() provides; every item it yields is read
        through its 'term' key.
    '''
    ranked = self.terms_sorted()
    return [entry['term'] for entry in ranked[:n]]
def top_n_terms(self, n=100):
    ''' Returns the names of the first n terms from terms_sorted().

        The ordering (highest MI first, per the original docstring) is
        whatever terms_sorted() provides; every item it yields is read
        through its 'term' key.
    '''
    ranked = self.terms_sorted()
    return [entry['term'] for entry in ranked[:n]]