def __init__(self, categories, vocabulary=None):
    """Set up the classifier with a fixed-vocabulary TF-IDF vectorizer.

    Args:
        categories: Forwarded unchanged to ``BasicClassificator.__init__``.
        vocabulary: Vocabulary to hand to ``TfidfVectorizer``. When
            ``None`` (the default), one is loaded through
            ``Utility.load_vocabulary(num_words=100000)``.

    Side effects:
        Prints a progress message when the vocabulary has to be loaded,
        and always prints the size of the vocabulary in use.
    """
    BasicClassificator.__init__(self, categories)

    if vocabulary is None:
        # No vocabulary supplied by the caller: fall back to the default
        # one provided by Utility.
        # Single-argument parenthesised print emits the same text on
        # Python 2 and is also valid syntax on Python 3.
        print('Loading vocabulary..')
        self.vocabulary = Utility.load_vocabulary(num_words=100000)
    else:
        self.vocabulary = vocabulary

    # The vectorizer is built from whichever vocabulary was selected above.
    self.vectorizer = TfidfVectorizer(vocabulary=self.vocabulary,
                                      stop_words=Utility.stop_words)
    print('Vocabulary size: %d' % len(self.vocabulary))
def __init__(self, categories):
    """Initialise the base classifier and a fixed-vocabulary TF-IDF vectorizer.

    Args:
        categories: Forwarded unchanged to ``BasicClassificator.__init__``.

    The vocabulary is always obtained from
    ``Utility.load_vocabulary(num_words=100000)`` and stored on the
    instance before the vectorizer is created from it.
    """
    BasicClassificator.__init__(self, categories)

    # Store the vocabulary first; the vectorizer options below read it
    # back from the instance.
    self.vocabulary = Utility.load_vocabulary(num_words=100000)

    vectorizer_options = {
        'vocabulary': self.vocabulary,
        'stop_words': Utility.stop_words,
    }
    self.vectorizer = TfidfVectorizer(**vectorizer_options)