def test_countvectorizer_custom_vocabulary():
    """Check that CountVectorizer works with a caller-supplied vocabulary.

    The vectorizer is built with a fixed two-term vocabulary, fitted on
    JUNK_FOOD_DOCS, and must then (a) expose exactly those terms through
    its ``vocabulary`` attribute and (b) emit one column per term when
    transforming the same documents.
    """
    liked_terms = ["pizza", "beer"]
    vectorizer = CountVectorizer(vocabulary=liked_terms)
    vectorizer.fit(JUNK_FOOD_DOCS)

    # The terms known to the vectorizer are exactly the ones supplied.
    assert_equal(set(vectorizer.vocabulary), set(liked_terms))

    # The transformed matrix has one column per supplied term.
    counts = vectorizer.transform(JUNK_FOOD_DOCS)
    n_columns = counts.shape[1]
    assert_equal(n_columns, len(liked_terms))
class SVM:
    """Text classifier pairing a fixed-vocabulary CountVectorizer with LinearSVC.

    All training artifacts are read through ``load`` at construction time
    and the classifier is fitted immediately, so an instance is ready for
    ``classify`` as soon as ``__init__`` returns.
    """

    def __init__(self, training, classes, vocabulary):
        """Load the stored artifacts and fit the linear SVM.

        Args:
            training: Handed to ``load``; the loaded object is converted
                with ``.tolist()`` and used as the training samples.
            classes: Handed to ``load``; the loaded object is used as the
                training labels.
            vocabulary: Handed to ``load``; the loaded object is converted
                with ``.tolist()`` and used as the vectorizer vocabulary.

        NOTE(review): the ``.tolist()`` calls suggest ``load`` returns
        array-like objects (presumably numpy arrays) -- confirm against
        the import at the top of the file.
        """
        terms = load(vocabulary).tolist()
        self.cv = CountVectorizer(vocabulary=terms)

        self.samples = load(training).tolist()
        self.classes = load(classes)

        self.classifier = LinearSVC()
        self.classifier.fit(self.samples, self.classes)

    def classify(self, text):
        """Return the predicted class for a single piece of text.

        The text is wrapped in a one-element list for the vectorizer, so
        the prediction for that only document is the first entry of the
        classifier's output.
        """
        vectorized = self.cv.transform([text])
        predictions = self.classifier.predict(vectorized)
        return predictions[0]