def learn():
    """Train a TF-IDF + logistic-regression classifier and persist it.

    Features are obtained from ``extractFeaturesFromFile`` using the stoplist
    returned by ``makeStoplist``. Each feature is indexed as a sequence: item
    0 is the label and the remaining items are string tokens, which are
    joined with single spaces to form the document handed to the vectorizer.
    A label equal to ``"+1"`` becomes class ``1.0``; any other label becomes
    ``0.0``.

    Side effects (paths are relative to the current working directory):
        * ``X_train`` -- TF-IDF matrix via ``io.savemat`` under the key
          ``"X_train"`` (SciPy appends ``.mat`` to the name by default).
        * ``y_train`` -- label vector via ``np.save`` (NumPy appends
          ``.npy``).
        * ``tfidf.vec`` -- the fitted vectorizer via ``joblib.dump``.
        * ``logreg`` -- the fitted classifier.

    Returns:
        None.
    """
    stoplist = makeStoplist()
    features = extractFeaturesFromFile(stoplist=stoplist)

    vectorizer = TfidfVectorizer(encoding=ENCODING)
    X_train = vectorizer.fit_transform(
        [" ".join(feature[1:]) for feature in features]
    )

    # Binary targets as float64, matching the previous np.zeros-based array
    # (also for an empty feature list).
    y_train = np.array(
        [1.0 if feature[0] == "+1" else 0.0 for feature in features],
        dtype=float,
    )

    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    io.savemat("X_train", {"X_train": X_train})
    np.save("y_train", y_train)
    joblib.dump(vectorizer, "tfidf.vec")

    # scikit-learn estimators do not provide a save() method, so calling
    # clf.save() on one raises AttributeError after training has already
    # finished. Keep using save() when this LogisticRegression class really
    # defines it; otherwise persist with joblib, like the vectorizer above.
    # NOTE(review): confirm which LogisticRegression is imported in this file.
    save_model = getattr(clf, "save", None)
    if callable(save_model):
        save_model("logreg")
    else:
        joblib.dump(clf, "logreg")