Exemplo n.º 1
0
def extractFeaturesFromFile(data_file="sentiment.txt", stoplist=None):
    """Extract features from every line of a text file.

    Args:
        data_file: Path of the file to read. It is opened in text mode using
            the module-level ``ENCODING``.
        stoplist: Stop list passed through to ``extractFeaturesFromString``.
            When ``None``, one is built by calling ``makeStoplist()``.

    Returns:
        A list holding one ``extractFeaturesFromString`` result per line of
        the file, in file order.
    """
    # Build the stop list once up front rather than once per line.
    stoplist = makeStoplist() if stoplist is None else stoplist
    with open(data_file, "r", encoding=ENCODING) as handle:
        return [extractFeaturesFromString(row, stoplist) for row in handle]
Exemplo n.º 2
0
def learn():
    """Train a logistic-regression model on TF-IDF features and save it.

    Feature lists are obtained from ``extractFeaturesFromFile`` (called with
    its default data file). The first element of each list is used as the
    label and the remaining elements are joined with spaces to form the
    document text that is vectorized.

    Side effects: writes ``X_train`` (via ``io.savemat``), ``y_train`` (via
    ``np.save``), and dumps the fitted vectorizer to ``tfidf.vec`` and the
    fitted classifier to ``logreg.clf`` (via ``joblib.dump``).

    Returns:
        None.
    """
    stop_words = makeStoplist()
    samples = extractFeaturesFromFile(stoplist=stop_words)

    # Everything after the leading label token is the document text.
    tfidf = TfidfVectorizer(encoding=ENCODING)
    documents = [" ".join(sample[1:]) for sample in samples]
    X_train = tfidf.fit_transform(documents)

    # Binary target: 1 for samples labelled "+1", 0 for everything else.
    y_train = np.array(
        [1.0 if sample[0] == "+1" else 0.0 for sample in samples],
        dtype=float,
    )

    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Persist the training data and the fitted objects for later reuse.
    io.savemat("X_train", {"X_train": X_train})
    np.save("y_train", y_train)
    joblib.dump(tfidf, "tfidf.vec")
    joblib.dump(model, "logreg.clf")
Exemplo n.º 3
0
#coding:utf-8

import sys
from sklearn.externals import joblib
from question71 import makeStoplist
from question72 import extractFeaturesFromString

if __name__ == "__main__":
    # Interactive prediction loop: load the persisted vectorizer and
    # classifier, then read one text per line from stdin and print the
    # predicted label ("-1" or "+1") for each.

    # Index 0 -> "-1", index 1 -> "+1"; indexed by the classifier's output.
    labels = ["-1", "+1"]

    vectorizer = joblib.load("tfidf.vec")
    clf = joblib.load("logreg.clf")
    stoplist = makeStoplist()
    while True:
        try:
            test = input()
        except EOFError:
            # stdin is exhausted (piped input ended or Ctrl-D was pressed):
            # leave the loop cleanly instead of dying with a traceback.
            break
        test = extractFeaturesFromString(test, stoplist)
        # The vectorizer expects an iterable of documents, hence the
        # single-element list; predict returns one value per document.
        predicted = clf.predict(vectorizer.transform([" ".join(test)]))[0]
        print(labels[int(predicted)])
        # Flush after every answer so a consumer on a pipe sees it at once.
        sys.stdout.flush()