class Classifications():
    """Namespace bundling four pre-trained review classifiers.

    The models are loaded once, when the class body is executed, from the
    "classifiers/" directory that sits next to this module. All methods are
    static; the class is never meant to be instantiated.
    """

    # Static variables: locations of the serialized models.
    _category_path = os.path.join(os.path.dirname(__file__), "classifiers/category.slp")
    _rating_path = os.path.join(os.path.dirname(__file__), "classifiers/rating.slp")
    _rating_nlp_path = os.path.join(os.path.dirname(__file__), "classifiers/rating_nlp.svm")
    _sentiment_path = os.path.join(os.path.dirname(__file__), "classifiers/sentiment.nb")

    # Static variables: the loaded models, shared by every call to classify().
    _category = SLP.load(_category_path)
    _rating = SLP.load(_rating_path)
    _rating_nlp = SVM.load(_rating_nlp_path)
    _sentiment = NB.load(_sentiment_path)

    @staticmethod
    def selectWords(review):
        '''
        Reduce a review to a bag of lemmas used as classifier features.

        Keeps the lemma of every word whose part-of-speech tag starts with
        JJ (adjective), NN (noun), VB (verb) or "!" (exclamation mark) and
        returns the result of count() over those lemmas, i.e. a
        (word, count) mapping.

        NOTE(review): only element [0] of the parse tree is inspected, which
        presumably means only the first sentence of the review contributes
        features - confirm this matches how the models were trained.
        '''
        first_sentence = parsetree(review, lemmata=True)[0]  # lemmatize the review
        wanted_prefixes = ('JJ', 'NN', 'VB', '!')
        lemmas = [
            w.lemma for w in first_sentence
            if w.tag.startswith(wanted_prefixes)
        ]
        return count(lemmas)  # a dictionary of (word, count)

    @staticmethod
    def classify(text):
        '''
        Run every classifier on `text` and collect the predictions.

        Returns a dict with the keys:
          'text'       - the input text, unchanged
          'rate'       - label predicted by the rating model on Document(text)
          'category'   - label predicted by the category model on Document(text)
          'rate_nlp'   - label predicted by the NLP rating model on the
                         selectWords() features
          'positivity' - True when the most probable sentiment label is one
                         of 'True', '3.0', '4.0', '5.0' (compared as strings),
                         otherwise False
        '''
        # Build each input representation once and share it between models.
        document = Document(text)
        features = Classifications.selectWords(text)

        predicted_category = Classifications._category.classify(document, discrete=True)
        predicted_rate = Classifications._rating.classify(document, discrete=True)
        predicted_rate_nlp = Classifications._rating_nlp.classify(features, discrete=True)

        # discrete=False yields a (label, probability) mapping instead of a
        # single label.
        predicted_sentiment_dict = Classifications._sentiment.classify(features, discrete=False)

        if predicted_sentiment_dict:
            # Use the MOST probable label. The previous implementation sorted
            # descending and then read index [1], i.e. the runner-up, which
            # inverted two-class results and raised IndexError when fewer
            # than two labels were present.
            top_label = max(predicted_sentiment_dict.items(),
                            key=operator.itemgetter(1))[0]
            predicted_sentiment = str(top_label) in ('True', '3.0', '4.0', '5.0')
        else:
            # No probabilities at all: nothing supports a positive verdict.
            predicted_sentiment = False

        return {
            'text': text,
            'rate': predicted_rate,
            'category': predicted_category,
            'rate_nlp': predicted_rate_nlp,
            'positivity': predicted_sentiment
        }
from pattern.vector import SVM
from os.path import realpath, dirname, join

# Jseg's jieba is imported inside sentipol_tmp() rather than at module level.

# Directory that holds this module; the model and Sentipol.py are resolved
# relative to it so the module works regardless of the current directory.
CUR_PATH = dirname(realpath(__file__))

# Pre-trained polarity classifier.
sentipol_cls = SVM.load(join(CUR_PATH, 'svm_mod.gpk'))

# Execute Sentipol.py in this module's namespace; sentipol_tmp() below relies
# on the `sentipol` name this is expected to define. exec(compile(...)) is
# used instead of execfile() because execfile() only exists in Python 2; this
# form behaves the same there and also works on Python 3. The file is read as
# bytes so that compile() honours any source-encoding declaration.
_sentipol_path = join(CUR_PATH, 'Sentipol.py')
with open(_sentipol_path, 'rb') as _sentipol_src:
    exec(compile(_sentipol_src.read(), _sentipol_path, 'exec'))


def sentipol_tmp(text):
    """Segment `text` and return its polarity together with detail output.

    The text is segmented with Jseg's jieba, stripped of part-of-speech
    information via nopos(), and split on whitespace into tokens.

    Returns a tuple (pol, details) where `pol` is the result of
    sentipol_cls.classify() on the tokens and `details` is the result of
    sentipol() on the same tokens.
    """
    from Jseg import jieba
    tokens = jieba.seg(text).nopos().split()
    pol = sentipol_cls.classify(tokens)
    details = sentipol(tokens)
    return pol, details
from pattern.vector import SVM, KNN, NB, count, shuffled
from pattern.en import tag, predicative

# Pre-trained sentiment model, resolved relative to the current working
# directory. (A throw-away `SVM()` instance used to be created immediately
# before this line and was overwritten without ever being used.)
classifier = SVM.load("sentiment.p")


def instance(review):
    """Turn a review string into a bag-of-words feature dictionary.

    Steps, illustrated with "Great book!":
      1. tag()          -> [("Great", "JJ"), ("book", "NN"), ("!", "!")]
      2. keep words tagged JJ, RB, VB, VBZ, NN, NNS, NNP or NNPS, plus any
         token that is exactly "!"
      3. predicative()  -> applied to every kept word
      4. count()        -> mapping of word to number of occurrences

    Returns the mapping produced by count().
    """
    kept_tags = ("JJ", "RB", "VB", "VBZ", "NN", "NNS", "NNP", "NNPS")
    tagged = tag(review)
    # `word == "!"` replaces `word in ("!")`: the parentheses did not make a
    # tuple, so that was a substring test against the string "!" (which also
    # matches the empty string) rather than a membership test.
    words = [word for (word, pos) in tagged if pos in kept_tags or word == "!"]
    words = [predicative(word) for word in words]
    return count(words)


# Smoke test executed at import time: classify one sample and print the label.
score = classifier.classify(instance("you little bitch"))
print(score)