class Classifications():
    """Static wrapper around four pre-trained pattern.vector classifiers.

    The serialized models are loaded once, at class-definition time, and
    shared by every call to :meth:`classify`.
    """

    # Static variables: on-disk locations of the serialized models,
    # resolved relative to this source file.
    _category_path = os.path.join(os.path.dirname(__file__), "classifiers/category.slp")
    _rating_path = os.path.join(os.path.dirname(__file__), "classifiers/rating.slp")
    _rating_nlp_path = os.path.join(os.path.dirname(__file__), "classifiers/rating_nlp.svm")
    _sentiment_path = os.path.join(os.path.dirname(__file__), "classifiers/sentiment.nb")

    # Pre-loaded classifier instances (single-layer perceptrons, an SVM
    # and a naive Bayes model), shared by all calls.
    _category = SLP.load(_category_path)
    _rating = SLP.load(_rating_path)
    _rating_nlp = SVM.load(_rating_nlp_path)
    _sentiment = NB.load(_sentiment_path)

    @staticmethod
    def selectWords(review):
        '''
        a function that gets a review and selects the nouns, adjectives,
        verbs and exclamation mark
        '''
        review = parsetree(review, lemmata=True)[0]  # lemmatize the review
        # select adjectives (JJ), nouns (NN), verbs (VB) and exclamation marks
        review = [w.lemma for w in review
                  if w.tag.startswith(('JJ', 'NN', 'VB', '!'))]
        return count(review)  # a dictionary of (word, count)

    @staticmethod
    def classify(text):
        """Classify *text* and return a dict with its predicted category,
        two rating predictions, and a boolean positivity flag.
        """
        doc = Document(text)
        words = Classifications.selectWords(text)
        predicted_category = Classifications._category.classify(doc, discrete=True)
        predicted_rate = Classifications._rating.classify(doc, discrete=True)
        predicted_rate_nlp = Classifications._rating_nlp.classify(words, discrete=True)
        # discrete=False: a {label: probability} dict rather than one label.
        predicted_sentiment_dict = Classifications._sentiment.classify(words, discrete=False)
        # BUG FIX: the original took index [1] of the probability-sorted
        # (descending) list, i.e. the SECOND most likely sentiment label.
        # Take the most likely label instead.
        best_label = max(predicted_sentiment_dict.items(),
                         key=operator.itemgetter(1))[0]
        # Positive if the winning label is boolean True or a rating >= 3.
        predicted_sentiment = str(best_label) in ('True', '3.0', '4.0', '5.0')
        return {
            'text': text,
            'rate': predicted_rate,
            'category': predicted_category,
            'rate_nlp': predicted_rate_nlp,
            'positivity': predicted_sentiment,
        }
def set_classifier(self):
    """Instantiate the classifier selected by ``self.name``.

    Returns an SLP (trained for ``self.iterations`` iterations) or an NB
    trained on ``self.train_data``; prints a warning and returns None for
    any other name.
    """
    if self.name == 'SLP':
        return SLP(train=self.train_data, iterations=self.iterations)
    if self.name == 'NB':
        return NB(train=self.train_data)
    print("Unknown classifier name")
def normal_test(data, type): print '----------------------------------------------------' print 'TEST FUNCTION STARTED FOR ' + type + '!' total_data_size = len(data) training_size = int(round(total_data_size/2)) test_size = training_size print 'Total Size: ' + str(total_data_size) print 'Training Size: ' + str(training_size) print 'Test Size: ' + str(test_size) print 'Training Started for ' + type + '!' classification_methods = { #uncomment based on what classification algorithm you would like to test 'NB' : NB(train=data[:training_size], baseline=MAJORITY, method=MULTINOMIAL), 'KNN2' : KNN(train=data[:training_size], baseline=MAJORITY, k=2, distance=COSINE), 'KNN3' : KNN(train=data[:training_size], baseline=MAJORITY, k=3, distance=COSINE), 'KNN4' : KNN(train=data[:training_size], baseline=MAJORITY, k=4, distance=COSINE), 'KNN5' : KNN(train=data[:training_size], baseline=MAJORITY, k=5, distance=COSINE), 'KNN6' : KNN(train=data[:training_size], baseline=MAJORITY, k=6, distance=COSINE), 'KNN7' : KNN(train=data[:training_size], baseline=MAJORITY, k=7, distance=COSINE), 'KNN8' : KNN(train=data[:training_size], baseline=MAJORITY, k=8, distance=COSINE), 'KNN9' : KNN(train=data[:training_size], baseline=MAJORITY, k=9, distance=COSINE), 'KNN10' : KNN(train=data[:training_size], baseline=MAJORITY, k=10, distance=COSINE), 'SLP1' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=1), 'SLP2' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=2), 'SLP3' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=3), 'SVM' : SVM(train=data[:training_size], type=CLASSIFICATION, kernel=POLYNOMIAL), } print 'Normal Testing Started!' # uncomment to start the normal test for classification in classification_methods.keys(): #measure the time it takes to classify! 
start = timeit.default_timer() #normal test accuracy, precision, recall, f1 = classification_methods[classification].test(data[training_size:training_size+test_size]) stop = timeit.default_timer() print '*' + classification + '*' print 'Accuracy: ' + str(accuracy) print 'Precision: ' + str(precision) print 'Recall: ' + str(recall) print 'F1-score: ' + str(f1) print 'Time: ' + str(stop - start) print
# Perceptron is an error-driven classifier. # When given a training example (e.g., tagged word + surrounding words), # it will check if it could correctly predict this example. # If not, it will adjust its weights. # So the accuracy of the perceptron can be improved significantly # by training in multiple iterations, averaging out all weights. # This will take several minutes. # If you want it to run faster for experimentation, # use less iterations or less data in the code below: print("training model...") seed(0) # Lock random list shuffling so we can compare. m = Model(known=known, unknown=unknown, classifier=SLP()) for iteration in range(5): for s in shuffled(data[:20000]): prev = None next = None for i, (w, tag) in enumerate(s): if i < len(s) - 1: next = s[i + 1] m.train(w, tag, prev, next) prev = (w, tag) next = None f = os.path.join(os.path.dirname(__file__), "en-model.slp") m.save(f, final=True) # Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
def set_classifier(self):
    """Return a single-layer perceptron trained for 4 iterations on the
    labeled feature set extracted from ``self.training``.
    """
    labeled_features = self.get_labeled_feats(self.training)
    return SLP(train=labeled_features, iterations=4)