Exemplo n.º 1
0
class Classifications():

    #static variables
    _category_path = os.path.join(os.path.dirname(__file__),
                                  "classifiers/category.slp")
    _rating_path = os.path.join(os.path.dirname(__file__),
                                "classifiers/rating.slp")
    _rating_nlp_path = os.path.join(os.path.dirname(__file__),
                                    "classifiers/rating_nlp.svm")
    _sentiment_path = os.path.join(os.path.dirname(__file__),
                                   "classifiers/sentiment.nb")

    _category = SLP.load(_category_path)
    _rating = SLP.load(_rating_path)
    _rating_nlp = SVM.load(_rating_nlp_path)
    _sentiment = NB.load(_sentiment_path)

    @staticmethod
    def selectWords(review):
        '''
        a function that gets a review and selects the nouns, adjectives, verbs and exclamation mark
        '''
        review = parsetree(review, lemmata=True)[0]  #lemmatize the review
        #select adjectives (JJ), nouns (NN), verbs (VB) and exclamation marks
        review = [
            w.lemma for w in review
            if w.tag.startswith(('JJ', 'NN', 'VB', '!'))
        ]
        review = count(review)  #a dictionary of (word, count)
        return review

    @staticmethod
    def classify(text):
        predicted_category = Classifications._category.classify(Document(text),
                                                                discrete=True)
        predicted_rate = Classifications._rating.classify(Document(text),
                                                          discrete=True)
        predicted_rate_nlp = Classifications._rating_nlp.classify(
            Classifications.selectWords(text), discrete=True)
        predicted_sentiment_dict = Classifications._sentiment.classify(
            Classifications.selectWords(text), discrete=False)
        predicted_sentiment = True if str(
            sorted(predicted_sentiment_dict.items(),
                   key=operator.itemgetter(1),
                   reverse=True)[1][0]) in ['True', '3.0', '4.0', '5.0'
                                            ] else False

        return {
            'text': text,
            'rate': predicted_rate,
            'category': predicted_category,
            'rate_nlp': predicted_rate_nlp,
            'positivity': predicted_sentiment
        }
Exemplo n.º 2
0
 def set_classifier(self):
     if self.name == 'SLP':
         return SLP(train=self.train_data, iterations=self.iterations)
     elif self.name == 'NB':
         return NB(train=self.train_data)
     else:
         print "Unknown classifier name"
Exemplo n.º 3
0
def normal_test(data, type):
    print '----------------------------------------------------'
    print 'TEST FUNCTION STARTED FOR ' + type + '!'
    total_data_size = len(data)
    training_size = int(round(total_data_size/2))
    test_size = training_size
    print 'Total Size: ' + str(total_data_size)
    print 'Training Size: ' + str(training_size)
    print 'Test Size: ' + str(test_size)

    print 'Training Started for ' + type + '!'
    classification_methods = {
      #uncomment based on what classification algorithm you would like to test
      'NB' :  NB(train=data[:training_size], baseline=MAJORITY, method=MULTINOMIAL),
      'KNN2' : KNN(train=data[:training_size], baseline=MAJORITY, k=2, distance=COSINE),
      'KNN3' : KNN(train=data[:training_size], baseline=MAJORITY, k=3, distance=COSINE),
      'KNN4' : KNN(train=data[:training_size], baseline=MAJORITY, k=4, distance=COSINE),
      'KNN5' : KNN(train=data[:training_size], baseline=MAJORITY, k=5, distance=COSINE),
      'KNN6' : KNN(train=data[:training_size], baseline=MAJORITY, k=6, distance=COSINE),
      'KNN7' : KNN(train=data[:training_size], baseline=MAJORITY, k=7, distance=COSINE),
      'KNN8' : KNN(train=data[:training_size], baseline=MAJORITY, k=8, distance=COSINE),
      'KNN9' : KNN(train=data[:training_size], baseline=MAJORITY, k=9, distance=COSINE),
      'KNN10' : KNN(train=data[:training_size], baseline=MAJORITY, k=10, distance=COSINE),
      'SLP1' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=1),
      'SLP2' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=2),
      'SLP3' : SLP(train=data[:training_size], baseline=MAJORITY, iterations=3),
      'SVM' : SVM(train=data[:training_size], type=CLASSIFICATION, kernel=POLYNOMIAL),
    }

    print 'Normal Testing Started!'
    # uncomment to start the normal test
    for classification in classification_methods.keys():
      #measure the time it takes to classify!
      start = timeit.default_timer()
      #normal test
      accuracy, precision, recall, f1 = classification_methods[classification].test(data[training_size:training_size+test_size])
      stop = timeit.default_timer()
      print '*' + classification + '*'
      print 'Accuracy: ' + str(accuracy)
      print 'Precision: ' + str(precision)
      print 'Recall: ' + str(recall)
      print 'F1-score: ' + str(f1)
      print 'Time: ' + str(stop - start)
      print
Exemplo n.º 4
0
# Perceptron is an error-driven classifier.
# When given a training example (e.g., tagged word + surrounding words),
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        prev = None
        next = None
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                next = s[i + 1]
            m.train(w, tag, prev, next)
            prev = (w, tag)
            next = None

f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
Exemplo n.º 5
0
 def set_classifier(self):
     train_feats = self.get_labeled_feats(self.training)
     tron = SLP(train=train_feats, iterations=4)
     return tron