Пример #1
0
class TestPositiveNaiveBayesClassifier(unittest.TestCase):
    """Tests for PositiveNaiveBayesClassifier trained on a small sports corpus."""

    def setUp(self):
        """Build a classifier from sports (positive) and mixed (unlabeled) sentences."""
        # Every element ends with a comma: a missing comma makes Python
        # silently concatenate two adjacent string literals into a single
        # (garbled) training sentence.
        sports_sentences = [
            'The team dominated the game',
            'They lost the ball',
            'The game was intense',
            'The goalkeeper catched the ball',
            'The other team controlled the ball',
            'The ball went off the court',
            'They had the ball for the whole game',
        ]

        various_sentences = [
            'The President did not comment',
            'I lost the keys',
            'The team won the game',
            'Sara has two kids',
            'The show is over',
            'The cat ate the mouse.',
        ]

        self.classifier = PositiveNaiveBayesClassifier(
            positive_set=sports_sentences,
            unlabeled_set=various_sentences)

    def test_classifier(self):
        """The wrapped ``classifier`` attribute is an NLTK PositiveNaiveBayesClassifier."""
        assert_true(isinstance(self.classifier.classifier,
                               nltk.classify.PositiveNaiveBayesClassifier))

    def test_classify(self):
        """A sports sentence classifies as truthy, an unrelated one as falsy."""
        assert_true(self.classifier.classify("My team lost the game."))
        assert_false(self.classifier.classify("The cat is on the table."))

    def test_update(self):
        """``update`` grows the positive and unlabeled sets by one item each."""
        orig_pos_length = len(self.classifier.positive_set)
        orig_unlabeled_length = len(self.classifier.unlabeled_set)
        self.classifier.update(
            new_positive_data=['He threw the ball to the base.'],
            new_unlabeled_data=["I passed a tree today."])
        new_pos_length = len(self.classifier.positive_set)
        new_unlabeled_length = len(self.classifier.unlabeled_set)
        assert_equal(new_pos_length, orig_pos_length + 1)
        assert_equal(new_unlabeled_length, orig_unlabeled_length + 1)

    def test_accuracy(self):
        """``accuracy`` returns a float for a labeled test set."""
        test_set = [
            ("My team lost the game", True),
            ("The ball was in the court.", True),
            ("We should have won the game.", True),
            ("And now for something completely different", False),
            ("I can't believe it's not butter.", False),
        ]
        accuracy = self.classifier.accuracy(test_set)
        assert_true(isinstance(accuracy, float))

    def test_repr(self):
        """``repr`` reports the sizes of the positive and unlabeled sets."""
        expected = (
            "<PositiveNaiveBayesClassifier trained on {0} labeled and {1} unlabeled instances>"
            .format(len(self.classifier.positive_set),
                    len(self.classifier.unlabeled_set))
        )
        assert_equal(repr(self.classifier), expected)
Пример #2
0
    def on_data(self, data):
        """Record one raw JSON payload, then classify and save every buffered tweet.

        The payload is parsed, printed and appended to ``self.tweet_data``.
        Each buffered payload is then copied field by field onto an
        ``Htweets2`` object, given a status/score from the classifiers, and
        saved.

        Args:
            data: JSON text for a single payload; each payload is read for
                the keys ``text``, ``timestamp_ms``, ``id``, ``user``,
                ``retweet_count``, ``favorite_count`` and ``source``.

        NOTE(review): ``self.tweet_data`` is never cleared in this method, so
        every call re-processes all earlier payloads, and a single
        ``Htweets2`` object is reused (and re-saved) for all of them. That
        behaviour is preserved here -- confirm it is intended.
        """
        # Parse once; the same object is printed and buffered.
        payload = json.loads(data)
        print(payload)
        self.tweet_data.append(payload)

        # The training sets do not change while looping, so build the two
        # classifiers once per call instead of once per buffered tweet.
        # (critical_train_*, *_neutral, cl and cl2 are expected to be defined
        # elsewhere in the module -- not visible from this method.)
        neg_classifier = PositiveNaiveBayesClassifier(
            positive_set=critical_train_neg, unlabeled_set=neg_neutral)
        ing_classifier = PositiveNaiveBayesClassifier(
            positive_set=critical_train_ing, unlabeled_set=ing_neutral)

        tweets = Htweets2()
        for x in self.tweet_data:
            text = x['text']
            self.just_text.append(text)

            tweets.tweet_timestamp = x['timestamp_ms']
            tweets.tweet_id = x['id']
            tweets.tweet_screenname = x['user']['screen_name']
            tweets.tweet_recount = x['retweet_count']
            tweets.tweet_favour_count = x['favorite_count']
            tweets.tweet_text = profanity.censor(text)

            tweets.tweet_location = x['user']['location']
            tweets.tweet_media_entities = x['source']

            # Classify once per tweet and reuse the answers below.
            is_neg = neg_classifier.classify(text)
            is_ing = ing_classifier.classify(text)

            # Branch order matters: the first matching rule wins. cl / cl2
            # are only consulted when the earlier checks make it necessary.
            # If no branch matches, status/score keep their previous values.
            if is_neg is True and cl.classify(text) == 'alert':
                print('not normal - alert')
                tweets.tweet_status = 'not normal'
                tweets.tweet_score = 'alert'
            elif is_neg is False:
                print('normal-no alert')
                tweets.tweet_status = 'normal'
                tweets.tweet_score = 'neutral'
            elif cl2.classify(text) == 'neu':
                print('normal-neutral')
                tweets.tweet_score = 'neutral'
                tweets.tweet_status = 'normal'
            elif is_ing is True and cl.classify(text) == 'critical':
                print('not normal - critical')
                tweets.tweet_status = 'not normal'
                tweets.tweet_score = 'critical'
            elif is_ing is False:
                print('normal-no critical')
                tweets.tweet_score = 'neutral'
                tweets.tweet_status = 'normal'

            tweets.save()
Пример #3
0
#2. Sentiment Analysis
# function -> text.sentiment
# result -> returns a named sentiment tuple (polarity, subjectivity)
# Polarity -> range from [-1.0,1.0]
# Subjectivity -> ranges from [0.0,1.0]
#				  0.0 -> very objective
#				  1.0 -> very subjective
# Score a sample product review and print its sentiment.
review = tb(
    "The product release was effective, and the over all release "
    "was good as well as smooth"
)
SENT = review.sentiment
print(SENT)

# 3. Classification
# Positive examples: sentences about sports.
sports_sentences = [
    'The team dominated the game',
    'They lost the ball',
    'The game was intense',
    'The goalkeeper catched the ball',
    'The other team controlled the ball',
]

# Unlabeled examples: a mix of sports and non-sports sentences.
various_sentences = [
    'The President did not comment',
    'I lost the keys',
    'The Game was Bad',
    'The team won the game',
    'Sara has two kids',
    'The ball went off the court',
    'The show is over',
]

classifier = PositiveNaiveBayesClassifier(
    positive_set=sports_sentences,
    unlabeled_set=various_sentences,
)

# Print the classifier's verdict for each sample sentence, in order.
for query in ("My team lost the game", "The Game was "):
    print(classifier.classify(query))