Exemplo n.º 1
0
class TestPositiveNaiveBayesClassifier(unittest.TestCase):
    """Tests for PositiveNaiveBayesClassifier trained on a tiny sports corpus."""

    def setUp(self):
        """Train a fresh classifier from positive (sports) and unlabeled sentences."""
        # Every sentence must be its own list element. Adjacent string
        # literals without a separating comma are concatenated by Python,
        # which would silently merge two training samples into one.
        sports_sentences = [
            'The team dominated the game',
            'They lost the ball',
            'The game was intense',
            'The goalkeeper catched the ball',
            'The other team controlled the ball',
            'The ball went off the court',
            'They had the ball for the whole game',
        ]

        various_sentences = [
            'The President did not comment',
            'I lost the keys',
            'The team won the game',
            'Sara has two kids',
            'The show is over',
            'The cat ate the mouse.',
        ]

        self.classifier = PositiveNaiveBayesClassifier(
            positive_set=sports_sentences,
            unlabeled_set=various_sentences)

    def test_classifier(self):
        """The wrapped ``classifier`` attribute is NLTK's positive naive Bayes model."""
        assert_true(isinstance(self.classifier.classifier,
                               nltk.classify.PositiveNaiveBayesClassifier))

    def test_classify(self):
        """A sports sentence classifies truthy and an unrelated one falsy."""
        assert_true(self.classifier.classify("My team lost the game."))
        assert_false(self.classifier.classify("The cat is on the table."))

    def test_update(self):
        """``update`` grows the positive and unlabeled sets by one item each."""
        orig_pos_length = len(self.classifier.positive_set)
        orig_unlabeled_length = len(self.classifier.unlabeled_set)
        self.classifier.update(new_positive_data=['He threw the ball to the base.'],
                               new_unlabeled_data=["I passed a tree today."])
        new_pos_length = len(self.classifier.positive_set)
        new_unlabeled_length = len(self.classifier.unlabeled_set)
        assert_equal(new_pos_length, orig_pos_length + 1)
        assert_equal(new_unlabeled_length, orig_unlabeled_length + 1)

    def test_accuracy(self):
        """``accuracy`` returns a float for a labeled (text, bool) test set."""
        test_set = [
            ("My team lost the game", True),
            ("The ball was in the court.", True),
            ("We should have won the game.", True),
            ("And now for something completely different", False),
            ("I can't believe it's not butter.", False)
        ]
        accuracy = self.classifier.accuracy(test_set)
        assert_true(isinstance(accuracy, float))

    def test_repr(self):
        """``repr`` reports the sizes of the positive and unlabeled sets."""
        assert_equal(repr(self.classifier),
            "<PositiveNaiveBayesClassifier trained on {0} labeled and {1} unlabeled instances>"
                .format(len(self.classifier.positive_set),
                        len(self.classifier.unlabeled_set))
                     )
Exemplo n.º 2
0
    def setUp(self):
        """Train a fresh classifier from positive (sports) and unlabeled sentences."""
        # Every sentence must be its own list element. Adjacent string
        # literals without a separating comma are concatenated by Python,
        # which would silently merge two training samples into one.
        sports_sentences = [
            'The team dominated the game',
            'They lost the ball',
            'The game was intense',
            'The goalkeeper catched the ball',
            'The other team controlled the ball',
            'The ball went off the court',
            'They had the ball for the whole game',
        ]

        various_sentences = [
            'The President did not comment',
            'I lost the keys',
            'The team won the game',
            'Sara has two kids',
            'The show is over',
            'The cat ate the mouse.',
        ]

        self.classifier = PositiveNaiveBayesClassifier(
            positive_set=sports_sentences, unlabeled_set=various_sentences)
Exemplo n.º 3
0
    def setUp(self):
        """Train a fresh classifier from positive (sports) and unlabeled sentences."""
        # Every sentence must be its own list element. Adjacent string
        # literals without a separating comma are concatenated by Python,
        # which would silently merge two training samples into one.
        sports_sentences = ['The team dominated the game',
                            'They lost the ball',
                            'The game was intense',
                            'The goalkeeper catched the ball',
                            'The other team controlled the ball',
                            'The ball went off the court',
                            'They had the ball for the whole game']

        various_sentences = ['The President did not comment',
                             'I lost the keys',
                             'The team won the game',
                             'Sara has two kids',
                             'The show is over',
                             'The cat ate the mouse.']

        self.classifier = PositiveNaiveBayesClassifier(positive_set=sports_sentences,
                                                       unlabeled_set=various_sentences)
Exemplo n.º 4
0
    def on_data(self, data):
        """Parse one raw JSON payload, then classify and save the stored tweets.

        The decoded payload is printed and appended to ``self.tweet_data``.
        Every entry of ``self.tweet_data`` is then copied onto an ``Htweets2``
        record, its text is censored via ``profanity.censor``, a status/score
        pair is chosen from the classifier results, and the record is saved.

        Args:
            data: JSON text for a single tweet; it must decode to a mapping
                containing the keys read below (``text``, ``timestamp_ms``,
                ``id``, ``user``, ``retweet_count``, ``favorite_count``,
                ``source``).
        """
        # Decode once and reuse the result for both logging and storage.
        payload = json.loads(data)
        print(payload)
        self.tweet_data.append(payload)

        # The training sets do not depend on the tweet being scored, so the
        # classifiers are built once per call instead of once per iteration.
        classifier = PositiveNaiveBayesClassifier(
            positive_set=critical_train_neg, unlabeled_set=neg_neutral)
        classifier1 = PositiveNaiveBayesClassifier(
            positive_set=critical_train_ing, unlabeled_set=ing_neutral)

        # NOTE(review): a single Htweets2 instance is reused and saved for
        # every element, and the loop re-walks all tweets received so far on
        # each call. Confirm this is intended rather than one record per
        # incoming tweet.
        tweets = Htweets2()
        for x in self.tweet_data:
            text = x['text']
            self.just_text.append(text)

            tweets.tweet_timestamp = x['timestamp_ms']
            tweets.tweet_id = x['id']
            tweets.tweet_screenname = x['user']['screen_name']
            tweets.tweet_recount = x['retweet_count']
            tweets.tweet_favour_count = x['favorite_count']
            tweets.tweet_text = profanity.censor(text)

            tweets.tweet_location = x['user']['location']
            tweets.tweet_media_entities = x['source']

            # Score the text once with each positive-only classifier and
            # reuse the answers in the branches below.
            is_neg = classifier.classify(text)
            is_ing = classifier1.classify(text)

            # `cl` and `cl2` are classifiers defined outside this method.
            # They are consulted lazily, only when a branch is reached.
            # print(...) with one string argument emits the same output on
            # Python 2 and Python 3.
            if is_neg is True and cl.classify(text) == 'alert':
                print('not normal - alert')
                tweets.tweet_status = 'not normal'
                tweets.tweet_score = 'alert'
            elif is_neg is False:
                print('normal-no alert')
                tweets.tweet_status = 'normal'
                tweets.tweet_score = 'neutral'
            elif cl2.classify(text) == 'neu':
                print('normal-neutral')
                tweets.tweet_score = 'neutral'
                tweets.tweet_status = 'normal'
            elif is_ing is True and cl.classify(text) == 'critical':
                print('not normal - critical')
                tweets.tweet_status = 'not normal'
                tweets.tweet_score = 'critical'
            elif is_ing is False:
                print('normal-no critical')
                tweets.tweet_score = 'neutral'
                tweets.tweet_status = 'normal'

            tweets.save()





]
# Held-out (text, label) pairs used to score the sentiment classifier `cl`
# below; every label here is either 'pos' or 'neg'.
# NOTE(review): the first two entries are identical — confirm the duplicate
# is intentional, since it counts twice in the accuracy figure.
testing = [
('Investor wealth rises Rs 4.82 lakh crore in two days of market bullish rise','pos'),
('Investor wealth rises Rs 4.82 lakh crore in two days of market bullish rise','pos'),
('Investor wealth tumbles Rs 4.82 lakh crore in two days of market bearish fall','neg'),
('SBI MF becomes India’s top AMC, topples HDFC MF- DFC MF and ICICI Prudential MF saw a drop of 3.33 % and 2.98% in their average AUM','neg'),
('Seven of top 10 cos lose Rs 2.82 lakh crore in m-cap TCS, HDFC Bank hammered','neg'),
('tesla shares haven’t actually dropped much and are still pretty high. they’re probably a good long term investment','pos'),
(' tesla shares down 6.01% to $701.8 stocks stockmarket stockstowatch stockstotrade stock stocktrading financial market consumer auto automobile manufacturing manufacturer manufacturers','neg'),
('tesla stock is just stupid high right now. zero demand for cars, oil at $0, and shares at $700?','neg'),
('unconvinced by the recent run up in shares, bank of america has downgraded tesla to "underperform" wednesday morning and moved their price target to $485 from $500.','neg'),
('tesla stands out in commanding investors confidence. its shares are up by 64% this year sentiment','pos'),
('finally hit 100% on my tesla postion return 🎯 and believe ima continue to hold all them shares','pos'),
('i was able to accumulate a handful more shares when i sold after it fell to 750 when it looked like itd fall a bit more','neg'),
('options flow grid update $tsla optionstrading tesla shares down 3.31% to $772.3 optionsflow stocks stockmarket investing investment','neg')

]
# Positive-only classifier built from `relevant` (positive) and `irrelevant`
# (unlabeled) samples; both are defined outside this section.
# NOTE(review): cl1 is only referenced by commented-out code in this section.
cl1=PositiveNaiveBayesClassifier(positive_set=relevant,unlabeled_set=irrelevant)
# Sentiment classifier trained on `training`, defined outside this section.
cl = NaiveBayesClassifier(training)
# Report accuracy on `testing` as a percentage (accuracy is presumably a
# fraction, hence the * 100 — confirm). Under Python 2 without
# print_function this form prints a tuple rather than two values.
print (cl.accuracy(testing)*100 ,"%")
#print(cl1.accuracy)
#blob = TextBlob('good idea to sell', classifier=cl)
#print(cl.classify("analyst downgrades stock saying q1 ‘phenomenal’ but shares ‘not inexpensive"))
#print(cl1.classify("analyst downgrades stock saying q1 ‘phenomenal’ but shares ‘not inexpensive"))
Exemplo n.º 6
0
# 2. Sentiment Analysis
# function -> text.sentiment
# result -> returns a named tuple (polarity, subjectivity)
# Polarity -> ranges over [-1.0, 1.0]
# Subjectivity -> ranges over [0.0, 1.0]
#                 0.0 -> very objective
#                 1.0 -> very subjective
# Wrap a sample review with `tb` and print the sentiment it reports.
review = tb("The product release was effective, and the over all release was good as well as smooth")
SENT = review.sentiment
print(SENT)

# 3. Classification
# Positive examples: sentences about sports.
sports_sentences = [
    "The team dominated the game",
    "They lost the ball",
    "The game was intense",
    "The goalkeeper catched the ball",
    "The other team controlled the ball",
]

# Unlabeled examples: a mix of topics.
various_sentences = [
    "The President did not comment",
    "I lost the keys",
    "The Game was Bad",
    "The team won the game",
    "Sara has two kids",
    "The ball went off the court",
    "The show is over",
]

# Train on the positive and unlabeled sets only.
classifier = PositiveNaiveBayesClassifier(
    positive_set=sports_sentences,
    unlabeled_set=various_sentences,
)

# Print the classifier's decision for two unseen sentences.
print(classifier.classify("My team lost the game"))
print(classifier.classify("The Game was "))