def main():
    """Search Twitter for a query and print a sentiment label per tweet.

    Reads API credentials from ``secret.json`` in the current directory,
    searches for the query given as the first command-line argument,
    classifies the text of every returned status with a
    ``NaiveBayesAnalyzer`` and prints one ``<classification>: <text>`` line
    per tweet, followed by the per-classification totals.

    Raises:
        IndexError: if no search query was passed on the command line.
    """
    with open('secret.json', 'r') as fp:
        credentials = json.load(fp)

    client = UserClient(**credentials)

    # NOTE(review): the standard search endpoint reportedly caps ``count`` at
    # 100 per request, so asking for 150 may be silently reduced -- confirm.
    response = client.api.search.tweets.get(q=sys.argv[1], count=150)

    # TODO: nested fields (for example the 'user' column) are not flattened.
    tweets = pd.DataFrame(response.data['statuses'])

    # The classifier is not a good fit for tweets: it produces many false
    # positives and a few false negatives. A movie-review training corpus is
    # probably not representative of tweet text.
    print('Training classifier...')
    classifier = NaiveBayesAnalyzer()
    classifier.train()  # Train on a Movie Review Corpus

    print('Performing Sentiment Analysis...')
    counter = Counter()

    # A search with no results yields a frame without a 'text' column;
    # treat that as "nothing to classify" instead of raising KeyError.
    texts = tweets['text'] if 'text' in tweets else []

    for text in texts:
        result = classifier.analyze(text)

        counter[result.classification] += 1
        print('%s: %s' % (result.classification, text))

    # Built as a single string so the output is the same whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('Total:  %s' % (counter,))
# Example #2
0
class TestNaiveBayesAnalyzer(unittest.TestCase):
    """Tests for ``NaiveBayesAnalyzer``: its ``kind`` and its ``analyze`` output."""

    def setUp(self):
        # A fresh analyzer is created for every test.
        self.analyzer = NaiveBayesAnalyzer()

    def test_kind(self):
        # The analyzer must report itself as DISCRETE.
        assert_equal(self.analyzer.kind, DISCRETE)

    @attr('slow')
    def test_analyze(self):
        upbeat_text = 'I feel great this morning.'
        gloomy_text = 'This is a terrible car.'

        upbeat_result = self.analyzer.analyze(upbeat_text)
        assert_equal(upbeat_result[0], 'pos')

        gloomy_result = self.analyzer.analyze(gloomy_text)
        assert_equal(gloomy_result[0], 'neg')

        # Index 1 holds the probability that the text is positive.
        prob_positive = upbeat_result[1]
        assert_true(isinstance(prob_positive, float))

        # Index 2 holds the probability that the text is negative.
        prob_negative = upbeat_result[2]
        assert_true(isinstance(prob_negative, float))

        # The two probabilities should add up to (about) one.
        assert_about_equal(prob_positive + prob_negative, 1)
# Example #3
0
class TestNaiveBayesAnalyzer(unittest.TestCase):
    """Exercise ``NaiveBayesAnalyzer`` on one positive and one negative sentence."""

    def setUp(self):
        self.analyzer = NaiveBayesAnalyzer()

    def test_kind(self):
        expected_kind = DISCRETE
        assert_equal(self.analyzer.kind, expected_kind)

    @attr('slow')
    def test_analyze(self):
        positive_sentence = 'I feel great this morning.'
        negative_sentence = 'This is a terrible car.'

        positive_outcome = self.analyzer.analyze(positive_sentence)
        assert_equal(positive_outcome[0], 'pos')
        assert_equal(self.analyzer.analyze(negative_sentence)[0], 'neg')

        # Items 1 and 2 are the positive and negative probabilities, in that
        # order; both must be floats.
        for position in (1, 2):
            assert_true(isinstance(positive_outcome[position], float))

        # Together the two probabilities should come to roughly 1.
        probability_total = positive_outcome[1] + positive_outcome[2]
        assert_about_equal(probability_total, 1)
        sdata = _scale_data(data, self.ranges)
        self.ax.plot(self.angle, np.r_[sdata, sdata[0]], *args, **kw)
    def fill(self, data, *args, **kw):
        """Scale ``data`` against ``self.ranges`` and pass it to ``self.ax.fill``.

        The first scaled value is appended to the end of the sequence
        (presumably so the filled outline closes on itself). Extra positional
        and keyword arguments are forwarded to ``self.ax.fill`` unchanged.
        """
        scaled = _scale_data(data, self.ranges)
        closed_values = np.r_[scaled, scaled[0]]
        self.ax.fill(self.angle, closed_values, *args, **kw)


if __name__ == "__main__":
    # Script entry point: builds two sentiment analyzers, then runs a Spark
    # map/reduce over the lines of movieData.txt.
    # NOTE(review): this excerpt stops at the loop header at the bottom; the
    # remainder of the script is not visible here.

    # Start timestamp (where it is consumed is not visible in this excerpt).
    start = timeit.default_timer()
    # Create one analyzer of each type.
    analyzerBayes = NaiveBayesAnalyzer()
    analyzerPattern = PatternAnalyzer()

    # Run one throwaway analysis per analyzer up front. Presumably this forces
    # any lazy training/model loading to happen before the Spark job starts --
    # confirm. The two results are not used in the visible code.
    resultBayes = analyzerBayes.analyze("train this")
    resultPattern = analyzerPattern.analyze("train this")

    sc = SparkContext(appName="MovieSentiment")

    # Map/reduce: read the input file as lines, map every line through
    # sentimentAnalysis (defined elsewhere), then merge the values of each key
    # by adding their first components together and their second components
    # together (presumably positive and negative tallies, going by the names).
    lines = sc.textFile("movieData.txt")

    posNneg = lines.map(sentimentAnalysis) \
                   .reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))

    # Materialize the reduced (key, value) pairs as a local collection.
    output = posNneg.collect()

    # Presumably filled in by the loop below -- its body is not visible here.
    ScoreDict = {}

    for (feature, posAndNeg) in output:
# Example #5
0
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import pickle
import sys
from collections import namedtuple
# Record layout of one analysis result. The results below are stored as plain
# tuples in this same (sentiment, p_pos, p_neg) order so the pickled output
# does not depend on this class being importable when it is loaded.
sentimentTuple = namedtuple("Sentiment", field_names=["sentiment", "p_pos", "p_neg"])
n = NaiveBayesAnalyzer()

# For every pickled dataset named on the command line: classify each entry of
# its 'tweet' column, print the positive/negative summary, and write the
# per-tweet results next to the input as "<name>Sentiment.pkl".
for path in sys.argv[1:]:
    # SECURITY: pickle.load can execute arbitrary code while loading; only run
    # this script on files from a trusted source.
    with open(path, "rb") as f:
        df = pickle.load(f)

    results = [
        (s.classification, s.p_pos, s.p_neg)
        for s in map(n.analyze, df['tweet'])
    ]

    pos = sum(1 for label, _, _ in results if label == 'pos')
    neg = sum(1 for label, _, _ in results if label == 'neg')
    total = neg + pos
    # An input without tweets would otherwise raise ZeroDivisionError.
    ratio = neg / total if total else float("nan")

    print("Candidate Name: "+path)
    print("Total positive tweets: {}".format(pos))
    print("Total negative tweets: {}".format(neg))
    print('negative to total ratio: {}'.format(ratio))

    # Strip only a trailing ".pkl"; replacing every occurrence would also
    # mangle directory or file names that merely contain ".pkl".
    stem = path[:-len(".pkl")] if path.endswith(".pkl") else path
    with open(stem + "Sentiment.pkl", "wb") as f:
        pickle.dump(results, f)