-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
94 lines (77 loc) · 1.96 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
In this project, you will visualize the feelings and language used in a set of
Tweets. This starter code loads the appropriate libraries and the Twitter data you'll
need!
'''
import json
from textblob import TextBlob
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Load the tweet data from tweets_small.json.
# The `with` block closes the file even if parsing raises, and the explicit
# encoding keeps non-ASCII tweet text (emoji, accented letters) readable on
# platforms whose default text encoding is not UTF-8.
with open("tweets_small.json", "r", encoding="utf-8") as tweetFile:
    tweetData = json.load(tweetFile)
# Continue your program below!
# Stop words: common function words, plus the "https" scheme left over from
# links, that must not appear in the word-frequency table.
filtered_words = ["of", "in", "the", "for", "they", "if", "with", "is", "https"]

polarity = []      # one TextBlob polarity score per tweet, in file order
subjectivity = []  # one TextBlob subjectivity score per tweet, in file order
filtered = {}      # lowercase word -> number of occurrences across all tweets

for tweet in tweetData:
    blob = TextBlob(tweet["text"])
    polarity.append(blob.polarity)
    subjectivity.append(blob.subjectivity)

    # Count the words of the tweet itself. Looping over `filtered_words` here
    # would reject every candidate through the stop-word test below and leave
    # `filtered` permanently empty.
    for word in blob.words:
        key = word.lower()
        if len(key) < 2:
            # Skip single characters.
            continue
        if not key.isalpha():
            # Skip tokens containing digits or symbols.
            continue
        if key in filtered_words:
            continue
        filtered[key] = filtered.get(key, 0) + 1
# Mean polarity and mean subjectivity over all analysed tweets.
avg_p = sum(polarity) / len(polarity)
avg_s = sum(subjectivity) / len(subjectivity)

# Report polarity first, then subjectivity, one value per line.
for average in (avg_p, avg_s):
    print(average)
# Textblob sample:
#tb = TextBlob("You are a brilliant computer scientist.")
#print(tb.polarity)
# Histogram of the per-tweet polarity scores, split into five bins.
plt.hist(polarity, bins=5)
plt.grid(True)

# Captions for the figure.
plt.title('Pol Histogram')
plt.xlabel('Polarity')
plt.ylabel('Rate')

# Fixed view: x from -1 to 1, y from 0 to 60.
plt.axis([-1.00, 1.00, 0, 60])
plt.show()
# Histogram of the per-tweet subjectivity scores, split into five bins.
plt.title('Sub Histogram')
plt.xlabel('Subjectivity')
plt.ylabel('Rate')
plt.grid(True)
plt.hist(subjectivity, bins=5)
# TextBlob documents subjectivity as a value in [0.0, 1.0] (only polarity is
# signed), so the x-axis starts at 0; a -1..1 range would leave the left half
# of the figure permanently empty. The y-axis cap of 60 is unchanged.
plt.axis([0.00, 1.00, 0, 60])
plt.show()
# Scatter plot with each tweet's polarity on x and its subjectivity on y.
plt.scatter(polarity, subjectivity)
# Caption the figure like the two histograms so the axes can be told apart.
plt.title('Polarity vs Subjectivity')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()
# Merge the text of every tweet into one comma-separated string.
tweet_texts = [entry['text'] for entry in tweetData]
all_tweets = ', '.join(tweet_texts)
tb = TextBlob(all_tweets)

# Echo the combined text, followed by the last tweet record the earlier
# per-tweet loop left in `tweet`.
print(all_tweets)
print(str(tweet))

# Render a word cloud of the combined text as an image without axes.
cloud = WordCloud().generate(all_tweets)
plt.imshow(cloud)
plt.axis("off")
plt.show()
#generate_from_text(text)