/
twittercount.py
143 lines (107 loc) · 3.82 KB
/
twittercount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
''' File name: twittercount.py
Author: Jae Hyun Moon
Date created: 12/25/2017
Description: This program counts the recent tweets that match a search keyword,
reporting one count per 2-minute window going back from the current time, plus the total
'''
import urllib.request, urllib.parse, urllib.error
import twurl
import ssl
import json
import datetime
# Twitter application management page: https://apps.twitter.com/
# (presumably where the credentials used by twurl are created -- confirm)
TWITTER_URL = 'https://api.twitter.com/1.1/search/tweets.json'

# Layout of a tweet's 'created_at' field, e.g. "Wed Aug 27 13:08:45 +0000 2008".
# %a/%b are matched against English names, which is what strptime uses unless
# the program changes the LC_TIME locale (this script does not).
CREATED_AT_FORMAT = '%a %b %d %H:%M:%S %z %Y'

# Length of one timeframe, in minutes
WINDOW_MINUTES = 2
# Number of consecutive timeframes retrieved per search
WINDOW_COUNT = 3
# Upper bound on requests spent on a single timeframe, so that a very busy
# keyword cannot exhaust the API rate limit
MAX_PAGES_PER_WINDOW = 5

# Ignore SSL certificate errors
# NOTE(review): this disables certificate and hostname verification for the
# signed API requests; kept as in the original, but consider removing it.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


def parse_created_at(created_at):
    """Convert a tweet's 'created_at' string into a timezone-aware datetime.

    Raises ValueError if the string does not match CREATED_AT_FORMAT.
    """
    return datetime.datetime.strptime(created_at, CREATED_AT_FORMAT)


def count_window(statuses, cutoff):
    """Count the leading statuses that were created at or after ``cutoff``.

    ``statuses`` is a list of tweet dicts in the order returned by the search
    (newest first), each with 'created_at' and 'id_str' keys.

    Returns ``(count, boundary_id)`` where ``boundary_id`` is the 'id_str' of
    the first status older than ``cutoff``, or None when every status in the
    list is inside the window.
    """
    count = 0
    for status in statuses:
        if parse_created_at(status['created_at']) < cutoff:
            return count, status['id_str']
        count += 1
    return count, None


def fetch_page(keyword, max_id):
    """Fetch one page of recent tweets for ``keyword``.

    ``max_id`` is the inclusive upper bound on tweet ids; an empty value means
    "start from the newest tweet" and is left out of the request.

    Returns ``(statuses, headers)``: the list under the response's 'statuses'
    key (empty if absent) and the response headers as a dict.
    """
    params = {'q': keyword, 'count': '100', 'result_type': 'recent'}
    if max_id:
        params['max_id'] = max_id
    url = twurl.augment(TWITTER_URL, params)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url, context=ctx) as connection:
        payload = json.loads(connection.read().decode())
        headers = dict(connection.getheaders())
    return payload.get('statuses', []), headers


def main():
    """Prompt for keywords until an empty one is entered and print, for each,
    the tweet count per timeframe, the total, and the remaining rate limit."""
    while True:
        # Reference time for this search. Microseconds are dropped, as the
        # original cutoff computation did.
        now = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0)
        print('current time: ', now)
        print('')
        keyword = input('Enter Your Search Keyword:')
        if not keyword:
            break

        # Per-search state: reset for every keyword so that one search cannot
        # leak its max_id or its total into the next one.
        total_count = 0
        max_id = ''
        headers = {}

        for window in range(1, WINDOW_COUNT + 1):
            minutes_back = window * WINDOW_MINUTES
            cutoff = now - datetime.timedelta(minutes=minutes_back)
            window_count = 0

            for _ in range(MAX_PAGES_PER_WINDOW):
                statuses, headers = fetch_page(keyword, max_id)
                found, boundary_id = count_window(statuses, cutoff)
                window_count += found
                if boundary_id is not None:
                    # max_id is inclusive, so the first tweet outside this
                    # window becomes the first tweet of the next one.
                    max_id = boundary_id
                    break
                if not statuses:
                    # Nothing older is available for this keyword.
                    break
                # The whole page was inside the window: continue with the
                # tweets strictly older than the last one already counted.
                max_id = str(int(statuses[-1]['id_str']) - 1)
            else:
                print("stopped after ", MAX_PAGES_PER_WINDOW,
                      " pages; the next count is a lower bound")

            total_count += window_count
            print("count for ", minutes_back - WINDOW_MINUTES, " - ", minutes_back,
                  "min before the current time: ", window_count)

        print("total count: ", total_count)
        print('---------------------------------------------------------')
        print('Remaining', headers.get('x-rate-limit-remaining', 'unknown'))


if __name__ == "__main__":
    main()