-
Notifications
You must be signed in to change notification settings - Fork 0
/
tw_server.py
315 lines (226 loc) · 8.46 KB
/
tw_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
""" Twitters processing server
Takes queries from the folder that holds the task queue
Works in an endless loop while running
For each task query:
Searches tweets according to the search params during the selected time
Calculates KPIs
Writes data
Task request management and result viewing are available in the client app (tw_client.py)
"""
from json import dumps
from json import loads
import oauth2
from os import listdir
from os import makedirs
from os import path
from os import remove as osremove
from time import sleep as sleep_timer
from time import time as current_timer
def initiate():
    """Set up the module-level paths and the KPI template.

    Resolves the 'queries' and 'results' directories and the 'users.txt'
    file next to this script and creates whichever of them does not exist
    yet. A newly created users file is seeded with three default accounts.

    Side effects:
        Rebinds the globals QUERIES_DIR, RESULTS_DIR and KPI_DICT_TEMPLATE
        and prints a message for every directory/file it creates.
    """
    global QUERIES_DIR, RESULTS_DIR, KPI_DICT_TEMPLATE

    app_dir = path.abspath(path.dirname(__file__))
    QUERIES_DIR = path.normpath(path.join(app_dir, 'queries/'))
    RESULTS_DIR = path.normpath(path.join(app_dir, 'results/'))
    users_file_path = path.normpath(path.join(app_dir, 'users.txt'))

    if not path.isdir(QUERIES_DIR):
        makedirs(QUERIES_DIR)
        print('Made dir for queries: %s' % QUERIES_DIR)

    if not path.isdir(RESULTS_DIR):
        makedirs(RESULTS_DIR)
        print('Made dir for results: %s' % RESULTS_DIR)

    if not path.isfile(users_file_path):
        # One single-quoted dict per line, the same format the task files use.
        users_template = (
            "{'user_name': 'admin', 'password': '0', 'admin_role': '1'}\n"
            "{'user_name': 'user1', 'password': '1', 'admin_role': '0'}\n"
            "{'user_name': 'user2', 'password': '2', 'admin_role': '0'}")
        # 'with' guarantees the handle is closed even if the write fails.
        with open(users_file_path, 'w') as users_file:
            users_file.write(users_template)
        print('Made users file: %s' % users_file_path)

    # Zeroed KPI structure; contains nested mutable values, so consumers
    # need a deep copy to get an independent instance.
    KPI_DICT_TEMPLATE = {
        'twitts_counter': 0,
        'queries_counter': 0,
        'retweeted': 0,
        'languages': [],
        'languages_counter': 0,
        'user': {
            'uniqe_users': [],
            'users_counter': 0,
            'geo_enabled': 0,
            'followers_count': 0,
            'time_zones': [],
            'time_zone_counter': 0,
        },
    }
def change_query_file(file_path):
    """Mark a query file as processed by switching its suffix to '_done'.

    A sibling file is written whose name is the original file name with
    every '_new' removed and '_done' appended. Only the first line of the
    original is copied (that is the only line the server reads), and the
    original file is then deleted.

    Args:
        file_path: path of the query file to mark as processed
    """
    dir_name, file_name = path.split(file_path)
    # Rewrite the marker in the file name only: replacing over the whole
    # path would also strip '_new' from directory names and point the
    # output at a directory that does not exist.
    done_file_path = path.join(dir_name,
                               file_name.replace('_new', '') + '_done')

    with open(file_path, 'r') as todo_file:
        first_line = todo_file.readline()
    with open(done_file_path, 'w') as done_file:
        done_file.write(first_line)

    osremove(file_path)
def start_working():
    """Process query files in an endless loop, pausing between passes.

    Each pass walks QUERIES_DIR in sorted file-name order. Files whose name
    ends with '_done' are skipped. For every other file the first line is
    parsed into a task dict and handed to start_task(); on success the file
    is marked as done. A file that cannot be parsed, or whose task raises,
    is reported and left in place, so it is picked up again on the next
    pass.

    The pause before a pass is 60 seconds minus the duration of the
    previous pass, but never less than 1 second (the first pass starts
    after 1 second). This function does not return.
    """
    default_timer = run_time = 60
    while True:
        waiting_time = max(default_timer - run_time, 1)
        print('Next run will start in - %s seconds' % waiting_time)
        sleep_timer(waiting_time)

        start_time = current_timer()
        print('Starting reading queries line')
        for filename in sorted(listdir(QUERIES_DIR)):
            if filename.endswith('_done'):
                continue
            todo_file_path = path.join(QUERIES_DIR, filename)
            print('Start processing file: %s' % todo_file_path)
            with open(todo_file_path, 'r') as todo_file:
                line = todo_file.readline()

            try:
                # Task files hold a single-quoted dict; swap the quotes so
                # the JSON parser accepts it.
                dict_line = loads(line.replace("'", "\""))
            except ValueError as err:
                # json reports malformed input as ValueError (or a subclass).
                print('Error in JSON loading: %s' % err)
                continue

            try:
                start_task(dict_line, filename)
            except Exception as err:
                # Worker-loop boundary: one failing task must not stop the
                # server, so report it and move on to the next file.
                print('Error in task doing: %s' % err)
                continue

            change_query_file(todo_file_path)

        run_time = current_timer() - start_time
        print('Finished reading queries directory, reading time - %s'
              % run_time)
def make_client():
    """Build the OAuth client used to talk to Twitter.

    Each credential is taken from an environment variable when it is set
    (TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET, TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET); otherwise the value embedded below is used.

    Returns:
        oauth2.Client built from the consumer and access-token pairs
    """
    from os import environ

    # INSERT YOUR ACCESS DATA HERE!!
    # NOTE(review): credentials committed to source control should be
    # treated as leaked - revoke them and supply real ones through the
    # environment variables instead of editing these defaults.
    ACCESS_TOKEN = environ.get(
        'TWITTER_ACCESS_TOKEN',
        '4092480916-LKekIbO1izZpfyE4eSIXhd1gB4EXo0nsJnoYJh5')
    ACCESS_SECRET = environ.get(
        'TWITTER_ACCESS_SECRET',
        'rDLOMkRbye0bwwQqAtWKJwiGxqJSoLYPj4VexvkxDoYJ8')
    CONSUMER_KEY = environ.get(
        'TWITTER_CONSUMER_KEY',
        'MLkrjaZz5gqkfdl6Y4Pj2ic6B')
    CONSUMER_SECRET = environ.get(
        'TWITTER_CONSUMER_SECRET',
        'JkjN1isrqiXxdViGb28GdcbMreTD30uuEy38oup0ptUzht4G9U')

    consumer = oauth2.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
    token = oauth2.Token(key=ACCESS_TOKEN, secret=ACCESS_SECRET)
    return oauth2.Client(consumer, token)
def query_to_twitter(words, only_twitts=False, count=15, lang='', geo=''):
    """Run one search request against Twitter and decode the JSON answer.

    Args:
        words: searched phrase (text); it is URL-encoded before sending
        only_twitts: if True - return only the 'statuses' section of the
            answer, if False - return the whole decoded answer
        count: number of tweets to receive
        lang: language filter, sent as the 'lang' parameter when not empty
        geo: geo filter, sent as the 'geocode' parameter when not empty
            (presumably 'latitude,longitude,radius' - confirm against the
            search API reference)

    Returns:
        The decoded data; '' when the server answers 429 or 503. With
        only_twitts=True the result is None if the answer has no
        'statuses' key.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    if not isinstance(words, bytes):
        # Python 2's quote() cannot handle non-ASCII unicode text, so hand
        # it UTF-8 bytes on both major versions.
        words = words.encode('utf-8')

    url = 'https://api.twitter.com/1.1/search/tweets.json?'
    # '%' and '+' are left untouched so a phrase that a caller already
    # encoded keeps its meaning; everything else that is unsafe in a query
    # string ('#', '&', spaces, non-ASCII bytes) gets escaped.
    url += 'q={}&count={}'.format(quote(words, safe='%+'), str(count))
    if lang:
        url += '&lang=%s' % lang
    if geo:
        url += '&geocode=%s' % quote(geo, safe=',%')
    print(url)

    client = make_client()
    resp, content = client.request(url, 'GET')
    print('Server answer: {}'.format(resp['status']))
    if resp['status'] in ('429', '503'):  # out of queries limits / overload
        print('out of limits')
        sleep_timer(2)
        return ''

    data = loads(content.decode('utf-8'))
    if only_twitts:
        data = data.get('statuses')
    return data
def process_kpi(twitt, kpi_dict):
    """Fold the data of one tweet into the aggregated KPIs dict.

    For every tweet: adds its 'retweeted' flag to the retweet counter and
    records its language if it has not been seen yet.
    For every author seen for the first time: records the user id, adds the
    'geo_enabled' flag and the followers count to the user totals and
    records the time zone if it has not been seen yet.

    Missing or null fields are tolerated: they add nothing to the counters,
    a null language/time zone is not recorded, and a tweet without a user
    id contributes no user statistics.

    Args:
        twitt: dict with one tweet's data
        kpi_dict: general dict with KPIs, updated in place
    """
    kpi_dict['retweeted'] += int(twitt.get('retweeted') or 0)

    lang = twitt.get('lang')
    if lang is not None and lang not in kpi_dict['languages']:
        kpi_dict['languages'].append(lang)

    user = twitt.get('user') or {}
    user_kpi = kpi_dict['user']
    user_id = user.get('id')
    # NOTE(review): the nesting of the original was lost; the user-level
    # totals are assumed to be accumulated once per unique user - confirm.
    if user_id is None or user_id in user_kpi['uniqe_users']:
        return

    user_kpi['uniqe_users'].append(user_id)
    user_kpi['geo_enabled'] += int(user.get('geo_enabled') or 0)
    user_kpi['followers_count'] += int(user.get('followers_count') or 0)

    time_zone = user.get('time_zone')
    if time_zone is not None and time_zone not in user_kpi['time_zones']:
        user_kpi['time_zones'].append(time_zone)
def process_twitts(twitters_data, exclude_twitts, kpi_dict):
    """Process every not-yet-seen tweet of one search answer.

    Tweets whose id is already in the cache are skipped. Every other tweet
    has its id added to the cache, is copied into the result list and is
    folded into the aggregated KPIs through process_kpi().

    Args:
        twitters_data: iterable of tweets (each tweet is a dict with an
            'id' key)
        exclude_twitts: list used as the cache of processed tweet ids,
            extended in place
        kpi_dict: dict with aggregated KPIs, updated in place

    Returns:
        list of the newly processed tweets with all included data
        (each tweet is a shallow copy of the input dict)
    """
    result_list = []
    for twitt in twitters_data:
        twitt_id = twitt['id']
        if twitt_id in exclude_twitts:
            print('%s - skipped' % twitt_id)
            continue
        exclude_twitts.append(twitt_id)
        print('%s - added' % twitt_id)
        # Plain copy: values are kept as decoded text. Re-encoding them to
        # UTF-8 bytes would make the result impossible to serialise with
        # json.dumps on Python 3 and was at best a no-op on Python 2.
        result_list.append(dict(twitt))
        process_kpi(twitt, kpi_dict)
    return result_list
def start_task(params, task_file_name=''):
    """Run one search task and write its result file.

    Queries Twitter repeatedly until 'time_seconds' have elapsed, collects
    every tweet not seen earlier in this task, aggregates KPIs over all of
    them and appends the JSON-encoded result
    ({'kpi': ..., 'statuses': ...}) to the result file in RESULTS_DIR.

    The first successful query asks for 100 tweets, later ones for 10.
    There is a 5 second pause after every query, including ones that
    returned nothing, so an empty or rate-limited answer is not retried in
    a tight loop.

    Args:
        params: dict with parameters; reads 'user_name', 'query' and
            'time_seconds'
        task_file_name: task file name, needed to create the appropriate
            result file; if '' - a name with the '_taskmade_' tag is
            generated from the current time and the user name
    """
    from copy import deepcopy

    user_name = params.get('user_name')
    print('Staring task for user %s' % user_name)
    if not task_file_name:
        task_file_name = (str(current_timer()).replace('.', '')
                          + '_%s_taskmade_' % user_name)
    results_file_name = task_file_name.replace('_new', '') + '_results'
    results_file_path = path.join(RESULTS_DIR, results_file_name)

    # One private KPI dict per task. A deep copy is required because the
    # template holds nested lists/dicts that would otherwise be shared with
    # (and polluted for) every other task.
    kpi_dict = deepcopy(KPI_DICT_TEMPLATE)
    results_list = []
    twitts_id_cashe = []
    counter = 0

    # The result file is opened up front, as before, and is now closed even
    # when the task fails half-way.
    with open(results_file_path, 'a') as results_file:
        target_end_time = current_timer() + float(params.get('time_seconds'))
        while current_timer() <= target_end_time:
            anserw_count = 100 if counter == 0 else 10
            twitts = query_to_twitter(params.get('query'), True, anserw_count)
            if twitts:
                new_twitts_lst = process_twitts(twitts, twitts_id_cashe,
                                                kpi_dict)
                results_list.extend(new_twitts_lst)
                counter += 1  # counts successful queries only
            sleep_timer(5)

        kpi_dict['twitts_counter'] = len(results_list)
        kpi_dict['queries_counter'] = counter
        kpi_dict['languages_counter'] = len(kpi_dict['languages'])
        kpi_dict['user']['users_counter'] = len(
            kpi_dict['user']['uniqe_users'])
        kpi_dict['user']['time_zone_counter'] = len(
            kpi_dict['user']['time_zones'])
        result_dict = {'kpi': kpi_dict, 'statuses': results_list}
        results_file.write(dumps(result_dict))

    print('End task for user {} , cicles - {}'.format(user_name,
                                                       str(counter)))
### MAIN FLOW ###
# Placeholders for the module-level globals; initiate() rebinds them to the
# real paths and the KPI template.
QUERIES_DIR = RESULTS_DIR = KPI_DICT_TEMPLATE = ''

# Start the server only when the file is run as a script, so importing the
# module no longer blocks forever inside the processing loop.
if __name__ == '__main__':
    initiate()
    start_working()