-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.py
368 lines (309 loc) · 13.7 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# @author: James Zhang
# @data : 2021/2/10
import datetime
import os
import time
from collections import deque, namedtuple
import re
from random import random
from func_timeout import func_set_timeout
from func_timeout.exceptions import FunctionTimedOut
from selenium.common.exceptions import WebDriverException
from page_parse import PageParse
from config_util import Config
from appium_util import Appium
from xpath_util import XpathParseIteration, ElementUid
from log import log
from report_util import LogAndroid
class Crawler:
    """Automated Android UI crawler driven by Appium.

    Walks an app page-by-page: pulls the page XML, iterates clickable
    elements via ``XpathParseIteration``, clicks each one, records
    before/after screenshots plus event metadata, and uses a page stack
    (depth-bounded) to explore newly-opened pages.
    """

    # One record per click event: epoch time, before/after screenshot paths,
    # activity name, clicked xpath, status string and the element's uid.
    event_record = namedtuple('event', ['time', 'before_click', 'after_click', 'activity', 'xpath', 'status',
                                        'element_uid'])
    # Traversal strategy forwarded to XpathParseIteration; expected to be
    # set by a subclass or caller before instantiation (None by default).
    travel_mode = None

    def __init__(self, config: Config, timer):
        """Initialize the crawler.

        Args:
            config: project Config instance (element white/black lists,
                activities, Appium desired caps, device udid, ...).
            timer: total crawl budget in minutes (stored as seconds).
        """
        self._start_time = datetime.datetime.now()
        XpathParseIteration.travel_mode = self.travel_mode
        # Maximum depth of the page stack before forcing a "back" press.
        self.max_page_depth = config.config.get('max_depth', 6)
        crash_traceback = config.config.get('max_screen')
        # Ring buffer of recent screens kept for crash traceback context.
        self.__crash_traceback = deque(maxlen=crash_traceback) if crash_traceback else deque(maxlen=10)
        # Uids of elements already clicked (or deliberately skipped).
        self.seen = set()
        self.__config = config
        self.white_apps = self.__config.white_apps()
        self.black_elements = self.__config.black_elements()
        self.black_activities = self.__config.black_activities()
        self.white_elements = self.__config.white_elements()
        self.base_activities = self.__config.base_activities()
        self.last_elements = self.__config.last_elements()
        self.first_elements = self.__config.first_elements()
        self.selected_elements = self.__config.selected_elements()
        self.after_crawl_page = self.__config.after_crawl_page()
        self.driver = None
        self.init_appium()
        self.__current_page = None
        self.__record = list()
        # Convert the minute budget to seconds once, up front.
        self.__timer = timer * 60
        log.info("Total time: {} seconds".format(self.__timer))
        # White-listed elements may be re-clicked; this set throttles them.
        self.__white_element_seen = set()
        # init device crash log, remove cache crash log.
        self.android_log = LogAndroid(udid=self.__config.udid)
        self.android_log.clear_log()
        # init report directory tree: reports/<timestamp>_<udid>/screenshot
        report_dir = os.path.join(os.path.dirname(__file__), 'reports')
        # makedirs(exist_ok=True) avoids the check-then-create race of the
        # previous os.path.exists + os.mkdir pair.
        os.makedirs(report_dir, exist_ok=True)
        current_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        self.__report_path = os.path.join(report_dir, current_time + '_' + self.__config.udid)
        os.mkdir(self.__report_path)
        self.__screenshot_dir = os.path.join(self.__report_path, 'screenshot')
        os.mkdir(self.__screenshot_dir)

    def __call__(self):
        """Allow the instance itself to be invoked to start the crawl."""
        self.run()

    def __repr__(self):
        return '<{0!r}: {1!r}>'.format(type(self).__name__, self.__config.projectName)

    @func_set_timeout(5)
    def __get_page_source(self):
        """Fetch the current page XML, bounded to 5 seconds by func_timeout.

        Also enforces the global time budget: once elapsed time exceeds
        ``self.timer`` the Appium session is quit.

        NOTE(review): after quit() the following get_page_source() call is
        presumably expected to raise and thereby terminate the crawl loop —
        confirm this is the intended shutdown path.
        """
        if self._start_time + datetime.timedelta(seconds=self.timer) <= datetime.datetime.now():
            self.quit()
        return self.driver.get_page_source()

    def init_appium(self):
        """(Re)create the Appium driver from the configured desired caps."""
        self.driver = Appium(desired_caps=self.__config.appium_desired_caps())

    def get_page_source(self):
        """Fetch the page XML with a bounded number of retries.

        Returns:
            The page source string on success, or None if every attempt
            timed out / failed.
        """
        count = 2
        while count:
            try:
                # globally unique refresh __current_page. make sure record newest page info.
                self.__current_page = self.__get_page_source()
                return self.__current_page
            except FunctionTimedOut:
                count -= 1
            except WebDriverException as e:
                log.error(e)
                log.warning('refresh appium driver')
                self.init_appium()
                # Bug fix: previously this branch did not consume a retry,
                # so a persistent WebDriverException looped forever.
                count -= 1
        log.error('Get page source timeout.')
        return None

    def get_page_info(self):
        '''
        Returns:
            page: PageParse class instance.
            current_activity: string
        '''
        current_page_source = self.get_page_source()
        if not current_page_source:
            self.driver.click_device_back()
            # test: if after click back, still timeout, relaunch app.
            try:
                self.__get_page_source()
            except FunctionTimedOut:
                self.driver.launch_app()
                log.warning('Get page source timeout, relaunch app.')
            except WebDriverException:
                log.warning('refresh appium driver')
                self.init_appium()
            # Bug fix: this recursive retry used to sit in a `finally:`
            # clause, whose `return` silently swallowed any unexpected
            # exception. Moved out so real errors propagate.
            return self.get_page_info()
        else:
            current_activity = self.driver.get_current_activity()
            page = PageParse(current_page_source, current_activity)
            return page

    def run(self):
        '''
        page crawl main control center.

        Maintains a stack of pages awaiting crawl; crawl_page yields either
        (new_page, current_page) tuples on page transitions or 'END' when a
        page is exhausted.
        '''
        page_stack = deque()
        # page_stack: type: list, save wait to crawl page xml data.
        # call get_page_info to get current page xml and put to list.
        page_stack.append(self.get_page_info())
        while page_stack:
            # pop list top one to start crawl
            page = page_stack.pop()
            # check this page is the same as current_page
            if page == self.__current_page:
                # call crawl_page method to start click this page.
                crawl_gen = self.crawl_page(page)
                for res in crawl_gen:
                    if res != 'END':
                        if self.__is_base_activity(self.driver.get_current_activity()):
                            # Back at a base activity: drop stale history and
                            # restart exploration from the new page only.
                            page_stack.clear()
                            page_stack.append(res[0])
                            break
                        else:
                            # check current page stack depth is whether or not more then max_page_depth
                            if len(page_stack) >= self.max_page_depth:
                                # click back
                                log.warning('Current page deep is more than max depth, click back.')
                                self.driver.click_device_back()
                                continue
                            else:
                                # put current page into stack
                                page_stack.append(res[1])
                                # put new page into stack
                                page_stack.append(res[0])
                                break
                    else:
                        # end this page crawl
                        break

    def crawl_page(self, xpath_generator):
        '''
        Need received a XpathParse instance.

        Args:
            xpath_generator: XpathParse instance (or raw page data, which is
                wrapped into an XpathParseIteration).

        Yields:
            (new_page, current_generator) when the page changed after a
            click, or the string 'END' when the page is fully crawled.
        '''
        if not isinstance(xpath_generator, XpathParseIteration):
            # if not a XpathParse instance, try to structure a XpathParse instance.
            self.__xpath_generator = XpathParseIteration(xpath_generator,
                                                         first_elements_config=self.first_elements,
                                                         last_elements_config=self.last_elements)
        else:
            self.__xpath_generator = xpath_generator
        for xpath, node_uid in self.__xpath_generator:
            if not self.__before_click(node_uid):
                continue
            res = self.__click(xpath, node_uid)
            # if res not None, represent page change, return to run,
            # else click next one
            if res is not None:
                yield res
        else:
            # when all page elements is clicked. trigger after crawl page event.
            # self.driver.load_long_page_content(times=2)
            # self.driver.pull_refresh_page()
            self.__after_crawl_page()
            current_page = self.get_page_info()
            if current_page != self.__xpath_generator:
                yield current_page, self.__xpath_generator
        log.debug("Current page crawler over!")
        if not self.__is_base_activity(self.driver.get_current_activity()):
            self.driver.click_device_back()
        else:
            log.warning("Current page is base activity.")
        yield 'END'

    def __before_click(self, node_uid: ElementUid):
        """Decide whether an element should be clicked.

        Returns 1 to click, 0 to skip (already seen, throttled white
        element, not selected, or black-listed).
        """
        # check the node whether has been clicked
        if node_uid.uid in self.seen:
            # log.info('element {} is seen, skip it.'.format(node_uid.uid))
            return 0
        # check the node whether is in white list seen
        # prevent always click white element.
        if node_uid.uid in self.__white_element_seen:
            # generate a random number to decide this node remove from seen.
            if random() <= 0.3:
                self.__white_element_seen.remove(node_uid.uid)
                log.info('Remove an element from white seen.')
            return 0
        # check the node whether is in selected list.
        if not self.__is_selected_element(node_uid.uid):
            # log.warning("Current element not in selected list, not click.\n{}".format(node_uid.uid))
            self.seen.add(node_uid.uid)
            return 0
        # check the node whether is in black list.
        if self.__is_black_element(node_uid.uid):
            log.warning("Current element in black list, not click. {}.".format(node_uid.uid))
            self.seen.add(node_uid.uid)
            return 0
        return 1

    def __click(self, xpath, node_uid: ElementUid):
        """Click the element located by xpath and record the event.

        Returns:
            (new_page, current_generator) if the click changed the page,
            otherwise None.
        """
        if xpath != '' and xpath[-1] != '*':
            elements = self.driver.find_elements(xpath)
            if len(elements) > 0:
                # record before click screenshot
                screenshot_before_click = self.driver.save_screenshot_as_jpg(self.__screenshot_dir, node_uid.bounds)
                try:
                    elements[0].click()
                except Exception as err:
                    # Best-effort: a failed click is logged and skipped, not fatal.
                    log.error("element click error! {}".format(err))
                    return None
                else:
                    # record after click screenshot
                    screenshot_after_click = self.driver.save_screenshot_as_jpg(self.__screenshot_dir)
                    # log.error("click a element! Path: {}".format(xpath))
                    # recode click event info.
                    self.__statistics(xpath, node_uid.uid, screenshot_before_click, screenshot_after_click)
                    if not self.__is_white_element(node_uid.uid):
                        self.seen.add(node_uid.uid)
                    else:
                        # add to white element seen set
                        self.__white_element_seen.add(node_uid.uid)
                    # add random actions
                    self.driver.monkey_actions()
                    self.__after_click()
                    # judge page is change
                    current_page = self.get_page_info()
                    if current_page != self.__xpath_generator:
                        return current_page, self.__xpath_generator
                    else:
                        return None
            else:
                return None

    def __after_click(self):
        """Post-click hook: back out of black activities, or ensure we are
        still inside a white-listed app."""
        # check is in black activity
        if self.__is_black_activity(self.driver.get_current_activity()):
            self.driver.click_device_back()
        else:
            self.__return_white_app()

    def __after_crawl_page(self):
        """Run configured driver actions (e.g. scrolls) after a page is
        fully crawled; unknown action names are logged and skipped."""
        for event in self.after_crawl_page:
            try:
                action = getattr(self.driver, event.get('name', 'None'))
            except AttributeError as exc:
                log.error(f"Not support {event.get('name', 'None')} event, {exc}")
            else:
                for _ in range(event.get('times', 0)):
                    action()

    def __return_white_app(self):
        """Press back up to 3 times to get back into a white-listed app;
        relaunch the app under test if that fails."""
        for count in range(3):
            current_package = self.driver.get_current_package()
            if self.__is_white_app(current_package):
                break
            else:
                log.info('current app is not white app!')
                self.driver.click_device_back()
        else:
            log.info('relaunch test app!!')
            self.driver.launch_app()

    def __is_base_activity(self, activity):
        return self.__re_search(activity, self.base_activities)

    def __is_white_element(self, node_uid):
        return self.__re_search(node_uid, self.white_elements)

    def __is_black_element(self, node_uid):
        return self.__re_search(node_uid, self.black_elements)

    def __is_last_element(self, node_uid):
        return self.__re_search(node_uid, self.last_elements)

    def __is_white_app(self, app):
        return self.__re_search(app, self.white_apps)

    def __is_black_activity(self, activity):
        return self.__re_search(activity, self.black_activities)

    def __is_selected_element(self, node_uid):
        return self.__re_search(node_uid, self.selected_elements)

    @staticmethod
    def __re_search(target, _list):
        """Return True if any pattern in _list regex-matches target."""
        for item in _list:
            if re.search(r'{}'.format(item), target):
                return True
        return False

    def __statistics(self, xpath, node_uid,
                     screenshot_before_click, screenshot_after_click):
        """Append one click-event record to the in-memory report list."""
        node_uid_split = node_uid.split(':')
        activity = node_uid_split[0]
        # TODO(review): ''.join drops the ':' separators inside the uid —
        # confirm whether ':'.join was intended before changing report format.
        element_uid = ''.join(node_uid_split[1:])
        event = self.event_record(time=int(time.time()),
                                  before_click=screenshot_before_click,
                                  after_click=screenshot_after_click,
                                  activity=activity,
                                  xpath=xpath,
                                  status='pass',
                                  element_uid=element_uid)
        self.__record.append(event)

    @property
    def timer(self):
        # Crawl budget in seconds.
        return self.__timer

    def quit(self):
        """Shut down the Appium session."""
        log.warning('Appium quit')
        self.driver.quit()

    @property
    def record(self):
        # Collecting device logs into the report dir is a side effect of
        # reading this property (kept for backward compatibility).
        self.android_log.collect_log(self.report_path)
        return self.__record

    @property
    def report_path(self):
        return self.__report_path