def test_get_shortest_path(self):
    """Exercise ``Automata.get_shortest_path`` on a small seven-state graph.

    The states are registered in order and wired with eight edges. The path
    to ``state0`` is expected to be empty, and the path to ``state6`` is
    expected to leave from the states with ids 0, 3 and 5 in turn.
    """
    automata = Automata()
    states = [State('state%d' % index) for index in range(7)]
    for state in states:
        automata.add_state(state)

    # (source index, target index, clickable id), in insertion order.
    # NOTE(review): the '5-0' clickable sits on a second 3 -> 5 edge; its id
    # hints that a 5 -> 0 edge may have been intended -- confirm.
    edge_specs = (
        (0, 1, '0-1'),
        (0, 2, '0-2'),
        (0, 3, '0-3'),
        (2, 4, '2-4'),
        (4, 5, '4-5'),
        (3, 5, '3-5'),
        (3, 5, '5-0'),
        (5, 6, '5-6'),
    )
    for source, target, clickable_id in edge_specs:
        automata.add_edge(states[source], states[target], Clickable(clickable_id))

    self.assertEqual(automata.get_shortest_path(states[0]), [])

    # Expected route: 0-3, 3-5, 5-6
    path = automata.get_shortest_path(states[6])
    source_ids = [int(edge[0].get_id()) for edge in path]
    self.assertEqual(source_ids, [0, 3, 5])
def load_automata(fname):
    """Build an ``Automata`` from the JSON description stored at ``fname``.

    Two top-level lists are read from the JSON document:

    * ``'state'`` -- entries with ``'id'``, ``'dom_path'`` (joined onto the
      directory that contains ``fname``) and ``'clickable'``, a list of
      entries with ``'id'``, ``'xpath'`` and ``'tag'``.
    * ``'edge'`` -- entries with ``'from'``, ``'to'`` and ``'clickable'`` ids.

    Args:
        fname: Path to the automata JSON file.

    Returns:
        The populated ``Automata`` instance.

    Raises:
        AssertionError: If ``fname`` is not an existing regular file, or if
            an edge refers to a state or clickable that is falsy after
            lookup.
    """
    # isfile() is already False for paths that do not exist, so a separate
    # exists() check adds nothing.
    assert os.path.isfile(fname)
    automata = Automata()
    with open(fname) as f:
        data = json.load(f)

    # DOM files are addressed relative to the JSON file; resolve that
    # directory once instead of once per state.
    base_dir = os.path.dirname(os.path.realpath(fname))
    for state in data['state']:
        with open(os.path.join(base_dir, state['dom_path']), 'r') as df:
            s = State(df.read())
        s.set_id(state['id'])
        for clickable in state['clickable']:
            c = Clickable(clickable['id'], clickable['xpath'], clickable['tag'])
            s.add_clickable(c)
        automata.add_state(s)

    for edge in data['edge']:
        from_state = automata.get_state_by_id(edge['from'])
        to_state = automata.get_state_by_id(edge['to'])
        # Check the states BEFORE dereferencing from_state, so an unresolved
        # source state trips the assertion rather than failing on the
        # attribute access below (presumably the lookup yields None when the
        # id is unknown -- confirm against Automata).
        assert from_state and to_state
        clickable = from_state.get_clickable_by_id(edge['clickable'])
        assert clickable
        automata.add_edge(from_state, to_state, clickable)
    return automata
class B2gCrawler(Crawler):
    """Depth-limited crawler that explores an app through an executor.

    States reached while firing clickables are recorded in ``self.automata``;
    invariant violations and per-outcome clickable counters are collected
    along the way.
    """

    def __init__(self, configuration, executor):
        """Store the collaborators and reset all crawl bookkeeping."""
        self.configuration = configuration
        self.executor = executor
        self.automata = Automata()
        # stack of executed clickables (events)
        self.exe_stack = []
        self.invariant_violation = []
        # 'unexamined': candidate clickables found with rules in DomAnalyzer
        # 'true': clickables that triggered a new state (different screen dom)
        # 'false': clickables that did not trigger a new state
        self.num_clickables = {'unexamined': 0, 'true': 0, 'false': 0}

    def run(self):
        """Crawl from a freshly restarted app.

        Returns:
            A tuple ``(automata, invariant_violation, num_clickables)``; the
            violations are sorted by the integer value of their ``'state'``
            entry.
        """
        self.executor.restart_app()
        root = State(self.executor.get_source())
        self.automata.add_state(root)
        screenshot_name = root.get_id() + '.png'
        self.save_screenshot(screenshot_name, self.executor.get_screenshot(), 'state')
        self.save_dom(root)
        self.crawl(1)

        def state_number(violation):
            return int(violation['state'])

        self.invariant_violation = sorted(self.invariant_violation, key=state_number)
        return self.automata, self.invariant_violation, self.num_clickables

    def crawl(self, depth, prev_state=None):
        """Fire every candidate clickable of the current state, recursively.

        Nothing happens when ``depth`` exceeds the configured maximum or when
        the current state violates an invariant. Each clickable whose firing
        changes the DOM is recorded as an edge; newly added states are
        crawled at ``depth + 1``, after which the crawler backtracks to the
        state it started from.

        Args:
            depth: Current crawl depth (``run`` starts at 1).
            prev_state: State crawled one level up, or ``None``; its DOM is
                handed to ``DomAnalyzer.get_clickables``.
        """
        if depth > self.configuration.get_max_depth():
            return
        current = self.automata.get_current_state()
        if self.violate_invariant(current.get_dom(), current.get_id()):
            return

        current_dom = current.get_dom()
        prev_dom = prev_state.get_dom() if prev_state else None
        candidates = DomAnalyzer.get_clickables(current_dom, prev_dom)
        self.num_clickables['unexamined'] += len(candidates)

        for clickable in candidates:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = current.get_id() + '-' + clickable.get_id() + '.png'
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', current.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            worker = FireEventThread(self.executor, clickable)
            worker.start()
            # time out after sleep_time*2 seconds
            worker.join(self.configuration.get_sleep_time() * 2)
            if worker.is_alive():  # timed out
                self._abort_no_response(clickable)

            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1
            new_dom = self.executor.get_source()
            if DomAnalyzer.is_equal(current.get_dom(), new_dom):
                self.num_clickables['false'] += 1
                continue

            self.num_clickables['true'] += 1
            current.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            next_state, is_new = self.automata.add_state(State(new_dom))
            self.automata.add_edge(current, next_state, clickable)
            if is_new:
                self.save_screenshot(next_state.get_id() + '.png',
                                     self.executor.get_screenshot(), 'state')
                self.save_dom(next_state)
                self.automata.change_state(next_state)
                self.crawl(depth + 1, current)
            # Undo the push above and return to the state being expanded.
            self.exe_stack.pop()
            self.automata.change_state(current)
            self.backtrack(current)

    def _abort_no_response(self, clickable):
        """Log the executed sequence and the counters, then exit the program."""
        logger.error('No response while firing an event. Execution sequences:')
        # add the clickable triggering No Response
        self.exe_stack.append(clickable)
        for executed in self.exe_stack:
            logger.error(executed)
        counts = self.num_clickables
        total = counts['unexamined'] + counts['true'] + counts['false']
        logger.error(
            'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
            total, counts['true'], counts['false'], counts['unexamined'])
        logger.error('Program terminated.')
        sys.exit()

    def backtrack(self, state):
        """Restart the app and replay the automata's shortest path to ``state``."""
        logger.debug('Backtrack to state %s', state.get_id())
        path = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        for _state_from, _state_to, clickable, _cost in path:
            time.sleep(self.configuration.get_sleep_time())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            self.executor.fire_event(clickable)

    def save_screenshot(self, fname, b64data, my_type):
        """Decode base64 image data and write it under the ``my_type`` path."""
        target = os.path.join(self.configuration.get_abs_path(my_type), fname)
        raw_image = base64.b64decode(b64data)
        with open(target, 'wb') as out:
            out.write(raw_image)

    def save_dom(self, state):
        """Write the state's DOM to ``<state id>.txt`` under the 'dom' path."""
        dom_dir = self.configuration.get_abs_path('dom')
        target = os.path.join(dom_dir, state.get_id() + '.txt')
        with open(target, 'w') as out:
            out.write(state.get_dom())

    def violate_invariant(self, dom, statd_id):
        """Check ``dom`` against every configured invariant.

        One record is appended to ``self.invariant_violation`` for each
        invariant whose ``check`` returns a truthy value.

        Returns:
            ``True`` if at least one invariant reported a violation.
        """
        violated = False
        for invariant in self.configuration.get_invariants():
            if not invariant.check(dom):
                continue
            violated = True
            self.invariant_violation.append({
                'state': statd_id,
                'name': str(invariant),
                'sequence': list(self.exe_stack),  # shallow copy of clickables
            })
        return violated
class B2gCrawler(Crawler):
    """Depth-limited crawler that explores an app through an executor.

    States reached while firing clickables are recorded in ``self.automata``;
    invariant violations and per-outcome clickable counters are collected
    along the way.
    """

    def __init__(self, configuration, executor):
        """Store the collaborators and reset all crawl bookkeeping."""
        self.configuration = configuration
        self.executor = executor
        self.automata = Automata()
        # stack of executed clickables (events)
        self.exe_stack = []
        self.invariant_violation = []
        # 'unexamined': candidate clickables found with rules in DomAnalyzer
        # 'true': clickables that triggered a new state (different screen dom)
        # 'false': clickables that did not trigger a new state
        self.num_clickables = {'unexamined': 0, 'true': 0, 'false': 0}

    def run(self):
        """Crawl from a freshly restarted app.

        Returns:
            A tuple ``(automata, invariant_violation, num_clickables)``; the
            violations are sorted by the integer value of their ``'state'``
            entry.
        """
        self.executor.restart_app()
        root = State(self.executor.get_source())
        self.automata.add_state(root)
        screenshot_name = root.get_id() + '.png'
        self.save_screenshot(screenshot_name, self.executor.get_screenshot(), 'state')
        self.save_dom(root)
        self.crawl(1)

        def state_number(violation):
            return int(violation['state'])

        self.invariant_violation = sorted(self.invariant_violation, key=state_number)
        return self.automata, self.invariant_violation, self.num_clickables

    def crawl(self, depth, prev_state=None):
        """Fire every candidate clickable of the current state, recursively.

        Nothing happens when ``depth`` exceeds the configured maximum or when
        the current state violates an invariant. Each clickable whose firing
        changes the DOM is recorded as an edge; newly added states are
        crawled at ``depth + 1``, after which the crawler backtracks to the
        state it started from.

        Args:
            depth: Current crawl depth (``run`` starts at 1).
            prev_state: State crawled one level up, or ``None``; its DOM is
                handed to ``DomAnalyzer.get_clickables``.
        """
        if depth > self.configuration.get_max_depth():
            return
        current = self.automata.get_current_state()
        if self.violate_invariant(current.get_dom(), current.get_id()):
            return

        current_dom = current.get_dom()
        prev_dom = prev_state.get_dom() if prev_state else None
        candidates = DomAnalyzer.get_clickables(current_dom, prev_dom)
        self.num_clickables['unexamined'] += len(candidates)

        for clickable in candidates:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = current.get_id() + '-' + clickable.get_id() + '.png'
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', current.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            worker = FireEventThread(self.executor, clickable)
            worker.start()
            # time out after sleep_time*2 seconds
            worker.join(self.configuration.get_sleep_time() * 2)
            if worker.is_alive():  # timed out
                self._abort_no_response(clickable)

            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1
            new_dom = self.executor.get_source()
            if DomAnalyzer.is_equal(current.get_dom(), new_dom):
                self.num_clickables['false'] += 1
                continue

            self.num_clickables['true'] += 1
            current.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            next_state, is_new = self.automata.add_state(State(new_dom))
            self.automata.add_edge(current, next_state, clickable)
            if is_new:
                self.save_screenshot(next_state.get_id() + '.png',
                                     self.executor.get_screenshot(), 'state')
                self.save_dom(next_state)
                self.automata.change_state(next_state)
                self.crawl(depth + 1, current)
            # Undo the push above and return to the state being expanded.
            self.exe_stack.pop()
            self.automata.change_state(current)
            self.backtrack(current)

    def _abort_no_response(self, clickable):
        """Log the executed sequence and the counters, then exit the program."""
        logger.error('No response while firing an event. Execution sequences:')
        # add the clickable triggering No Response
        self.exe_stack.append(clickable)
        for executed in self.exe_stack:
            logger.error(executed)
        counts = self.num_clickables
        total = counts['unexamined'] + counts['true'] + counts['false']
        logger.error(
            'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
            total, counts['true'], counts['false'], counts['unexamined'])
        logger.error('Program terminated.')
        sys.exit()

    def backtrack(self, state):
        """Restart the app and replay the automata's shortest path to ``state``."""
        logger.debug('Backtrack to state %s', state.get_id())
        path = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        for _state_from, _state_to, clickable, _cost in path:
            time.sleep(self.configuration.get_sleep_time())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            self.executor.fire_event(clickable)

    def save_screenshot(self, fname, b64data, my_type):
        """Decode base64 image data and write it under the ``my_type`` path."""
        target = os.path.join(self.configuration.get_abs_path(my_type), fname)
        raw_image = base64.b64decode(b64data)
        with open(target, 'wb') as out:
            out.write(raw_image)

    def save_dom(self, state):
        """Write the state's DOM to ``<state id>.txt`` under the 'dom' path."""
        dom_dir = self.configuration.get_abs_path('dom')
        target = os.path.join(dom_dir, state.get_id() + '.txt')
        with open(target, 'w') as out:
            out.write(state.get_dom())

    def violate_invariant(self, dom, statd_id):
        """Check ``dom`` against every configured invariant.

        One record is appended to ``self.invariant_violation`` for each
        invariant whose ``check`` returns a truthy value.

        Returns:
            ``True`` if at least one invariant reported a violation.
        """
        violated = False
        for invariant in self.configuration.get_invariants():
            if not invariant.check(dom):
                continue
            violated = True
            self.invariant_violation.append({
                'state': statd_id,
                'name': str(invariant),
                'sequence': list(self.exe_stack),  # shallow copy of clickables
            })
        return violated
class AutomataTestCase(unittest.TestCase):
    """Tests for ``Automata`` and the state, clickable and form objects it holds."""

    def setUp(self):
        """Create ``self.automata`` with one state that owns one clickable.

        Along the way this also asserts that equal inputs and equal forms are
        only stored once and can be removed again by passing an equal object.
        """
        dom1 = (
            ' <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8">'
            ' <title></title> </head> <body>dom1</body> </html> '
        )
        state = State(dom1)
        self.automata = Automata()
        self.automata.add_state(state)
        self.assertEqual(len(self.automata.get_states()), 1)
        self.assertEqual(self.automata.get_initial_state().get_id(),
                         self.automata.get_current_state().get_id())
        self.assertEqual(self.automata.get_current_state().get_id(), '0')

        # test adding and removing inputs and forms
        form1 = FormField('form1')
        form1.add_input(InputField('username', '//*[@id="username"]', 'castman'))
        form1.add_input(InputField('password', '', 'p@ssw0rd'))
        # the same password input again: the count below must stay at 2
        form1.add_input(InputField('password', '', 'p@ssw0rd'))
        self.assertEqual(len(form1.get_inputs()), 2)
        form1.remove_input(InputField('username', '//*[@id="username"]', 'castman'))
        self.assertEqual(len(form1.get_inputs()), 1)

        form2 = FormField('', '//*[@id="lst-ib"]')
        clickable = Clickable('', '//*[@id="btn1"]')
        clickable.add_form(form1)
        # a second form built as FormField('form1'): the count must stay at 2
        clickable.add_form(FormField('form1'))
        clickable.add_form(form2)
        self.assertEqual(len(clickable.get_forms()), 2)
        clickable.remove_form(FormField('', '//*[@id="lst-ib"]'))
        self.assertEqual(len(clickable.get_forms()), 1)

        # add the clickable into state 0
        self.automata.get_current_state().add_clickable(clickable)

    def test_automata(self):
        """Check state de-duplication, clickables, edges and state switching."""
        dom1 = self.automata.get_current_state().get_dom()
        dom1 += '<custom></custom>'
        dom2 = dom1
        state1 = State(dom1)
        state2 = State(dom2)
        self.automata.add_state(state1)
        # state2 carries the same DOM as state1, so the existing state is
        # expected back and nothing new is added.
        state3, is_newly_added = self.automata.add_state(state2)
        self.assertTrue(state3 == state1)
        self.assertFalse(is_newly_added)
        self.assertEqual(len(self.automata.get_states()), 2)

        clickable = self.automata.get_current_state().get_clickables()[0]
        clickable2 = Clickable('', '//html/body/button[3]')
        self.assertEqual(len(self.automata.get_current_state().get_clickables()), 1)
        # Re-adding known clickables must not create duplicates.
        self.automata.get_current_state().add_clickable(clickable)
        self.automata.get_current_state().add_clickable(clickable2)
        self.automata.get_current_state().add_clickable(clickable2)
        self.assertEqual(len(self.automata.get_current_state().get_clickables()), 2)

        self.automata.add_edge(self.automata.get_current_state(), state1,
                               self.automata.get_current_state().get_clickables()[0])
        self.assertEqual(len(self.automata.get_edges()), 1)

        state1.add_prev_state(self.automata.get_current_state())
        self.assertEqual(self.automata.get_current_state().get_id(), '0')
        self.automata.change_state(state1)
        self.assertEqual(self.automata.get_initial_state().get_id(), '0')
        self.assertEqual(self.automata.get_current_state().get_id(), '1')
        self.assertEqual(self.automata.get_current_state().get_prev_states()[0].get_id(), '0')

    def test_get_shortest_path(self):
        """Check ``get_shortest_path`` on a seven-state graph."""
        automata = Automata()
        state0 = State('state0')
        state1 = State('state1')
        state2 = State('state2')
        state3 = State('state3')
        state4 = State('state4')
        state5 = State('state5')
        state6 = State('state6')
        automata.add_state(state0)
        automata.add_state(state1)
        automata.add_state(state2)
        automata.add_state(state3)
        automata.add_state(state4)
        automata.add_state(state5)
        automata.add_state(state6)
        automata.add_edge(state0, state1, Clickable('0-1'))
        automata.add_edge(state0, state2, Clickable('0-2'))
        automata.add_edge(state0, state3, Clickable('0-3'))
        automata.add_edge(state2, state4, Clickable('2-4'))
        automata.add_edge(state4, state5, Clickable('4-5'))
        automata.add_edge(state3, state5, Clickable('3-5'))
        # NOTE(review): the id '5-0' hints at a 5 -> 0 edge, but the edge is
        # added from state3 to state5 -- confirm which was intended.
        automata.add_edge(state3, state5, Clickable('5-0'))
        automata.add_edge(state5, state6, Clickable('5-6'))
        self.assertEqual(automata.get_shortest_path(state0), [])
        edges = automata.get_shortest_path(state6)
        # 0-3, 3-5, 5-6
        self.assertEqual([int(e[0].get_id()) for e in edges], [0, 3, 5])

    def test_load_save(self):
        """Load the example automata, save it, and compare with the source file."""
        import filecmp

        automata = Automata(fname='test_data/automata-example.json')
        config = B2gConfiguration('test-app-name', 'test-app-id', mkdir=False)
        config.set_path('root', 'test_data')
        config.set_path('dom', 'test_data/dom')
        config.set_path('state', 'test_data/screenshot/state')
        config.set_path('clickable', 'test_data/screenshot/clickable')
        saved_file_path = automata.save(config)
        try:
            self.assertTrue(filecmp.cmp('test_data/automata-example.json',
                                        'test_data/automata.json'))
        finally:
            # Remove the generated file even when the comparison fails, so a
            # failing run does not leave the artifact behind.
            try:
                os.remove(saved_file_path)
            except OSError:
                pass