Пример #1
0
def check_website(url):
    """Explore the site reachable from *url* until nothing is left unexplored.

    A ``Browser`` is started on a ``Site`` built from *url*, the page is
    loaded and studied, and the action lists returned by
    ``site.get_first_connection_unexplored()`` are replayed until that
    method returns ``None``.  The site graph is then shown.

    The browser is stopped in a ``finally`` clause so it is not left
    running when any step of the exploration raises.

    :param url: address of the first page to load.
    """
    # NOTE(review): ``config`` is not referenced below; the instance is
    # presumably created for its side effects (shared settings) -- confirm
    # before removing it.
    config = Configuration()
    site = Site(url)

    browser = Browser()
    browser.start(site)
    try:
        # strange behaviour from browsermob proxy, DNS doesn't always work
        browser.add_remap_urls([urlparse(url).hostname])

        browser.get(url)
        browser.study_state()
        actions = site.get_first_connection_unexplored()
        while actions is not None:
            # Let logging do the interpolation only if the record is emitted.
            logging.info('%s action(s) needed to reach this connection', len(actions))
            for action in actions:
                action.do()
            # Re-study the page reached after replaying the whole action list.
            browser.study_state()
            actions = site.get_first_connection_unexplored()
        site.show_graph()
    finally:
        browser.stop()
Пример #2
0
 def test_basic(self):
     """Run a Browser configured with the 'Dummy' driver and check its calls.

     One page is loaded and studied, the first unexplored connection is
     followed and the browser is stopped.  After each step the entries of
     ``DummyBrowser.actions`` and the contents of the ``Site`` are asserted.
     """
     from Main.Configuration import Configuration
     from Site.Site import Site
     from Main.Browser import Browser
     from tests.DummyBrowser import DummyBrowser
     recorder = DummyBrowser(random.random())
     configuration = Configuration(['-b', 'Dummy'])
     configuration._browser = 'Dummy'
     explored_site = Site(str(uuid.uuid4()))
     crawler = Browser(random.random())
     crawler.start(explored_site)
     page_url = "http://{0}/page".format(uuid.uuid4())
     crawler.get(page_url)
     crawler.study_state()

     # After get + study_state: one 'get', then the 'input' and 'a' lookups.
     assert len(recorder.actions) == 3
     assert recorder.actions[0] == {'action': 'get', 'target': page_url}
     assert recorder.actions[1]['action'] == 'find_elements_by_tag_name'
     assert recorder.actions[1]['target'] == 'input'
     assert recorder.actions[2]['action'] == 'find_elements_by_tag_name'
     assert recorder.actions[2]['target'] == 'a'

     # Site content: 'nb' of the 'a' lookup is presumably the number of links
     # the dummy driver reported -- one extra connection is already explored.
     assert len(explored_site._pages) == 2
     link_count = recorder.actions[2]['nb']
     assert len(explored_site._connections) == link_count + 1
     known_connections = list(explored_site._connections.values())
     assert len([c for c in known_connections if c['explored']]) == 1
     assert len([c for c in known_connections if not c['explored']]) == link_count

     # Following the first unexplored connection adds a lookup and a click.
     pending = explored_site.get_first_connection_unexplored()
     assert len(pending) == 1
     pending[0].do()
     assert len(recorder.actions) == 5
     assert recorder.actions[3]['action'] == 'find_elements_by_css_selector'
     assert recorder.actions[4]['action'] == 'element.click'

     # Stopping the browser is recorded as a final 'quit'.
     crawler.stop()
     assert len(recorder.actions) == 6
     assert recorder.actions[5]['action'] == 'quit'
Пример #3
0
 def test_with_browsermobproxy(self):
     """Run the 'Dummy' driver scenario with ``--proxy-path`` configured.

     The configuration must expose a non-``None`` ``proxy_path``; a host
     remap is added before the page is studied, and the calls recorded in
     ``DummyBrowser.actions`` are asserted after each step.
     """
     from Main.Configuration import Configuration
     from Site.Site import Site
     from Main.Browser import Browser
     from tests.DummyBrowser import DummyBrowser
     fake_driver = DummyBrowser(random.random())
     cli_args = ['-b', 'Dummy', '--env', '--proxy-path', '/home/vagrant/browsermob-proxy-2.0-beta-9/bin/']
     settings = Configuration(cli_args)
     assert settings.proxy_path is not None
     settings._browser = 'Dummy'
     target_site = Site(str(uuid.uuid4()))
     session = Browser(random.random())
     session.start(target_site)
     page_address = "http://{0}/page".format(uuid.uuid4())
     session.get(page_address)
     session.add_remap_urls(['localhost'])
     session.study_state()

     # One 'get' followed by the two tag lookups, in this order.
     assert len(fake_driver.actions) == 3
     assert fake_driver.actions[0] == {'action': 'get', 'target': page_address}
     for position, tag in ((1, 'input'), (2, 'a')):
         assert fake_driver.actions[position]['action'] == 'find_elements_by_tag_name'
         assert fake_driver.actions[position]['target'] == tag

     assert len(target_site._pages) == 2
     connection_total = len(target_site._connections)
     assert connection_total == fake_driver.actions[2]['nb'] + 1
     assert connection_total > 1
     explored_total = sum(1 for link in target_site._connections.values() if link['explored'])
     assert explored_total == 1

     # Following the first unexplored connection adds a lookup and a click.
     next_steps = target_site.get_first_connection_unexplored()
     assert len(next_steps) == 1
     next_steps[0].do()
     assert len(fake_driver.actions) == 5
     assert fake_driver.actions[3]['action'] == 'find_elements_by_css_selector'
     assert fake_driver.actions[4]['action'] == 'element.click'

     # Stopping the browser is recorded as a final 'quit'.
     session.stop()
     assert len(fake_driver.actions) == 6
     assert fake_driver.actions[5]['action'] == 'quit'
Пример #4
0
 def test_on_path_finding(self):
     """Check path finding, action planning and GEXF export on a fixed graph.

     The graph is written directly into ``site._connections`` and
     ``site._pages``.  Its connections are:

     * 0: ``'start'`` -> ``node_start`` (START, explored)
     * 1: ``node_start`` -> ``node_1``
     * 2: ``node_1`` -> ``node_2``
     * 3: ``node_2`` -> ``node_end``
     * 4: ``node_2`` -> ``node_1``
     * 5: ``node_start`` -> ``node_2``
     * 6: ``node_end`` -> ``None`` (the only unexplored connection)

     The expected route from ``node_start`` to ``node_end`` is therefore
     connection 5 followed by connection 3.
     """
     from Site.Site import Site
     from Site.Page import Page
     from Main.Action import Action
     site_url = "http://%s.url/for/page" % uuid.uuid4()
     site = Site(site_url)
     node_start = str(uuid.uuid4())
     node_1 = str(uuid.uuid4())
     node_2 = str(uuid.uuid4())
     node_end = str(uuid.uuid4())

     # --- connections ---------------------------------------------------
     connection_0_id = str(uuid.uuid4())
     connection_0_content = {
         'from': 'start', 'to': node_start, 'explored': True,
         'type': site.ConnectionTypes.START, 'data': {'url': site_url},
     }
     connection_1_id = str(uuid.uuid4())
     connection_1_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_1_content = {
         'from': node_start, 'to': node_1, 'explored': True,
         'type': site.ConnectionTypes.LINK, 'data': connection_1_data,
     }
     connection_2_id = str(uuid.uuid4())
     connection_2_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_2_content = {
         'from': node_1, 'to': node_2, 'explored': True,
         'type': site.ConnectionTypes.LINK, 'data': connection_2_data,
     }
     connection_3_id = str(uuid.uuid4())
     connection_3_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_3_content = {
         'from': node_2, 'to': node_end, 'explored': True,
         'type': site.ConnectionTypes.LINK, 'data': connection_3_data,
     }
     connection_4_id = str(uuid.uuid4())
     connection_4_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_4_content = {
         'from': node_2, 'to': node_1, 'explored': True,
         'type': site.ConnectionTypes.LINK, 'data': connection_4_data,
     }
     connection_5_id = str(uuid.uuid4())
     connection_5_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_5_content = {
         'from': node_start, 'to': node_2, 'explored': True,
         'type': site.ConnectionTypes.LINK, 'data': connection_5_data,
     }
     connection_6_id = str(uuid.uuid4())
     connection_6_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_6_content = {
         'from': node_end, 'to': None, 'explored': False,
         'type': site.ConnectionTypes.LINK, 'data': connection_6_data,
     }
     site._connections[connection_0_id] = connection_0_content
     site._connections[connection_1_id] = connection_1_content
     site._connections[connection_2_id] = connection_2_content
     site._connections[connection_3_id] = connection_3_content
     site._connections[connection_4_id] = connection_4_content
     site._connections[connection_5_id] = connection_5_content
     site._connections[connection_6_id] = connection_6_content

     # --- pages ---------------------------------------------------------
     html_start = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_start] = Page('node_start', html_start)
     html_1 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_1] = Page('node_1', html_1)
     html_2 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_2] = Page('node_2', html_2)
     html_end = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_end] = Page('node_end', html_end)
     site._current = node_start

     # --- shortest path from node_start to node_end ----------------------
     # Expected values are spelled out as fresh literals (not the
     # *_content objects stored in the site) so the comparison does not
     # trivially succeed through object identity.
     path = site.find_shortest_path(node_start, node_end)
     assert path[0] == {
         'connection': {
             'from': node_start, 'to': node_2, 'explored': True,
             'type': site.ConnectionTypes.LINK, 'data': connection_5_data,
         },
         'id': connection_5_id,
     }
     assert path[1] == {
         'connection': {
             'from': node_2, 'to': node_end, 'explored': True,
             'type': site.ConnectionTypes.LINK, 'data': connection_3_data,
         },
         'id': connection_3_id,
     }

     # --- actions needed to reach node_end --------------------------------
     actions = site.get_actions_to(node_end)
     assert len(actions) == 2
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_5_id
     assert actions[1]._type == Action.ActionType.CLICK
     assert actions[1].connection == connection_3_id
     assert site.get_distance_to(connection_6_id) == 2

     # --- unexplored connection, seen from node_start ---------------------
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 3
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_5_id
     assert actions[1]._type == Action.ActionType.CLICK
     assert actions[1].connection == connection_3_id
     assert actions[2]._type == Action.ActionType.CLICK
     assert actions[2].connection == connection_6_id

     # --- unexplored connection, seen from node_end -----------------------
     site._current = node_end
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 1
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_6_id

     # --- registering the page reached through connection 6 ---------------
     html = '<html><body><div>new_page</div></body></html>'
     page = site.current_page(html, '', connection_6_id)
     assert site._pages[site.get_uniq_id(html, '')] == page
     assert site._connections[connection_6_id]['explored'] is True
     assert site._connections[connection_6_id]['to'] == site.get_uniq_id(html, '')
     # Every connection is explored now, so nothing is left to visit.
     actions = site.get_first_connection_unexplored()
     assert actions is None

     # --- GEXF export -----------------------------------------------------
     gexf_site = site.get_gexf()
     assert gexf_site.xpath('//meta/creator')[0].text == "Pallas"
     assert gexf_site.xpath('//meta/description')[0].text == site_url
     assert len(gexf_site.xpath('//nodes/node')) == 6
     assert len(gexf_site.xpath('//edges/edge')) == 7