def back_to_start():
    """Send the site back to its start point and report the resulting state.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    logging.info('getting back to start point')
    current_site = Site()
    current_site.back_to_start()
    graph_xml = etree.tostring(current_site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=current_site.current)
def follow_existing_connections():
    """Replay the actions that lead to the page named in the request.

    Reads the JSON request body, asks the site for the actions leading to its
    ``target`` entry and performs them in the order they are returned.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    payload = request.get_json()
    target = payload['target']
    # Pass the value as a logging argument rather than pre-formatting the
    # message, so interpolation only happens if the record is emitted.
    logging.info('%s', target)
    site = Site()
    for action in site.get_actions_to(target):
        action.do()
    graph_xml = etree.tostring(site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=site.current)
def add_connection_and_go():
    """Register a click connection on the current page and follow it.

    Reads ``css`` and ``nb`` from the JSON request body, adds a
    ``Action.ActionType.CLICK`` connection built from them to the current
    page, then performs the action associated with the new connection id.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    payload = request.get_json()
    css, nb = payload['css'], payload['nb']
    # Pass the values as logging arguments rather than pre-formatting the
    # message, so interpolation only happens if the record is emitted.
    logging.info('%s - %s', css, nb)
    site = Site()
    connection_id = site.add_connection_to_current_page(
        Action.ActionType.CLICK, css, nb)
    site.get_action_from_id(connection_id).do()
    graph_xml = etree.tostring(site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=site.current)
def get_from_start():
    """Load the requested URL in the browser, only from the 'start' position.

    Calls ``abort(500)`` when ``site.current`` is anything other than
    ``'start'``; otherwise passes the ``url`` entry of the JSON request body
    to ``Browser().get``.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    payload = request.get_json()
    current_site = Site()
    if current_site.current != 'start':
        abort(500)
    Browser().get(payload['url'])
    graph_xml = etree.tostring(current_site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=current_site.current)
def follow(connection):
    """Follow one known connection that leaves the current page.

    Args:
        connection: Key identifying the connection in ``site._connections``.

    Calls ``abort(404)`` when the key is unknown and ``abort(500)`` when the
    connection's ``'from'`` entry differs from ``site.current``. Otherwise the
    action associated with the connection is performed.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    site = Site()
    known_connections = site._connections
    if connection not in known_connections:
        abort(404)
    # A connection can only be followed from the page it starts on.
    if known_connections[connection]['from'] != site.current:
        abort(500)
    site.get_action_from_id(connection).do()
    graph_xml = etree.tostring(site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=site.current)
def start():
    """Apply the requested configuration and start the browser on a new site.

    Reads ``browser``, ``proxy``, ``name`` (and ``proxy_path`` unless
    ``proxy`` is ``'no proxy'``) from the JSON request body, stores the
    browser and proxy path on ``Configuration()``, creates ``Site(name)`` and
    hands it to ``Browser().start``.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    settings = request.get_json()
    Configuration().browser = settings['browser']
    # 'proxy_path' is only looked up when a proxy was actually requested.
    proxy_path = None if settings['proxy'] == 'no proxy' else settings['proxy_path']
    Configuration().proxy_path = proxy_path
    new_site = Site(settings['name'])
    Browser().start(new_site)
    graph_xml = etree.tostring(new_site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=new_site.current)
def test_on_path_finding(self):
    """Check path finding, action generation, exploration and GEXF export.

    The site graph is filled in by hand (c0..c6 are the connections, inserted
    in that order; all are explored except c6):

        'start'    --c0--> node_start   (START connection)
        node_start --c1--> node_1
        node_1     --c2--> node_2
        node_2     --c3--> node_end
        node_2     --c4--> node_1
        node_start --c5--> node_2
        node_end   --c6--> None         (unexplored)

    The expected shortest route from node_start to node_end is c5 then c3.
    """
    from Site.Site import Site
    from Site.Page import Page
    from Main.Action import Action

    site_url = "http://%s.url/for/page" % uuid.uuid4()
    site = Site(site_url)

    def click_data():
        """Return a fresh payload for a LINK connection with a random selector."""
        return {'css': str(uuid.uuid4()), 'nb': 0}

    def link(origin, destination, data, explored=True):
        """Build a new LINK connection dict; each call returns a distinct object."""
        return {'from': origin, 'to': destination, 'explored': explored,
                'type': site.ConnectionTypes.LINK, 'data': data}

    def assert_clicks(actions, expected_connection_ids):
        """Assert that actions are CLICK actions on the given connections, in order."""
        assert len(actions) == len(expected_connection_ids)
        for action, expected_id in zip(actions, expected_connection_ids):
            assert action._type == Action.ActionType.CLICK
            assert action.connection == expected_id

    node_start = str(uuid.uuid4())
    node_1 = str(uuid.uuid4())
    node_2 = str(uuid.uuid4())
    node_end = str(uuid.uuid4())

    (connection_0_id, connection_1_id, connection_2_id, connection_3_id,
     connection_4_id, connection_5_id, connection_6_id) = (
        str(uuid.uuid4()) for _ in range(7))

    # Only these two payloads are checked again in the path assertions below.
    connection_3_data = click_data()
    connection_5_data = click_data()

    # Insertion order is kept identical to the connection numbering.
    site._connections[connection_0_id] = {
        'from': 'start', 'to': node_start, 'explored': True,
        'type': site.ConnectionTypes.START, 'data': {'url': site_url}}
    site._connections[connection_1_id] = link(node_start, node_1, click_data())
    site._connections[connection_2_id] = link(node_1, node_2, click_data())
    site._connections[connection_3_id] = link(node_2, node_end, connection_3_data)
    site._connections[connection_4_id] = link(node_2, node_1, click_data())
    site._connections[connection_5_id] = link(node_start, node_2, connection_5_data)
    site._connections[connection_6_id] = link(node_end, None, click_data(), explored=False)

    labelled_nodes = (
        (node_start, 'node_start'),
        (node_1, 'node_1'),
        (node_2, 'node_2'),
        (node_end, 'node_end'),
    )
    for node_id, label in labelled_nodes:
        site._pages[node_id] = Page(label, '<html><body>%s</body></html>' % uuid.uuid4())

    site._current = node_start

    # Shortest path: expected values are rebuilt as new dicts rather than
    # reusing the objects stored in the site.
    path = site.find_shortest_path(node_start, node_end)
    assert path[0] == {'connection': link(node_start, node_2, connection_5_data),
                       'id': connection_5_id}
    assert path[1] == {'connection': link(node_2, node_end, connection_3_data),
                       'id': connection_3_id}

    assert_clicks(site.get_actions_to(node_end), [connection_5_id, connection_3_id])

    assert site.get_distance_to(connection_6_id) == 2

    # From node_start the unexplored connection is reached through c5 and c3.
    assert_clicks(site.get_first_connection_unexplored(),
                  [connection_5_id, connection_3_id, connection_6_id])

    # From node_end the unexplored connection is directly available.
    site._current = node_end
    assert_clicks(site.get_first_connection_unexplored(), [connection_6_id])

    # Landing on a new page through c6 registers the page and marks c6 explored.
    html = '<html><body><div>new_page</div></body></html>'
    page = site.current_page(html, '', connection_6_id)
    new_page_id = site.get_uniq_id(html, '')
    assert site._pages[new_page_id] == page
    assert site._connections[connection_6_id]['explored']
    assert site._connections[connection_6_id]['to'] == new_page_id

    # Nothing is left to explore.
    assert site.get_first_connection_unexplored() is None

    gexf_site = site.get_gexf()
    assert gexf_site.xpath('//meta/creator')[0].text == "Pallas"
    assert gexf_site.xpath('//meta/description')[0].text == site_url
    # NOTE(review): 6 nodes presumably = 'start' + the four hand-built pages +
    # the page added by current_page(); 7 edges = connections c0..c6 - confirm.
    assert len(gexf_site.xpath('//nodes/node')) == 6
    assert len(gexf_site.xpath('//edges/edge')) == 7
def go_to_url():
    """Point the browser at the URL given in the JSON request body.

    Returns:
        The ``jsonify`` response holding the site's GEXF document (serialised
        with ``etree.tostring`` and decoded as UTF-8) under ``gexf`` and
        ``site.current`` under ``current_page``.
    """
    payload = request.get_json()
    web_browser = Browser()
    current_site = Site()
    web_browser.get(payload['url'])
    graph_xml = etree.tostring(current_site.get_gexf()).decode('utf-8')
    return jsonify(gexf=graph_xml, current_page=current_site.current)