示例#1
0
def url_handler():
	"""Fetch the page at the user-supplied URL and render it with tag counts.

	Reads the ``url`` query parameter, downloads the page, and passes the
	result through the helpers ``get_html``, ``create_count``,
	``encode_html`` and ``add_spans`` before rendering ``results.html``.

	Returns:
		The rendered ``results.html`` template (with ``url``, ``html`` and
		``tags``) on success, or a plain-text error message when the URL is
		missing or the request fails.
	"""
	# Built from adjacent literals so that source indentation does not leak
	# into the message shown to the user.
	error_message = (
		'The URL you entered is invalid, or is having trouble connecting '
		'at this time. Please enter a new URL.'
	)

	url = request.args.get('url')
	if not url:
		# No URL was supplied; there is nothing to fetch.
		return error_message

	try:
		# A timeout keeps an unresponsive server from blocking this handler
		# indefinitely; timeouts are reported as RequestException subclasses.
		html_object = requests.get(url, timeout=10)
	except requests.exceptions.RequestException:
		return error_message

	# decode requests unicode object
	html = get_html(html_object)

	# create dictionary of tags and counts
	tags = create_count(html)

	# escape < > and & characters so html can be displayed
	html = encode_html(html)

	# wrap individual tags with spans so they can be highlighted
	html = add_spans(html)

	return render_template('results.html',
							url=url,
							html=html,
							tags=tags)
示例#2
0
def fetch_html():
    """Fetch the submitted URL and render its HTML with an element histogram.

    Reads ``input_url`` from the submitted form, downloads the page, escapes
    the markup with ``encode_html``, wraps it with ``add_spans``, and builds
    an element histogram from the parsed lxml tree.

    Returns:
        The rendered ``results.html`` template on success. When the URL is
        missing or the request fails, flashes an error message and redirects
        to ``/``.
    """
    error_message = 'The URL you entered is either invalid or unavailable. Try again!'

    input_url = request.form.get('input_url')
    if not input_url:
        # Nothing was submitted; treat it like any other unusable URL.
        flash(error_message)
        return redirect('/')

    try:
        # Fetch HTML of input url. The timeout keeps an unresponsive server
        # from blocking this handler indefinitely.
        page = requests.get(input_url, timeout=10)
    except requests.exceptions.RequestException:
        # RequestException is the base class of all requests errors, so this
        # also covers missing/unsupported schemes, timeouts and redirect loops
        # in addition to connection failures and invalid URLs.
        flash(error_message)
        return redirect('/')

    html = page.text

    # Replace <, > with HTML entities to display on page
    raw_html = encode_html(html)

    # Add spans to each element so jQuery can select and apply highlight class
    # Have to pass raw_html as Markup object to properly display
    # NOTE(review): Markup disables template auto-escaping, so this relies on
    # encode_html having escaped the fetched page -- confirm.
    span_html = Markup(add_spans(raw_html))

    # Convert HTML unicode to lxml Tree, build element histogram
    tree = lxml.html.fromstring(html)
    frequency = build_element_histogram(tree)

    # Keep track of URL, omitting the http:// or https:// prefix. Slicing a
    # fixed 7 characters would mangle https:// URLs, so strip whichever
    # scheme prefix is actually present.
    display_url = input_url
    for scheme_prefix in ('http://', 'https://'):
        if input_url.lower().startswith(scheme_prefix):
            display_url = input_url[len(scheme_prefix):]
            break

    return render_template('results.html', frequency=frequency, raw_html=span_html, website=display_url)