Python download_html примеры использования

Язык программирования: Python

Пространство имен/Пакет: common

Метод/Функция: download_html

Примеров на hotexamples.com: 7

Python download_html - 7 примеров найдено. Это лучшие примеры Python кода для common.download_html, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from xml.etree import ElementTree as ET
import re, sys
import common as beerlib

# Fetch the Untappd venue page for JBM Brew Lab Pub.
html = beerlib.download_html('https://untappd.com/v/jbm-brew-lab-pub/4222393')
if not html:
    # A falsy result is treated as a failed download.  sys.exit() is used
    # instead of the site-provided exit(), which is meant for interactive
    # sessions and is not guaranteed to exist in every interpreter setup.
    sys.exit(-1)

# Hand the page and the command-line arguments to the shared Untappd handler.
beerlib.process_untappd(html, 'JBM Brew Lab Pub', sys.argv)

Пример #2

Показать файл

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from xml.etree import ElementTree as ET
import re, sys
import common as beerlib

# Fetch the tap-list page; a falsy result is treated as a failed download.
html = beerlib.download_html('http://ochutnavkovapivnice.cz/prave_na_cepu/')
if not html:
    sys.exit(-1)

# Cut out everything from the first "<table" to the last "</table>".
# The pattern is greedy and DOTALL lets it span multiple lines.
reg = re.compile('(<table.*</table>)', re.MULTILINE | re.DOTALL)
table_match = reg.search(html)
if not table_match:
    sys.exit(-2)

# Every ampersand is replaced by a numeric character reference before the
# fragment is parsed as XML (presumably so HTML-only entities do not make
# the XML parser fail -- confirm against the live page).
table_html = table_match.group(0).replace('&', '&#038;')
table = ET.XML(table_html)

rows = iter(table)
# The first row supplies the headers: the text of each cell's first child.
headers = [col[0].text for col in next(rows)]
output = []
for row in rows:
    tds = iter(row)
    # The beer name is read from two levels below the first cell; the
    # remaining cells contribute their own text.
    beer = next(tds)[0][0].text
    # append() avoids rebuilding the whole list on every iteration.
    output.append([beer] + [col.text for col in tds])

beerlib.parser_output(output, headers, 'Ochutnávková pivnice', sys.argv)

Пример #3

Показать файл

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re, sys
from xml.etree import ElementTree as ET
import common as beerlib

# Fetch the "today on tap" page; a falsy result is treated as a failure.
html = beerlib.download_html('https://maltworm.cz/dnes-na-cepu/')
if not html:
    sys.exit(-1)

# Isolate the <body> element (greedy match, DOTALL so it spans lines).
reg = re.compile('(<body.*</body>)', re.MULTILINE | re.DOTALL)
body_match = reg.search(html)
if not body_match:
    # Without this guard a page lacking <body> would crash with
    # AttributeError on None; exit with a distinct status code instead.
    sys.exit(-2)
body = body_match.group(0)

# NOTE(review): this pattern is greedy, so it removes everything from the
# first "<script" to the last "</script>", including any markup between
# separate script elements -- confirm that this is intended.
content = re.sub('<script.*</script>',
                 '',
                 body,
                 flags=re.MULTILINE | re.DOTALL)

# Parse the cleaned body as XML and collect every <article> element.
table = ET.XML(content)
articles = table.findall(".//article")

headers = ['Pivo', 'Typ', 'EPM', 'Alk.', 'IBU', 'Pivovar', 'Město']
output = []
for article in articles:
    beer = article.find(
        ".//p[@class='elementor-heading-title elementor-size-default']")
    info = article.findall(".//span[@class='elementor-icon-list-text']")
    info = iter(info)
    values = [beer.text] + ["".join(i.itertext()) for i in info]

    # get rid of 'IBU:' prefix

Пример #4

Показать файл

Файл: uuparser.py Проект: thorgrin/beerlist

    div = checkin.find('.//div[@data-rating]')
    if div is not None:
        return div.get('data-rating')
    return None


# The script needs exactly two positional arguments after its own name.
if len(sys.argv) != 3:
    print("uuparser requires two arguments: untappd_username irc_nick")
    # sys.exit() replaces the site-provided exit(), which is intended for
    # interactive sessions rather than programs.
    sys.exit(-1)

# The cache file path is resolved relative to this script's real location.
cwd = os.path.dirname(os.path.realpath(__file__))
CACHE_FILE = os.path.join(cwd, '../cache/untappd.json')
USER = sys.argv[1]
IRC_NICK = sys.argv[2]

# Fetch the user's Untappd profile; a falsy result is treated as a failure.
html = beerlib.download_html('https://untappd.com/user/' + USER)
if not html:
    sys.exit(-2)

# Capture from the opening of the "main-stream" div up to (not including)
# the next "<script"; the match is non-greedy.
reg = re.compile('(<div id="main-stream.*?)<script', re.MULTILINE | re.DOTALL)
body = reg.search(html)
if not body:
    sys.exit(-3)

body = body.group(1)
# Remove each script element and each img tag individually (non-greedy),
# then replace every ampersand with a numeric character reference.
content = re.sub('<script.*?</script>',
                 '',
                 body,
                 flags=re.MULTILINE | re.DOTALL)
content = re.sub('<img.*?>', '', content, flags=re.MULTILINE | re.DOTALL)
content = content.replace('&', '&#038;')

Пример #5

Показать файл

Файл: faparser.py Проект: thorgrin/beerlist

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import common as beerlib

# Fetch the Untappd venue page for F.A. Bar Oranžová.
html = beerlib.download_html('https://untappd.com/v/fa-bar-oranzova/1728532')
if not html:
    # A falsy result is treated as a failed download.  sys.exit() is used
    # instead of the site-provided exit(), which is meant for interactive
    # sessions and is not guaranteed to exist in every interpreter setup.
    sys.exit(-1)

# Hand the page and the command-line arguments to the shared Untappd handler.
beerlib.process_untappd(html, 'F.A. Bar Oranžová', sys.argv)

Пример #6

Показать файл

Файл: craftparser.py Проект: thorgrin/beerlist

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys, re, json
from xml.etree import ElementTree as ET
import common as beerlib

# Value passed as the second argument to download_html for every request
# made below.
curl_ua = 'curl/7.54.1'

# first we need the post ID
html = beerlib.download_html('https://m.facebook.com/page_content_list_view/more/?page_id=1871132519814729&start_cursor=10000&num_to_fetch=10&surface_type=timeline', curl_ua)
if not html:
	# A falsy result is treated as a failed download.  sys.exit() replaces
	# the site-provided exit(), which is intended for interactive sessions.
	sys.exit(-1)

# Matches a single <p>...</p> element (non-greedy, confined to one line
# because DOTALL is not set).
reg = re.compile('(<p>.*?</p>)')

# find post ids: the digits following each HTML-escaped
# "top_level_post_id" key in the listing
ids = re.findall('top_level_post_id&quot;:&quot;([0-9]+)', html)

# Look at all articles until some beers are found
for content_id in ids:
	post_url = "https://m.facebook.com/story.php?story_fbid=%s&id=%s" % (content_id, '1871132519814729')
	# print(post_url)

	# Okay, let's get the post
	post_html = beerlib.download_html(post_url, curl_ua)
	if not post_html:
		continue

	paragraphs = reg.findall(post_html)

Пример #7

Показать файл

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys, re, json
from xml.etree import ElementTree as ET
import common as beerlib

# first we need the post ID
html = beerlib.download_html('https://m.facebook.com/Craftbeerbottleshopbar/')
if not html:
    # A falsy result is treated as a failed download.  sys.exit() replaces
    # the site-provided exit(), which is intended for interactive sessions.
    sys.exit(-1)

# Isolate the <body> element (greedy match, DOTALL so it spans lines).
reg = re.compile('(<body.*</body>)', re.MULTILINE | re.DOTALL)
body_match = reg.search(html)
if not body_match:
    # Without this guard a page lacking <body> would crash with
    # AttributeError on None; exit with a distinct status code instead.
    sys.exit(-2)
body = body_match.group(0)

# Parse the body as XML and collect every <article> element.
# (An alternative selector, ".//div[@role='article']", was left disabled
# in the original code.)
page = ET.XML(body)
articles = page.findall(".//article")

# Look at all articles until some beers are found
for article in articles:
    data_element = article.get('data-ft')
    if not data_element:
        continue
    data = json.loads(data_element)
    post_url = "https://m.facebook.com/story.php?story_fbid=%s&id=%s" % (
        data['top_level_post_id'], data['content_owner_id_new'])
    #	print(post_url)