Python HTML.search示例

编程语言: Python

命名空间/包名称: requests_html

类/类型: HTML

方法/功能: search

hotexamples.com的示例: 3

Python HTML.search - 共找到 3 个示例。以下是从开源项目中提取的、评分最高的 requests_html.HTML.search 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

HTML(30)

find(30)

xpath(18)

render(15)

search(3)

element(1)

encoding(1)

get(1)

lower(1)

replace(1)

reverse(1)

sort(1)

url(1)

示例#1

显示文件

def get_torrent_link_from_html(html):
    """Build the download link for the torrent the current user just uploaded.

    Uses somewhat flimsy HTML parsing due to apparent lack of other options:
    the user id, authkey and passkey are scraped from the page text, and the
    torrent is picked as the most recently dated ``[id^=torrent_]`` element
    that links to the current user's profile.

    Args:
        html (str): string representing html of upload response.

    Returns:
        str: (hopefully) direct link to torrent.

    Raises:
        TypeError: if the user id, authkey or passkey pattern is missing
            from the page (``search`` yields ``None`` on no match).
        ValueError: if no torrent element on the page belongs to the user.
        AssertionError: if the newest matching torrent is two or more
            minutes old, i.e. it is probably not the one just uploaded.

    """

    html = HTML(html=html)
    user_id = html.search('var userid = {};')[0]
    authkey = html.search('var authkey = "{}";')[0]
    passkey = html.search("passkey={}&")[0]

    user_torrents_ids_and_dates = []
    for t in html.find('[id^=torrent_]'):
        owner = t.search('user.php?id={}"')
        # A row without a user link cannot be ours; skip it instead of
        # failing on ``None[0]`` and aborting the whole lookup.
        if owner is None or owner[0] != user_id:
            continue
        # Element ids look like "torrent_<id>"; the upload time is read
        # from the title attribute of the row's first <span>.
        user_torrents_ids_and_dates.append(
            (t.attrs['id'].split('_')[1],
             pendulum.from_format(t.find('span')[0].attrs['title'],
                                  'MMM DD YYYY, HH:mm')))

    if not user_torrents_ids_and_dates:
        # Same exception type max() raised on an empty list, but with a
        # message that says what actually went wrong.
        raise ValueError(
            "no torrent belonging to user {} found in upload response".format(
                user_id))

    torrent_id, torrent_dt = max(user_torrents_ids_and_dates,
                                 key=lambda x: x[1])

    # Explicit raise rather than ``assert`` so the sanity check still runs
    # under ``python -O``; AssertionError is kept for existing callers.
    if not (pendulum.now() - torrent_dt).in_minutes() < 2:
        raise AssertionError(
            "newest torrent {} is dated {}, not within the last 2 minutes".format(
                torrent_id, torrent_dt))

    return "https://awesome-hd.me/torrents.php?action=download&id={}&authkey={}&torrent_pass={}".format(
        torrent_id, authkey, passkey)

示例#2

显示文件

文件： main.py 项目： jrc/jrc.github.io

def get_program_info(program_url):
    """Fetch a program page and extract its title and, if present, VTT URL.

    The first ``<h1>`` on the page is taken to be the title. The VTT URL is
    the second field captured by the ``vtt: [['PT','{}','{}']]`` template.

    Args:
        program_url: URL of the program page to download.

    Returns:
        A dict with ``"url"`` and ``"title"`` keys, plus ``"vtt_url"`` when
        the VTT template matches; ``None`` when the page has no ``<h1>``.

    Raises:
        Whatever ``requests.get`` / ``raise_for_status`` raise on a failed
        request or an error status code.
    """
    logging.info(f"Fetching {program_url}")
    response = requests.get(program_url)
    response.raise_for_status()

    page = HTML(html=response.text)
    headings = page.find("h1")  # assume the first <h1> is the title
    if not headings:
        logging.warning(f"No title for {program_url}")
        return None

    info = dict(url=program_url, title=headings[0].text)
    vtt_match = page.search("vtt: [['PT','{}','{}']]")
    if vtt_match:
        # Index 1 is the second captured field of the template.
        info["vtt_url"] = vtt_match[1]
    return info

示例#3

显示文件

import json

# Load the script settings; the keys read below are 'group', 'login'
# and 'password'.
with open('settings.json') as json_data:
    d = json.load(json_data)

# Base URL of the site.
# NOTE(review): not referenced in the visible part of this script; the same
# literal is repeated in the urlopen() call below.
WAT_STR = 'https://s1.wcy.wat.edu.pl/ed/'

# NOTE(review): hostname checking and certificate verification are both
# switched off, so the HTTPS connection below is not authenticated —
# confirm this is intentional.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Download the landing page and decode the bytes as windows-1252.
r = urllib.request.urlopen('https://s1.wcy.wat.edu.pl/ed/', context=ctx)
data = r.read().decode('windows-1252')

# Extract the session id: the text captured between 'sid=' and the next '>'.
# NOTE(review): if the template does not match, search() presumably returns
# None and the [0] fails with TypeError — verify against requests_html.
html = HTML(html=data)
sid = html.search('sid={}>')[0]
# URL of logged_inc.php carrying the session id and the configured group;
# the remaining query parameters are hard-coded.
STR = "https://s1.wcy.wat.edu.pl/ed/logged_inc.php?sid=" + sid + "&mid=328&iid=20175&exv=" + d[
    'group'] + "&pos=267.75&rdo=1&t=6801377"

# Form fields as (name, value) pairs, including the credentials from the
# settings file; presumably submitted as the login form by code outside
# this view. Note: this rebinds `data`, which held the page text above.
data = [
    ("sid", sid),
    ('formname', 'login'),
    ('default_fun', '1'),
    ('userid', d["login"]),
    ('password', d["password"]),
    ('view_height', '1080'),
    ('view_width', '1920'),
]

#req = urllib.request.Request('https://s1.wcy.wat.edu.pl/ed/index.php?sid='+sid)
#req.add_header('Content-Type', 'application/x-www-form-urlencoded')