Python get_topic_page示例

编程语言: Python

命名空间/包名称: wiki_requests

方法/功能: get_topic_page

hotexamples.com的示例: 12

Python get_topic_page - 已找到12个示例。这些是从开源项目中提取的最受好评的wiki_requests.get_topic_page现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： main.py 项目： GrishinAnton/python-start

def get_topic_words(topic):
    """Return Russian words found on a topic page and up to three linked pages.

    The page for ``topic`` is fetched with ``get_topic_page``; the values
    returned by ``get_some_more_links_word`` for that page are then fetched
    as well (only the first three, to keep the run fast). All fetched
    content is concatenated and scanned for runs of at least three
    Cyrillic letters, hyphens or apostrophes.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Every match, in order of appearance.
    """
    html_content = get_topic_page(topic)

    # Limit to three extra links so the run stays fast. Slicing an empty
    # sequence is already a no-op, so no explicit emptiness check is needed.
    extra_topics = get_some_more_links_word(html_content)[0:3]

    pages = [html_content]
    pages.extend(get_topic_page(word) for word in extra_topics)

    # Raw string avoids the invalid "\-" escape warning. "ё"/"Ё" sit outside
    # the а-я / А-Я ranges and must be listed explicitly, otherwise words
    # containing them are split in two.
    return re.findall(r"[а-яёА-ЯЁ\-']{3,}", "".join(pages))

示例#2

显示文件

def get_wiki_links(link):
    """Return hrefs from the page at ``link`` that point into ``/wiki/``.

    An href is kept when it contains ``/wiki/`` and that fragment is never
    preceded by another character (the ``./wiki/`` pattern must not match).
    Anchors without an ``href`` attribute contribute an empty string, which
    is filtered out by the first check.
    """
    page = get_topic_page(link)
    anchors = BS(page, 'html.parser').find_all("a")

    wiki_links = []
    for anchor in anchors:
        href = anchor.get('href', '')
        if not re.search('/wiki/', href):
            continue
        if re.search('./wiki/', href):
            continue
        wiki_links.append(href)
    return wiki_links

示例#3

显示文件

文件： links.py 项目： ivadimn/py-input

def get_topic_tables(topic):
    """Print table diagnostics for a topic page and return table classes.

    For every ``table.standard`` element the number of ``tr`` descendants
    is printed. Then the ``class`` attribute of every ``table`` on the page
    (or ``""`` when absent) is collected, printed and returned.
    """
    soup = BS(get_topic_page(topic), "html.parser")

    for standard_table in soup.select("table.standard"):
        rows = standard_table.select("tr")
        print(len(rows))

    class_values = []
    for table in soup.find_all("table"):
        class_values.append(table.get("class", ""))
    print(class_values)
    return class_values

示例#4

显示文件

def get_topic_text(topic):
    """Return the Russian words of a topic page joined into one string.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        str: All runs of Cyrillic letters, hyphens and apostrophes found in
        the page content, separated by single spaces.
    """
    html_content = get_topic_page(topic)
    # Raw string avoids the invalid "\-" escape warning; "ё"/"Ё" are outside
    # the а-я / А-Я ranges and have to be listed explicitly.
    words = re.findall(r"[а-яёА-ЯЁ\-']+", html_content)
    return " ".join(words)

示例#5

显示文件

def get_topic_words(topic):
    """Return every Russian word found on a topic page.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Runs of Cyrillic letters, hyphens and apostrophes, in
        order of appearance.
    """
    html_content = get_topic_page(topic)
    # Raw string avoids the invalid "\-" escape warning; "ё"/"Ё" are outside
    # the а-я / А-Я ranges and have to be listed explicitly.
    return re.findall(r"[а-яёА-ЯЁ\-']+", html_content)

示例#6

显示文件

def get_topic_words(link):
    """Return Russian words of at least three characters from a page.

    Args:
        link: Page identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Runs of three or more Cyrillic letters, hyphens or
        apostrophes, in order of appearance.
    """
    html_content = get_topic_page(link)
    # Raw string avoids the invalid "\-" escape warning; "ё"/"Ё" are outside
    # the а-я / А-Я ranges and have to be listed explicitly.
    return re.findall(r"[а-яёА-ЯЁ\-']{3,}", html_content)

示例#7

显示文件

def get_topic_words(topic):
    """Return Russian words of at least three characters from a topic page.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Runs of three or more Cyrillic letters, hyphens or
        apostrophes, in order of appearance.
    """
    html_content = get_topic_page(topic)
    # Raw string avoids the invalid "\-" escape warning. Lowercase "ё" was
    # already accepted; uppercase "Ё" is added so capitalised words are not
    # truncated.
    return re.findall(r"[а-яёА-ЯЁ\-']{3,}", html_content)

示例#8

显示文件

文件： links.py 项目： ivadimn/py-input

def get_topic_links(topic):
    """Print every ``tr`` element found on the topic page.

    Nothing is returned; the result of ``find_all("tr")`` is only printed.
    """
    page = get_topic_page(topic)
    rows = BS(page, "html.parser").find_all("tr")
    print(rows)

示例#9

显示文件

文件： links.py 项目： ivadimn/py-input

def get_neighbo_pages(topic):
    """Fetch the page for each neighbour link of ``topic``.

    The links come from ``get_neighbo_links(topic)``; each one is passed to
    ``get_topic_page`` and the results are returned in the same order.
    """
    pages = []
    for neighbour in get_neighbo_links(topic):
        pages.append(get_topic_page(neighbour))
    return pages

示例#10

显示文件

文件： index.py 项目： podoynitsyn-va/GEEKBRAINS-Learning

def get_topic_words(topic):
    """Return capitalised Russian words found on a topic page.

    A word must start and end with a Cyrillic letter and may contain
    hyphens or apostrophes in between, so matches are at least three
    characters long.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Each match passed through ``str.capitalize``.
    """
    html_content = get_topic_page(topic)
    # "ё"/"Ё" are outside the а-я / А-Я ranges and have to be listed
    # explicitly, otherwise words containing them are truncated or dropped.
    words = re.findall(
        r"[а-яёА-ЯЁ][а-яёА-ЯЁ\-']+[а-яёА-ЯЁ]", html_content
    )
    # Capitalise so that e.g. "Дерево" and "дерево" are not treated as
    # different words.
    return [w.capitalize() for w in words]

示例#11

显示文件

def get_topic_links(topic):
    """Return the ``href`` of every ``a`` element on the topic page.

    Anchors without an ``href`` attribute contribute an empty string.
    """
    soup = BS(get_topic_page(topic), "html.parser")
    hrefs = []
    for anchor in soup.find_all("a"):
        hrefs.append(anchor.get("href", ""))
    return hrefs

示例#12

显示文件

def get_topic_words(topic):
    """Return Russian words of at least three characters from a topic page.

    Args:
        topic: Topic identifier passed straight to ``get_topic_page``.

    Returns:
        list[str]: Runs of three or more Cyrillic letters, hyphens or
        apostrophes, in order of appearance.
    """
    html_content = get_topic_page(topic)
    # Only words with three or more characters are kept. The earlier
    # unbounded search was dropped because its result was overwritten.
    # Raw string avoids the invalid "\-" escape warning; "ё"/"Ё" are outside
    # the а-я / А-Я ranges and have to be listed explicitly.
    return re.findall(r"[а-яёА-ЯЁ\-']{3,}", html_content)