Example #1
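All of the snippets below appear to come from one scraper module and lean on names that are never shown: requests, BeautifulSoup, a per-site BASE URL, and an Article module whose Article class takes (source, url, author, title, body) positionally. A minimal hypothetical stand-in for that module, plus the shared imports (both are assumptions, since the real definitions are not in the source):

# Article.py -- hypothetical stand-in; the real module is not shown.
class Article:
    def __init__(self, source, url, author, title, body):
        self.source = source
        self.url = url
        self.author = author
        self.title = title
        self.body = body

And at the top of the scraper module itself:

import requests
from bs4 import BeautifulSoup

import Article

# Hypothetical value: each site's scraper uses its own root URL, e.g.
BASE = "https://www.spectator.co.uk"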
def get_articles():
    feeds = ["http://feeds.bbci.co.uk/news/rss.xml"]

    article_urls = set()

    for feed in feeds:
        data = requests.get(feed)

        if data.status_code != 200:
            return None

        content = data.content.decode()

        soup = BeautifulSoup(content, features="xml")

        all_articles = soup.find_all("item")

        for a in all_articles:
            href = a.find("link").text
            title = a.find("title").text

            article_urls.add((href, title))

    articles = [
        Article.Article("The BBC", href, None, title, None)
        for href, title in article_urls
        if "/sport/" not in href  # drop sport stories
    ]

    return articles
Example #2
def get_articles():
    data = requests.get(f"{BASE}/coffee-house")

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    all_articles = soup.find_all("article")

    article_urls = set()

    for a in all_articles:
        # The headline belongs to the <article> element, not to each link.
        header = a.find("h2")
        if header is None:
            continue

        for link in a.find_all("a"):
            href = link.attrs["href"]
            if "writer/" in href:  # skip author-profile links
                continue

            title = header.text

            url = "{}{}".format(BASE, href)

            article_urls.add((url, title))

    articles = [
        Article.Article("The Spectator", href, None, title, None)
        for href, title in article_urls
    ]

    return articles
Example #3
def get_article(url):
    data = requests.get(
        url,
        headers={
            "User-Agent":
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            "X-Forwarded-For": "66.249.66.1",
            "Cookie": ""
        })

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    main = soup.find("main", {"role": "main"})
    strings = list(map(str.strip, main.strings))

    # The first two strings are the headline and the byline; the rest is the body.
    title, author, body = strings[0], strings[1], strings[2:]

    body = "\n\n".join(body)

    return Article.Article("The Times", url, author, title, body)
Example #4
def get_article(url):
    data = requests.get(url)

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    author = "The BBC"
    title = soup.find("h1", {"id": "main-heading"}).text.strip()

    paragraphs = soup.find_all("div", {"data-component": "text-block"})

    body = "\n\n".join(x.text for x in paragraphs)

    return Article.Article("The BBC", url, author, title, body)
Example #5
def get_articles():
    data = requests.get(f"{BASE}")

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    headlines = soup.find_all("div", {"class": "Item-content"})

    article_urls = set()

    for hl in headlines:
        title = " | ".join(hl.strings)
        title = title.replace(" | Read the full story", "")
        title = title.title()

        if "play now" in title.lower():
            continue

        try:
            href = hl.find("a", {"class": "js-tracking"}).attrs["href"]
        except (AttributeError, KeyError):
            # Headline without a tracked link: skip it.
            continue

        if BASE not in href:
            href = BASE + href

        article_urls.add((href, title))

    articles = [
        Article.Article("The Telegraph", href, None, title, None)
        for href, title in article_urls
    ]

    return articles
Example #6
def get_article(url):
    data = requests.get(url)

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    author = soup.find("span", {"class": "e-byline__author"}).text.strip()
    title = soup.find("h1", {"class": "e-headline"}).text.strip()

    bodies = soup.find_all("div", {"class": "article-body-text"})
    # Flatten the <p> tags from every body block (functools.reduce would also
    # work, but a comprehension avoids the import and handles an empty list).
    paragraphs = [p for x in bodies for p in x.find_all("p")]

    body = "\n\n".join(x.text for x in paragraphs)

    return Article.Article("The Telegraph", url, author, title, body)
Example #7
def get_articles():
    data = requests.get(f"{BASE}")

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    headlines = soup.find_all("h3", {"class": "list-headline"})

    article_urls = set()

    for hl in headlines:
        # Use the last non-empty string in the headline element as the title,
        # falling back to "Unknown" if every string is blank.
        title = "Unknown"
        for t in hl.strings:
            if t.strip():
                title = t.strip()

        try:
            href = hl.find("a", {"class": "list-headline__link"}).attrs["href"]
        except (AttributeError, KeyError):
            # Headline without a link: skip it.
            continue

        if BASE not in href:
            href = BASE + href

        article_urls.add((href, title))

    articles = [
        Article.Article("The Telegraph", href, None, title, None)
        for href, title in article_urls
    ]

    return articles
Example #8
def get_article(url):
    data = requests.get(url)

    if data.status_code != 200:
        return None

    content = data.content.decode()

    soup = BeautifulSoup(content, features="lxml")

    author = soup.find("h2", {
        "class": "ContentPageAuthor-module__author__name"
    }).text.strip()
    title = soup.find("h1", {
        "class": "ContentPageTitle-module__headline"
    }).text.strip()

    paragraphs = soup.find_all(
        "p", {"class": "ContentPageBodyParagraph-module__paragraph--block"})

    body = "\n\n".join(x.text for x in paragraphs)

    return Article.Article("The Spectator", url, author, title, body)
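The two halves are meant to be chained: a site's get_articles() returns stub Article objects with only url and title filled in, and get_article(url) fetches the full text. A minimal driver for the BBC pair (Examples #1 and #4), assuming those two functions live in one module together with the hypothetical Article class and preamble sketched at the top:

if __name__ == "__main__":
    stubs = get_articles()  # Example #1: BBC RSS index

    if stubs is None:
        raise SystemExit("could not fetch the article index")

    for stub in stubs[:5]:
        full = get_article(stub.url)  # Example #4: full BBC article
        if full is None:
            continue  # skip anything that does not return 200
        print(full.title)
        print(full.body[:200])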