Python Article.is_valid_body примеры использования

Язык программирования: Python

Пространство имен/Пакет: newspaper

Класс/Тип: Article

Метод/Функция: is_valid_body

Примеров на hotexamples.com: 3

Python Article.is_valid_body — найдено 3 примера. Это лучшие примеры Python-кода для newspaper.Article.is_valid_body, полученные из open-source-проектов. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Article(30)

nlp(30)

set_html(30)

parse(30)

download(30)

build(20)

html(15)

text(11)

download_state(9)

fetch_images(6)

is_valid_url(6)

publish_date(5)

authors(5)

is_downloaded(5)

title(4)

top_image(3)

article_html(3)

keywords(3)

set_text(2)

images(2)

has_top_image(2)

is_valid_body(2)

summary(1)

summarylen(1)

split(1)

set_top_img_no_check(1)

tag(1)

set_title(1)

tags(1)

textlen(1)

set_meta_data(1)

lower(1)

set_keywords(1)

save(1)

prepareSentenceHighlights(1)

nlpEntropy(1)

meta_data(1)

append(1)

is_video(1)

is_parsed(1)

is_media_news(1)

has_video(1)

get_is_news(1)

format_top_node(1)

category_urls(1)

articles(1)

url(1)

Пример #1

Показать файл

Файл: test.py Проект: zixie1991/newspaper

def test():
    """Fetch the URL given on the command line and print what Article extracts.

    The URL is read from ``sys.argv[1]``. The page is downloaded with
    ``requests`` using a browser-like User-Agent header, converted with
    ``to_unicode`` and handed to ``Article.parse``. Two elapsed-time
    readings are printed: one after constructing the ``Article`` and one
    after parsing; the timer starts after the HTTP request, so download
    time is not included. Finally the title, top image, images, text and
    the result of ``is_valid_body()`` are printed.

    ``print(...)`` is called with a single argument everywhere so the
    output is the same under Python 2 and Python 3.
    """
    url = sys.argv[1]
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    r = requests.get(url, headers=headers)

    # Start timing only after the download has finished.
    t = time.time()
    a = Article(url, language='zh', keep_article_html=True)
    print(time.time() - t)
    html = to_unicode(r.content)
    a.parse(url=url, html=html)

    # Cumulative time: construction + unicode conversion + parsing.
    print(time.time() - t)

    print(a.title)
    print(a.top_img)
    print(a.imgs)
    print(a.text)
    # print(a.article_html)
    print(a.is_valid_body())

Пример #2

Показать файл

class ArticleParser:
    """Wrap an ``Article`` and expose its title, text, source URL and date.

    The article is downloaded and parsed in the constructor. The body text
    is computed on first access of ``text`` and cached afterwards.
    """

    def __init__(self, url):
        """Create the ``Article`` for *url*, then download and parse it."""
        self._article = Article(url)
        self._article.download()
        self._article.parse()
        # None means "not computed yet"; see the ``text`` property.
        self._text = None

    @property
    def title(self):
        """Title of the wrapped article."""
        return self._article.title

    @property
    def text(self):
        """Body text of the article, computed once and cached.

        When ``is_valid_body()`` is true the article's own ``text`` is used;
        otherwise the paragraphs returned by ``justext.justext`` for the
        article HTML (English stoplist) are joined with newlines.
        """
        # Compare against None rather than truthiness so that an empty
        # result is cached too instead of re-running extraction every time.
        if self._text is None:
            if self._article.is_valid_body():
                self._text = self._article.text
            else:
                self._text = '\n'.join(p.text for p in justext.justext(
                    self._article.html, justext.get_stoplist("English")))

        return self._text

    @property
    def source_url(self):
        """Source URL of the wrapped article."""
        return self._article.source_url

    @property
    def date(self):
        """Publish date of the wrapped article."""
        return self._article.publish_date

    @date.setter
    def date(self, value):
        self._article.publish_date = value

    def save(self):
        """Write the article to ``../news/<title>`` (spaces become underscores).

        The file holds four newline-terminated fields in order: the date in
        ISO format, the title, the source URL and the text.
        """
        # The context manager closes the file even if one of the writes
        # raises (for example when ``self.date`` has no ``isoformat``).
        with open("../news/" + self.title.replace(" ", "_"), "w") as f:
            f.write(self.date.isoformat() + "\n")
            f.write(self.title + "\n")
            f.write(self.source_url + "\n")
            f.write(self.text + "\n")

Пример #3

Показать файл

def get_news_data(url, num_words=None):
    """Build a ``News`` record for the article at *url*.

    The article is downloaded, parsed and run through ``nlp()``. Its text
    is also passed to ``summarize``; ``words=num_words`` is supplied only
    when *num_words* is not None. The author list starts with
    ``meta_data['author']`` followed by each entry of ``article.authors``
    that is not already present.
    """
    article = Article(url)
    article.download()
    article.parse()
    article.nlp()

    metadata = article.meta_data

    # Only pass ``words`` when the caller asked for a specific length.
    summarize_options = {} if num_words is None else {'words': num_words}
    summary_ = summarize(article.text, **summarize_options)

    authors = [metadata['author']]
    for candidate in article.authors:
        if candidate in authors:
            continue
        authors.append(candidate)

    title = article.title
    description = metadata['description']
    body = article.text
    nlp_summary = article.summary
    is_valid = article.is_valid_body() and article.is_valid_url()
    site_name = metadata['og']['site_name']
    generator = metadata['generator']

    return News(title, authors, description, body, nlp_summary, summary_,
                is_valid, site_name, generator)