Example #1
0
            # Persist the article and attach it to the feed, then log the stored
            # reference (content type taken from the fetched document's headers).
            commit_to_feed(feed, article)
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
             (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing.
    try:
        # Extract the main body text and derive a short summary from it.
        # NOTE(review): parser.extract_body/summarise are defined elsewhere;
        # exact semantics are not visible in this fragment.
        article_content = parser.extract_body(document.text)
        summary = parser.summarise(article_content)
    except Exception, e:
        # Best-effort: a document that fails to parse is logged and skipped.
        log("%s: %s: Error parsing %s: %s" %
            (feed.key.name, feed.group.name, url, e.message))
        return

    # Fall back to a title extracted from the document when none was supplied.
    if not title:
        title = parser.extract_title(document.text)

    # Initial article object
    article = Article(url=url, title=title, summary=summary)

    # Ensure the stored URL carries a scheme; default to plain HTTP.
    if not "://" in article.url:
        article.url = "http://" + article.url

    # Determine whether to store the full content or a compressed copy
    if not app.config['COMPRESS_ARTICLES']:
        article.content = article_content
    else:
        # Snappy-compress the UTF-8-encoded body (unencodable characters are
        # dropped via "ignore") and flag the row so readers decompress it.
        article.ccontent = snappy.compress(
            article_content.encode("utf-8", "ignore"))
        article.compressed = True
Example #2
0
            # Log the stored reference (content type taken from the fetched
            # document's headers) and stop — the article is already committed.
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
                (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing.
    try:
        # Extract the main body text and derive a short summary from it.
        # NOTE(review): parser.extract_body/summarise are defined elsewhere;
        # exact semantics are not visible in this fragment.
        article_content = parser.extract_body(document.text)
        summary = parser.summarise(article_content)
    except Exception, e:
        # Best-effort: a document that fails to parse is logged and skipped.
        log("%s: %s: Error parsing %s: %s" %
            (feed.key.name, feed.group.name, url, e.message))
        return

    # Ensure a title and disregard dupes
    if not title:
        title = parser.extract_title(document.text)

    # Optional dedup: drop the article if one with the same title already
    # exists under the same feed key.
    if app.config['NO_DUPLICATE_TITLES']:
        if Article.query.filter(
                and_(Article.title == title, Article.key == feed.key)).first():
            return

    # Initial article object
    article = Article(url=url, title=title, summary=summary)

    # Determine whether to store the full content or a compressed copy
    if not app.config['COMPRESS_ARTICLES']:
        article.content = article_content
    else:
        # Snappy-compress the UTF-8-encoded body (unencodable characters are
        # dropped via "ignore").
        article.ccontent = snappy.compress(
            article_content.encode("utf-8", "ignore"))
Example #3
0
				# Ensure the stored URL carries a scheme before committing.
				article.url = "http://" + article.url
			# Persist the article, attach it to the feed, and log the stored
			# reference (content type from the fetched document's headers).
			commit_to_feed(feed, article)
			log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
				(feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type'])
			return

	# Document parsing.
	try:
		# Extract the main body text and derive a short summary from it.
		# NOTE(review): parser.extract_body/summarise are defined elsewhere;
		# exact semantics are not visible in this fragment.
		article_content = parser.extract_body(document.text)
		summary      = parser.summarise(article_content)
	except Exception, e:
		# Best-effort: a document that fails to parse is logged and skipped.
		log("%s: %s: Error parsing %s: %s" % (feed.key.name, feed.group.name, url, e.message))
		return

	# Fall back to a title extracted from the document when none was supplied.
	if not title:
		title = parser.extract_title(document.text)

	# Initial article object
	article = Article(
		url=url,
		title=title,
		summary=summary
	)

	# Ensure the stored URL carries a scheme; default to plain HTTP.
	if not "://" in article.url:
		article.url = "http://" + article.url

	# Determine whether to store the full content or a compressed copy
	if not app.config['COMPRESS_ARTICLES']:
		article.content=article_content
	else:
Example #4
0
            # Persist the article, attach it to the feed, and log the stored
            # reference (content type from the fetched document's headers).
            commit_to_feed(feed, article)
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
                (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing.
    try:
        # Extract the main body text and derive a short summary from it.
        # NOTE(review): parser.extract_body/summarise are defined elsewhere;
        # exact semantics are not visible in this fragment.
        article_content = parser.extract_body(document.text)
        summary      = parser.summarise(article_content)
    except Exception, e:
        # Best-effort: a document that fails to parse is logged and skipped.
        log("%s: %s: Error parsing %s: %s" % (feed.key.name, feed.group.name, url, e.message))
        return

    # Ensure a title and disregard dupes
    if not title:
        title = parser.extract_title(document.text)

    # Optional dedup: drop the article if one with the same title already
    # exists under the same feed key.
    if app.config['NO_DUPLICATE_TITLES']:
        if Article.query.filter(
            and_(Article.title == title, Article.key == feed.key)
        ).first():
            return

    # Initial article object
    article = Article(
        url=url,
        title=title,
        summary=summary
    )

    # Determine whether to store the full content or a compressed copy