Exemplo n.º 1
0
def get_feeditem_model(url, timestamp, item):
    """
    Returns a new feeditem model with cleaned attributes.
    Returns None if an item keyed by (url, item.link) already exists.

    Parameters:

    url - The feed's url; also stored as the model's feed_link
    timestamp - Stored unchanged in the model's timestamp field
    item - A feedparser rss-entry
    """
    key_query = FeedItemModel.query(FeedItemModel.key == FeedItemKey(url, item.link))
    for _ in key_query.iter(keys_only=True):
        # Existing item, so ignore
        return None

    # Some feeds use content, some description.
    # Take the longest.
    # "or" guards against a key that is present but holds None, which
    # would otherwise break the len() comparison below.
    description = item.get("description") or ""
    try:
        # Can be many content entries, with different types; only the
        # first one is considered.
        content = item.get("content", None)[0].value or ""
    except (TypeError, IndexError, KeyError, AttributeError):
        # No content at all (None is not subscriptable), an empty content
        # list, or a first entry without a value: fall back to description.
        content = ""

    if len(content) > len(description):
        description = content

    clean_description = strip_bloat(description)

    return FeedItemModel(key=FeedItemKey(url, item.link),
                         title=item.title,
                         description=clean_description,
                         link=item.link,
                         title_stripped=get_snippet(item.title),
                         snippet=get_snippet(clean_description),
                         timestamp=timestamp,
                         feed_link=url,
                         images=get_images(clean_description),
                         published=convert_timestamp(item.get("published", None)),
                         author=item.get("author", None),
                         comments=item.get("comments", None),
                         enclosures=[e.href for e in item.get("enclosures", [])],
                         tags=[t.term for t in item.get("tags", [])])