Exemplo n.º 1
0
def news_item_from_feed_item(feed_item: FeedItem, feed: Feed,
                             user: User) -> NewsItem:
    """Build the NewsItem that presents ``feed_item`` of ``feed`` to ``user``.

    Identifiers, title, description and link are copied from the feed item,
    the feed title comes from the feed and the user id from the user. When
    the feed item has no (truthy) ``published`` value, the result of
    ``now_in_utc()`` is used instead.
    """
    # Collected in the order the fields are passed, so helper calls happen in
    # a fixed sequence: published fallback, favicon lookup, creation time.
    values = {
        "feed_id": feed_item.feed_id,
        "user_id": user.user_id,
        "feed_item_id": feed_item.feed_item_id,
        "feed_title": feed.title,
        "title": feed_item.title,
        "description": feed_item.description,
        "link": feed_item.link,
        "published": feed_item.published or now_in_utc(),
        "favicon": determine_favicon_link(feed_item),
        "created_on": now_in_utc(),
    }
    return NewsItem(**values)
Exemplo n.º 2
0
def gemeente_groningen_parser(feed: Feed, html_source: str) -> List[FeedItem]:
    """Create a FeedItem for every ``<article>`` element found in ``html_source``.

    Title, description and link are extracted by the ``_title``,
    ``_description`` and ``_link`` helpers. The publication date is read from
    the ``datetime`` attribute of the article's first ``<time>`` tag; when the
    tag or the attribute is missing (or empty), ``now_in_utc()`` is used.

    :param feed: feed the parsed items belong to; only ``feed_id`` is read.
    :param html_source: HTML document to parse.
    :return: one FeedItem per ``<article>``, in document order.
    :raises ValueError: if a ``datetime`` attribute is present but is not a
        string ``datetime.fromisoformat`` accepts.
    """
    soup = BeautifulSoup(html_source, features="html.parser")
    feed_items: List[FeedItem] = []
    for article in soup.find_all("article"):
        # Look the <time> tag up once. Using .get() means a tag without a
        # ``datetime`` attribute is handled like a missing tag instead of
        # raising KeyError.
        time_tag = article.find("time")
        timestamp = time_tag.get("datetime") if time_tag is not None else None
        feed_items.append(
            FeedItem(
                feed_id=feed.feed_id,
                title=_title(article),
                description=_description(article),
                link=_link(article),
                last_seen=now_in_utc(),
                published=(datetime.fromisoformat(timestamp)
                           if timestamp else now_in_utc()),
                created_on=now_in_utc(),
            ))
    return feed_items
Exemplo n.º 3
0
class NewsItem(BaseModel):  # pylint: disable=too-few-public-methods
    """Model of a news item: a feed item as kept for one user.

    Besides the content copied from the feed item it tracks read/saved state
    and the alternate sources (links, titles, favicons) attached to it.
    """

    news_item_id: PyObjectId = Field(default_factory=uuid4_str, alias="_id")
    feed_id: PyObjectId
    user_id: PyObjectId
    feed_item_id: PyObjectId

    feed_title: str
    title: str
    description: str
    link: str
    published: datetime
    # The three alternate_* lists are appended to together by append_alternate.
    alternate_links: List[str] = Field(default_factory=list)
    alternate_title_links: List[str] = Field(default_factory=list)
    alternate_favicons: List[str] = Field(default_factory=list)
    favicon: Optional[str]

    # A default_factory is evaluated per instance. A plain ``= now_in_utc()``
    # default would be evaluated once, when the class is defined, and every
    # instance would share that import-time timestamp.
    created_on: datetime = Field(default_factory=now_in_utc)
    is_read: bool = False
    is_saved: bool = False
    is_read_on: Optional[datetime] = None
    saved_news_item_id: Optional[PyObjectId] = None

    def append_alternate(self, link: str, title: str, icon_link: str) -> None:
        """Append an alternate source for the news. Only appended if not yet present.

        The first alternate also prefixes the title with "[Updated] "; the
        prefix is never added twice.

        :param link: link of the alternate source; used to detect duplicates.
        :param title: title of the alternate source.
        :param icon_link: favicon link of the alternate source.
        """
        if link in self.alternate_links:
            return
        if not self.title.startswith("[Updated] "):
            self.title = f"[Updated] {self.title}"
        self.alternate_title_links.append(title)
        self.alternate_links.append(link)
        self.alternate_favicons.append(icon_link)
Exemplo n.º 4
0
def rss_document_to_feed_items(feed: Feed,
                               tree: ElementBase) -> List[FeedItem]:
    """Build one FeedItem of ``feed`` per ``channel/item`` element in ``tree``.

    The link goes through ``sanitize_link``, the description through
    ``parse_description`` and ``pubDate`` through
    ``_parse_optional_rss_datetime``; ``last_seen`` and ``created_on`` are
    both set from ``now_in_utc()``.
    """
    feed_items: List[FeedItem] = []
    for rss_item in tree.findall("channel/item"):
        feed_id = feed.feed_id
        title = rss_item.findtext("title")
        link = sanitize_link(rss_item.findtext("link"))
        description = parse_description(rss_item.findtext("description"))
        last_seen = now_in_utc()
        published = _parse_optional_rss_datetime(rss_item.findtext("pubDate"))
        created_on = now_in_utc()
        feed_items.append(
            FeedItem(
                feed_id=feed_id,
                title=title,
                link=link,
                description=description,
                last_seen=last_seen,
                published=published,
                created_on=created_on,
            ))
    return feed_items
Exemplo n.º 5
0
def atom_document_to_feed_items(feed: Feed,
                                tree: ElementBase) -> List[FeedItem]:
    """Build one FeedItem of ``feed`` per Atom ``entry`` element in ``tree``.

    The link is taken from the entry's ``link`` element via
    ``_parse_optional_link_for_href``; a missing or empty ``content`` becomes
    an empty description; ``published`` is parsed by
    ``_parse_optional_datetime``.
    """
    # Fully qualified tag names in the Atom namespace.
    entry_tag = "{http://www.w3.org/2005/Atom}entry"
    title_tag = "{http://www.w3.org/2005/Atom}title"
    link_tag = "{http://www.w3.org/2005/Atom}link"
    content_tag = "{http://www.w3.org/2005/Atom}content"
    published_tag = "{http://www.w3.org/2005/Atom}published"

    feed_items: List[FeedItem] = []
    for entry in tree.findall(entry_tag):
        feed_id = feed.feed_id
        title = entry.findtext(title_tag)
        link = _parse_optional_link_for_href(entry.find(link_tag))
        description = entry.findtext(content_tag) or ""
        last_seen = now_in_utc()
        published = _parse_optional_datetime(entry.findtext(published_tag))
        created_on = now_in_utc()
        feed_items.append(
            FeedItem(
                feed_id=feed_id,
                title=title,
                link=link,
                description=description,
                last_seen=last_seen,
                published=published,
                created_on=created_on,
            ))
    return feed_items
Exemplo n.º 6
0
class SavedNewsItem(BaseModel):
    """Model of a saved news item.

    Carries the ids and content fields of the news item it refers to
    (``news_item_id``) plus the moment it was saved.
    """

    saved_news_item_id: PyObjectId = Field(default_factory=uuid4_str,
                                           alias="_id")

    feed_id: PyObjectId
    user_id: PyObjectId
    feed_item_id: PyObjectId
    news_item_id: PyObjectId

    feed_title: str
    title: str
    description: str
    link: str
    published: datetime
    alternate_links: List[str] = Field(default_factory=list)
    alternate_title_links: List[str] = Field(default_factory=list)
    alternate_favicons: List[str] = Field(default_factory=list)
    favicon: Optional[str]

    created_on: datetime
    # A default_factory is evaluated per instance. A plain ``= now_in_utc()``
    # default would be evaluated once, when the class is defined, and every
    # saved item would share that import-time timestamp.
    saved_on: datetime = Field(default_factory=now_in_utc)
Exemplo n.º 7
0
async def upsert_new_items_for_feed(
        feed: Feed, updated_feed: Feed,
        feed_items_from_rss: List[FeedItem]) -> UpdateResult:
    """
    Upload new items as feed item and news item for users.

    - Fetched items whose link is not stored yet are inserted as new feed
      items; a link repeated within the same fetch counts as already seen.
    - Stored feed items whose link is fetched again get a fresh last_seen.
    - Every subscribed user gets a news item per new feed item, unless one of
      the news items returned by fetch_all_non_read_for_feed for that user has
      a similar title; then the new item is attached to those news items as an
      alternate source and their published date is refreshed.
    - number_of_items, last_fetched, title and description of the feed are
      updated and the feed is upserted.

    Feed items are classified once, before the per-user pass. Classifying
    them inside the user loop marks every new item as "already seen" after the
    first user, so later subscribers would never receive news items. As a
    consequence new feed items are stored even when nobody is subscribed.

    feed: stored feed that is updated and upserted.
    updated_feed: freshly fetched feed; supplies the new title and description.
    feed_items_from_rss: feed items parsed from the fetched document.
    returns: UpdateResult with the number of new NewsItems per subscribed user.
    """
    current_feed_items = await repositories(
    ).feed_item_repository.fetch_all_for_feed(feed)
    subscribed_users = await repositories(
    ).user_repository.fetch_subscribed_to(feed)

    # Step 1: split the fetched items into known and new, independent of users.
    updated_feed_items: List[FeedItem] = []  # known items, last_seen ticked.
    new_feed_items: List[FeedItem] = []  # items that will be inserted.
    for fetched_item in feed_items_from_rss:
        feed_items_with_same_link = [
            item for item in current_feed_items
            if item.link == fetched_item.link
        ]
        if feed_items_with_same_link:
            # We have seen this item already, update last seen.
            for feed_item in feed_items_with_same_link:
                feed_item.last_seen = now_in_utc()
            updated_feed_items.extend(feed_items_with_same_link)
        else:
            new_feed_items.append(fetched_item)
            # Remember it so a repeated link later in this fetch is not
            # inserted a second time.
            current_feed_items.append(fetched_item)

    # Step 2: turn the new feed items into news items for every subscriber.
    new_news_items: List[NewsItem] = []  # news items that will be inserted.
    updated_news_items: List[NewsItem] = []  # news items that are updated.
    update_result = UpdateResult()
    for user in subscribed_users:
        number_of_new_items = 0
        current_news_items = await repositories(
        ).news_item_repository.fetch_all_non_read_for_feed(feed, user)
        for new_feed_item in new_feed_items:
            # Check if there is already a similar news item to flag alternates.
            news_items_similar_titles = [
                news_item for news_item in current_news_items
                if are_titles_similar(title_1=news_item.title,
                                      title_2=new_feed_item.title)
            ]
            if not news_items_similar_titles:
                new_news_item = news_item_from_feed_item(
                    new_feed_item, feed, user)
                new_news_items.append(new_news_item)
                number_of_new_items += 1
                # Later items in this run are compared against it as well.
                current_news_items.append(new_news_item)
                continue
            for existing_news_item in news_items_similar_titles:
                existing_news_item.append_alternate(
                    new_feed_item.link, new_feed_item.title,
                    determine_favicon_link(new_feed_item))
                existing_news_item.published = (new_feed_item.published
                                                or now_in_utc())
                updated_news_items.append(existing_news_item)
        update_result.add(user.user_id, number_of_new_items)

    # Upsert the new and updated feed_items and news_items.
    await repositories().feed_item_repository.upsert_many(new_feed_items)
    await repositories().feed_item_repository.upsert_many(updated_feed_items)
    await repositories().news_item_repository.upsert_many(new_news_items)
    await repositories().news_item_repository.upsert_many(updated_news_items)

    # Update the feed itself with the latest information from the url.
    # NOTE(review): utcnow() yields a naive datetime while the rest of this
    # function uses now_in_utc() - confirm which one last_fetched should hold.
    feed.last_fetched = datetime.utcnow()
    feed.description = updated_feed.description
    feed.title = updated_feed.title
    feed.number_of_items = feed.number_of_items + len(new_feed_items)
    await repositories().feed_repository.upsert(feed)
    return update_result