def update_feed_details(self, data: Dict, feed: Feed) -> None:
    """
    Updates the title and description of a feed if they have
    been sent with the notification.

    :param data: deserialized JSON
    :type data: Dict
    :param feed: The Feed to be updated
    :type feed: Feed
    :return: None
    """
    if data.get("title"):
        feed.title = data["title"]
    if data.get("description"):
        feed.description = data["description"]
    if data.get("home_page_url"):
        feed.site_url = data["home_page_url"]
    if data.get("favicon"):
        feed.site_icon_url = data["favicon"]
    if data.get("feed_url"):
        feed.update_feed_topic(data["feed_url"])
    if data.get("expired"):
        feed.set_expired()
    if data.get("hubs"):
        feed.handle_websub(data["hubs"])
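# Usage sketch for the JSON handler above (illustrative, not from the source):
# `handler` stands in for the notification-processing instance and `feed` for
# a persisted Feed. Keys follow the JSON Feed v1 spec; absent keys are skipped.
payload = {
    "title": "Example Blog",
    "description": "Posts about examples",
    "home_page_url": "https://example.com/",
    "favicon": "https://example.com/favicon.ico",
}
handler.update_feed_details(payload, feed)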
def test_fetch_feed_no_new_content(session, capture_signal_notification_received):
    feed = Feed(topic="http://test.com/feed.rss")
    body = "<rss></rss>"
    feed.new_content(body)
    responses.add(
        responses.GET,
        feed.topic,
        body=body,
        status=200,
        content_type="application/rss+xml",
    )
    result = FeedFetcher.fetch_feed(feed)
    assert result is False
    assert feed.last_status_code == 200
    assert not feed.etag
    assert not feed.last_modified
    assert feed.next_scheduled_update > Feed.current_timestamp()
    assert len(capture_signal_notification_received) == 0
    stats = FeedStats.query.filter_by(feed_id=feed.id).first()
    assert stats
    assert stats.status_code == 200
    assert stats.new_entries == 0
    assert stats.updated_entries == 0
    assert stats.type == FetchType.PULL
    assert stats.latency > 0
    assert stats.parse_time == 0
def test_fetch_feed_not_modified_last_modified(
    session, capture_signal_notification_received
):
    feed = Feed(topic="http://test.com/feed.rss")
    content_hash = "adfasdfasd"
    feed.content_hash = content_hash
    modified = datetime(2017, 6, 6, 12, 0, 0)
    responses.add(
        responses.GET,
        feed.topic,
        body=None,
        status=304,
        adding_headers={"last-modified": create_modified_header(modified)},
        content_type="application/rss+xml",
    )
    result = FeedFetcher.fetch_feed(feed)
    assert result is False
    assert feed.last_status_code == 304
    assert not feed.etag
    assert feed.last_modified == modified
    assert feed.content_hash == content_hash
    assert feed.next_scheduled_update > Feed.current_timestamp()
    assert len(capture_signal_notification_received) == 0
    stats = FeedStats.query.filter_by(feed_id=feed.id).first()
    assert stats
    assert stats.status_code == 304
    assert stats.new_entries == 0
    assert stats.updated_entries == 0
    assert stats.type == FetchType.PULL
    assert stats.latency > 0
    assert stats.parse_time == 0
def test_unsubscribe_raises_error(session):
    topic = "http://test.com"
    with pytest.raises(SubscriberError):
        subscriber.unsubscribe(topic=topic)
    feed = Feed(topic=topic)
    feed.save()
    with pytest.raises(SubscriberError):
        subscriber.unsubscribe(topic=topic)
    feed.is_push = True
    with pytest.raises(SubscriberError):
        subscriber.unsubscribe(topic=topic)
    feed.hub = "http://test.com/hub"
    with pytest.raises(SubscriberError):
        subscriber.unsubscribe(topic=topic)
    feed.status = STATUS.UNSUBSCRIBED
    with pytest.raises(SubscriberError):
        subscriber.unsubscribe(topic=topic)
def create_feed(self, url, hub=None, find_feed=True):
    """
    Creates a Feed, searching the website for a feed and PuSH links
    if find_feed is True.

    :param url: The URL of a website or feed.
    :param hub: The URL of the PuSH hub to use.
    :param find_feed: Set to True to search the website for a feed and
        override the url if a feed is found.
    :return: Feed
    """
    topic = url
    parsed_feed = None
    if find_feed:
        app.logger.info(u'Finding feed for url {0}'.format(url))
        data = get_feed(url)
        app.logger.info(u'Found feed {0} with hub {1} for url {2}'
                        .format(data[1], data[0], url))
        if data[0]:
            if not hub:
                hub = data[0]
        if data[1]:
            topic = data[1]
        if data[2]:
            parsed_feed = data[2]
    if not hub:
        hub = app.config['DEFAULT_HUB']
    app.logger.info(u'Creating Feed with topic {0} and hub {1}'
                    .format(topic, hub))
    feed = Feed(topic=topic, hub=hub)
    if parsed_feed:
        try:
            url = parsed_feed.feed['link']
            domain = get_domain(url)
            if domain:
                app.logger.info('Set site_url for Feed {0} to {1}'
                                .format(feed, domain))
                feed.site_url = domain
        except Exception:
            pass
    db.session.add(feed)
    if parsed_feed:
        app.logger.info(u'Feed creation for topic {0} parsed a feed, '
                        'sending notification'.format(topic))
        notification_received.send(self, feed=feed,
                                   content_type='application/rss+xml',
                                   content=parsed_feed, parsed=True)
    db.session.commit()
    return feed
def send_subscribe_feed():
    """
    Subscribes to a single feed.
    """
    requested_url = request.get_json()
    app.logger.info(u'User: {0} requested subscription to feed: {1}'
                    .format(current_user, requested_url))
    # Only subscribe if feed was found by search_feed method and
    # loaded in session, otherwise return empty result.
    if requested_url not in session['feed_urls']:
        app.logger.warning(u'Requested url: {0} was not found in session'
                           .format(requested_url))
        return jsonify({'subscribed': None})
    feed_info_schema = FeedInfoSchema(many=True)
    feedinfos = feed_info_schema.load(session['feeds'])
    feedinfo = None
    for f in feedinfos.data:
        f1 = feed_info_schema.make_feedinfo(f)
        if f1.url == requested_url:
            feedinfo = f1
    if not feedinfo:
        app.logger.warning(u'Requested url: {0} was not found in session'
                           .format(requested_url))
        return jsonify({'subscribed': None})
    feed = Feed.query.filter_by(topic=feedinfo.url).first()
    if not feed:
        feed = Feed(topic=feedinfo.url,
                    hub=feedinfo.hub or app.config['DEFAULT_HUB'])
        feed.update_from_feedinfo(feedinfo)
        app.logger.info(u'User: {0} created Feed: {1}'
                        .format(current_user, feed))
    else:
        if feed.status == STATUS.SUBSCRIBED:
            feedinfo.subscribed = True
            return jsonify({'subscribed': feedinfo})
    db.session.add(feed)
    result = subscriber.subscribe(feed.topic, find_feed=False)
    return jsonify({'subscribed': feedinfo.url, 'message': result[1]})
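# Hypothetical client call for the subscribe endpoint; the route path is an
# assumption (it is not shown in this snippet), and the body is the bare feed
# URL as JSON, matching request.get_json() above.
import requests

resp = requests.post(
    "https://app.example.com/feed/subscribe",  # placeholder route
    json="https://example.com/feed.xml",
)
print(resp.json())  # e.g. {"subscribed": "...", "message": "..."}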
def test_get_scheduled_feeds(session):
    timestamp = Feed.current_timestamp()
    feed_to_fetch_1 = FeedFactory(next_scheduled_update=timestamp - 1000)
    feed_to_fetch_2 = FeedFactory(next_scheduled_update=timestamp - 100000)
    feed_future_fetch = FeedFactory(next_scheduled_update=timestamp + 100)
    feed_is_push = FeedFactory(next_scheduled_update=timestamp - 1000, is_push=True)
    feed_is_inactive = FeedFactory(next_scheduled_update=timestamp - 1000, active=False)
    to_fetch = FeedFetcher.get_scheduled_feeds()
    assert len(to_fetch) == 3
    TestCase().assertCountEqual(
        to_fetch, [feed_to_fetch_1, feed_to_fetch_2, feed_is_push]
    )
    feed_to_fetch_2.next_scheduled_update = timestamp + 1000
    feed_to_fetch_3 = FeedFactory(next_scheduled_update=timestamp - 100)
    to_fetch = FeedFetcher.get_scheduled_feeds()
    assert len(to_fetch) == 3
    TestCase().assertCountEqual(
        to_fetch, [feed_to_fetch_1, feed_to_fetch_3, feed_is_push]
    )
    to_fetch = FeedFetcher.get_scheduled_feeds(1)
    assert len(to_fetch) == 1
def process_feed_info(self, feed_info: StatusFeedInfo, feed: Feed) -> None:
    """
    Get and update feed with feed_info details.
    """
    if feed:
        app.logger.info("Updating %s with info: %s", feed, feed_info)
        feed.update_from_feed_info(feed_info)
        db.session.add(feed)
        if feed.excluded:
            self.excluded.append(feed_info.url)
            return None
        if feed.active:
            feed_info.subscribed = True
    self.feed_info_list.append(feed_info)
    self.urls.append(feed_info.url)
def test_feed_creation(session):
    topic = "http://testing.com/feed"
    hub = "https://push.superfeedr.com"
    feed_format = "atom"
    feed = Feed(topic=topic, hub=hub)
    feed.save()
    assert feed.topic == topic
    assert feed.hub == hub
    assert feed.feed_format == feed_format
    assert feed.status == STATUS.UNSUBSCRIBED
    assert feed.secret is not None
    assert feed.unique_url is not None
    secret = feed.secret
    feed.create_secret()
    assert feed.secret == secret
def test_feed_creation(self):
    topic = 'http://testing.com/feed'
    hub = 'https://push.superfeedr.com'
    feed_format = 'json'
    feed = Feed(topic=topic, hub=hub)
    feed.save()
    self.assertEqual(feed.topic, topic)
    self.assertEqual(feed.hub, hub)
    self.assertEqual(feed.feed_format, feed_format)
    self.assertEqual(feed.status, STATUS.UNSUBSCRIBED)
    self.assertIsNotNone(feed.secret)
    self.assertIsNotNone(feed.unique_url)
    secret = feed.secret
    feed.create_secret()
    self.assertEqual(feed.secret, secret)
def update_feed_version(self, data: Dict, feed: Feed) -> None:
    """
    Sets the format of the Feed.

    :param data: parsed XML feed
    :type data: Dict
    :param feed: A Feed object
    :type feed: Feed
    :return: None
    """
    feed.feed_format = data.get("version", "RSS/Atom")
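# Sketch of the input the XML version handler receives: feedparser reports the
# detected format in the "version" key (e.g. "rss20" or "atom10"). `handler`
# and `feed` are assumed from the surrounding module.
import feedparser  # assumed to be the parser used upstream

parsed = feedparser.parse("<rss version='2.0'><channel></channel></rss>")
handler.update_feed_version(parsed, feed)  # feed.feed_format -> "rss20"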
def update_feed_version(self, data: Dict, feed: Feed) -> None:
    """
    Sets the format of the Feed.

    :param data: deserialized JSON
    :type data: Dict
    :param feed: A Feed object
    :type feed: Feed
    :return: None
    """
    feed.feed_format = "json"
def update_feed_details(self, data: Dict, feed: Feed) -> None:
    """
    Updates the title and description of a feed if they have
    been sent with the notification.

    :param data: parsed XML feed
    :type data: Dict
    :param feed: The Feed to be updated
    :type feed: Feed
    :return: None
    """
    feed_data = data.get("feed")
    if feed_data:
        if feed_data.get("title"):
            feed.title = feed_data["title"]
        if feed_data.get("subtitle"):
            feed.description = feed_data.get("subtitle")
        elif feed_data.get("description"):
            feed.description = feed_data.get("description")
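# Illustrative input for the XML details handler; `handler` and `feed` are
# again assumed. The handler prefers the Atom "subtitle" field and falls back
# to the RSS "description".
import feedparser  # assumed dependency

parsed = feedparser.parse(
    "<rss version='2.0'><channel>"
    "<title>Example</title>"
    "<description>Posts about examples</description>"
    "</channel></rss>"
)
handler.update_feed_details(parsed, feed)  # sets feed.title and feed.description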
def update_feed_details(self, data: Dict, feed: Feed) -> None:
    """
    Updates the title and description of a feed if they have
    been sent with the notification.

    :param data: deserialized JSON
    :type data: Dict
    :param feed: The Feed to be updated
    :type feed: Feed
    :return: None
    """
    if data.get("title"):
        feed.title = data.get("title")
    if data.get("description"):
        feed.description = data["description"]
    elif data.get("subtitle"):
        feed.description = data["subtitle"]
    feed.feed_format = "json"
def test_fetch_feed(session, capture_signal_notification_received):
    feed = Feed(topic="http://test.com/feed.rss")
    body = "<rss></rss>"
    responses.add(
        responses.GET,
        feed.topic,
        body=body,
        status=200,
        content_type="application/rss+xml",
    )
    result = FeedFetcher.fetch_feed(feed)
    assert result is True
    assert feed.last_status_code == 200
    assert not feed.etag
    assert not feed.last_modified
    assert feed.next_scheduled_update > Feed.current_timestamp()
    captured_feed, captured_content = capture_signal_notification_received[0]
    assert captured_feed.topic == feed.topic
    assert captured_content == body
def test_feed_set_next_scheduled_update(session):
    timestamp = Feed.current_timestamp()
    feed = FeedFactory()
    assert not feed.last_fetch
    assert not feed.next_scheduled_update
    feed.set_next_scheduled_update()
    assert feed.next_scheduled_update > timestamp
    assert feed.next_scheduled_update < timestamp + 4000
    feed.last_fetch = datetime.utcnow()
    feed.set_next_scheduled_update()
    assert feed.next_scheduled_update > timestamp
    assert feed.next_scheduled_update < timestamp + 4000
    feed.last_fetch = datetime.utcnow() + timedelta(days=1)
    feed.set_next_scheduled_update()
    assert feed.next_scheduled_update > timestamp
    feed.last_fetch = datetime.utcnow()
    feed.set_next_scheduled_update(10000)
    assert feed.next_scheduled_update > timestamp + 9500
    assert feed.next_scheduled_update < timestamp + 11500
def create_feed(self, url, hub=None, find_feed=True):
    """
    Creates a Feed, searching the website for a feed and PuSH links
    if find_feed is True.

    :param url: The URL of a website or feed.
    :param hub: The URL of the PuSH hub to use.
    :param find_feed: Set to True to search the website for a feed and
        override the url if a feed is found.
    :return: Feed
    """
    topic = url
    parsed_feed = None
    if find_feed:
        app.logger.info("Finding feed for url %s", url)
        data = get_feed(url)
        app.logger.info(
            "Found feed %s with hub %s for url %s", data[1], data[0], url
        )
        if data[0]:
            if not hub:
                hub = data[0]
        if data[1]:
            topic = data[1]
        if data[2]:
            parsed_feed = data[2]
    if not hub:
        hub = app.config.get("DEFAULT_HUB")
    app.logger.info("Creating Feed with topic %s and hub %s", topic, hub)
    feed = Feed(topic=topic, hub=hub)
    if parsed_feed:
        try:
            url = parsed_feed.feed.get("link")
            domain = get_domain(url)
            if domain:
                app.logger.info("Set site_url for %s to %s", feed, domain)
                feed.site_url = domain
        except Exception as e:
            app.logger.error("Error getting domain for %s: %s", feed, e)
    db.session.add(feed)
    if parsed_feed:
        app.logger.info(
            "Feed creation for topic %s parsed a feed, sending notification",
            topic,
        )
        notification_received.send(
            self,
            feed=feed,
            content_type="application/rss+xml",
            content=parsed_feed,
            parsed=True,
        )
    db.session.commit()
    return feed
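# Usage sketch for create_feed (illustrative; `discovery` is a placeholder for
# the object that owns the method, and the URL is an example):
feed = discovery.create_feed("https://example.com/", find_feed=True)
# With find_feed=True the page is searched for a feed link and a PuSH hub, and
# the discovered feed URL replaces the page URL as the topic.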
def test_feed_create_secret(self):
    feed = Feed(topic='test', hub='testhub')
    feed.create_secret()
    self.assertIsNotNone(feed.secret)
def index(self):
    current_time = datetime.utcnow()
    dayago = current_time - timedelta(days=1)
    weekago = current_time - timedelta(weeks=1)
    monthago = current_time - timedelta(days=31)
    yearago = current_time - timedelta(days=365)
    immediate_subs = get_count(
        db.session.query(Subscription.id)
        .join(Period, Subscription.periods)
        .filter(Period.name == PERIOD.IMMEDIATE, Subscription.active == True)
    )
    daily_subs = get_count(
        db.session.query(Subscription.id)
        .join(Period, Subscription.periods)
        .filter(Period.name == PERIOD.DAILY, Subscription.active == True)
    )
    weekly_subs = get_count(
        db.session.query(Subscription.id)
        .join(Period, Subscription.periods)
        .filter(Period.name == PERIOD.WEEKLY, Subscription.active == True)
    )
    monthly_subs = get_count(
        db.session.query(Subscription.id)
        .join(Period, Subscription.periods)
        .filter(Period.name == PERIOD.MONTHLY, Subscription.active == True)
    )
    active_users = get_count(db.session.query(User.id).filter_by(active=True))
    admin_users = get_count(
        db.session.query(User.id)
        .join(Role, User.roles)
        .filter(Role.name == "admin")
    )
    entries_last_day = get_count(
        db.session.query(Entry).filter(Entry.published > dayago)
    )
    entries_last_week = get_count(
        db.session.query(Entry).filter(Entry.published > weekago)
    )
    entries_last_month = get_count(
        db.session.query(Entry).filter(Entry.published > monthago)
    )
    entries_last_year = get_count(
        db.session.query(Entry).filter(Entry.published > yearago)
    )
    authors_subscribed = get_count(
        db.session.query(Author)
        .join(Subscription)
        .filter(Subscription.active == True)
    )
    subscribed_feeds = get_count(
        db.session.query(Feed).filter(Feed.status == STATUS.SUBSCRIBED)
    )
    unsubscribed_feeds = get_count(
        db.session.query(Feed).filter(Feed.status == STATUS.UNSUBSCRIBED)
    )
    default_hub_feeds = get_count(
        db.session.query(Feed).filter(Feed.hub == app.config["DEFAULT_HUB"])
    )
    fetch_feeds = get_count(
        db.session.query(Feed.id).filter(
            Feed.status == STATUS.FETCH, Feed.active == True
        )
    )
    daily_emails = get_count(
        db.session.query(Email)
        .join(Period, Email.period)
        .filter(Period.name == PERIOD.DAILY)
    )
    weekly_emails = get_count(
        db.session.query(Email)
        .join(Period, Email.period)
        .filter(Period.name == PERIOD.WEEKLY)
    )
    monthly_emails = get_count(
        db.session.query(Email)
        .join(Period, Email.period)
        .filter(Period.name == PERIOD.MONTHLY)
    )
    current_timestamp = Feed.current_timestamp()
    return self.render(
        "admin/home.html",
        userCount=get_count(db.session.query(User)),
        feedCount=get_count(db.session.query(Feed)),
        entryCount=get_count(db.session.query(Entry)),
        authorCount=get_count(db.session.query(Author)),
        subscriptionCount=get_count(db.session.query(Subscription)),
        emailCount=get_count(db.session.query(Email)),
        daily_emails=daily_emails,
        weekly_emails=weekly_emails,
        monthly_emails=monthly_emails,
        immediate_subs=immediate_subs,
        daily_subs=daily_subs,
        weekly_subs=weekly_subs,
        monthly_subs=monthly_subs,
        active_users=active_users,
        admin_users=admin_users,
        entries_last_year=entries_last_year,
        entries_last_day=entries_last_day,
        entries_last_week=entries_last_week,
        entries_last_month=entries_last_month,
        authors_subscribed=authors_subscribed,
        subscribed_feeds=subscribed_feeds,
        unsubscribed_feeds=unsubscribed_feeds,
        default_hub_feeds=default_hub_feeds,
        fetch_feeds=fetch_feeds,
        current_timestamp=current_timestamp,
        current_datetime=datetime.utcnow(),
    )
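# get_count is used throughout the dashboard above. A minimal sketch of the
# common SQLAlchemy counting idiom it presumably wraps (an assumption, since
# the helper body is not shown here); it issues COUNT(*) without loading rows.
from sqlalchemy import func

def get_count(q):
    # Replace the selected columns with COUNT(*) and drop any ORDER BY
    count_q = q.statement.with_only_columns([func.count()]).order_by(None)
    return q.session.execute(count_q).scalar()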
def create_or_activate_feed(
    cls, url: str = "", hub: str = "", feedinfo: StatusFeedInfo = None, user=None
):
    """
    Creates a Feed, or sets an existing Feed to active.
    If hub then sets the Feed to use Websub.
    If feedinfo then updates the Feed information from feedinfo.
    If user then sets Feed user.

    :param url: Feed URL
    :param hub: Websub hub URL
    :param feedinfo: StatusFeedInfo object
    :param user: User who created the feed
    :return: Feed
    """
    if not url and not (feedinfo and feedinfo.url):
        raise AttributeError("Must have either Url or StatusFeedInfo")
    if feedinfo:
        url = url if url else feedinfo.url
        if not hub and feedinfo.hubs:
            hub = feedinfo.hubs[0]
    if not validators.url(url):
        raise ValueError("URL arg is not a valid URL")
    app.logger.info("Creating or activating Feed with URL %s", url)
    feed = Feed.query.filter_by(topic=url).first()
    if feed:
        app.logger.info("%s activated", feed)
    else:
        feed = Feed(topic=url)
        if user:
            app.logger.info("%s created by %s", feed, user)
            feed.user = user
        else:
            app.logger.info("%s created", feed)
    if feedinfo:
        feed.update_from_feed_info(feedinfo)
    # Sets the Feed to use Websub
    if hub:
        feed.hub = hub
        feed.is_push = True
    # If not a Websub feed then set the Feed as fetch only
    if not feed.is_push:
        feed.fetch = True
        # Set the Feed to fetch on the next fetch task
        feed.set_next_scheduled_update(frequency=0, range_percent=0)
    feed.active = True
    feed.save()
    return feed
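# Usage sketch (hypothetical caller): `FeedService` is a placeholder for the
# class that actually owns create_or_activate_feed; the URLs are examples.
feed = FeedService.create_or_activate_feed(
    url="https://example.com/feed.xml",
    hub="https://websub.example.org/hub",
)
assert feed.active is True
assert feed.is_push is True  # a hub was supplied, so WebSub is used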
def fetch_feed(cls, feed: Feed, force: bool = False) -> bool:
    """
    Fetch a single Feed. Will attempt to fetch the latest version of the
    Feed. Statistics will be saved and the next fetch scheduled regardless
    of fetch success. If the fetch failed or there is no change in the
    feed, then no notification_received signal will be sent and the
    process will exit.

    :param feed: Feed to be fetched
    :param force: Always fetch regardless of last-modified values
    :return: True if fetch was successful with new content
    """
    fetched: bool = False
    content: str = ""
    content_length: int = 0
    response_url: str = ""
    headers: dict = {}
    status_code: int = 500
    encoding: str = ""
    app.logger.info("Fetching %s", feed)
    start = time.perf_counter()
    try:
        with requests.get(
            feed.topic,
            headers=feed.fetch_headers(force),
            timeout=(3.05, 10),
            stream=True,
        ) as response:
            # Capture response variables before raising any exceptions
            status_code = response.status_code
            headers = response.headers
            encoding = response.encoding or "utf-8"
            # Final URL after any redirects, used below for content-location
            response_url = response.url
            # Only set content if content length is acceptable,
            # else raise ContentLengthException
            content_length = int(response.headers.get("Content-Length", 0))
            if content_length < app.config.get("MAX_FEED_LENGTH"):
                content = response.text
            else:
                raise ContentLengthException()
            response.raise_for_status()
    except requests.Timeout as t:
        app.logger.warning("Timeout fetching Feed %s: %s", feed, t)
    except requests.ConnectionError as c:
        app.logger.warning("ConnectionError fetching feed %s: %s", feed, c)
    except requests.RequestException as e:
        app.logger.warning("Error fetching Feed %s: %s", feed, e)
    except ContentLengthException:
        app.logger.warning("TOO BIG: feed=%s size=%s", feed, content_length)
    else:
        fetched = True
    request_time_ms = int((time.perf_counter() - start) * 1000)
    # Read content length from content only if available and required
    if content and content_length == 0:
        content_length = utf8len(content)
    app.logger.info(
        "FETCHED: topic=%s duration=%dms status=%s size=%sb",
        feed.topic,
        request_time_ms,
        status_code,
        content_length,
    )
    feed.last_status_code = status_code
    feed.last_fetch = datetime.utcnow()
    feed.set_next_scheduled_update()
    stats = FeedStats.create_stats(
        feed.id,
        FetchType.PULL,
        status_code=status_code,
        latency=request_time_ms,
        content_length=content_length,
    )
    # Set feed to inactive if Feed is Gone
    if status_code == 410:
        feed.gone = True
    # Exit if Feed not successfully fetched
    if not fetched:
        return cls.no_change(feed, stats)
    # Set Feed ETag from response
    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
    etag = headers.get("etag")
    if etag:
        feed.etag = etag
    # Set Feed Last-Modified from response
    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified
    last_modified = headers.get("last-modified")
    if last_modified:
        feed.last_modified = feed.set_last_modified(last_modified)
    # Save and return if not modified code received
    if status_code == 304:
        return cls.no_change(feed, stats)
    # Check if content hash is new
    new_content = feed.new_content(content, encoding or "UTF-8")
    # Save and return if no new content and not forcing update
    if not new_content and not force:
        return cls.no_change(feed, stats)
    db.session.commit()
    # Set content-location header to final url if not already set
    if "content-location" not in headers:
        headers["content-location"] = response_url
    notification_received.send(
        cls,
        feed=feed,
        content_type=parse_options_header(headers.get("content-type")),
        content=content,
        encoding=encoding,
        stats=stats,
        headers=headers_to_dict(headers),
    )
    return True
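# fetch_feed relies on feed.fetch_headers(force) for HTTP conditional requests.
# A minimal sketch of what such a helper typically builds (an assumption; the
# real implementation is not shown). The headers are standard HTTP validators,
# so a 304 response means the feed is unchanged since the last fetch.
from datetime import timezone
from email.utils import format_datetime

def fetch_headers(feed, force=False):
    headers = {"User-Agent": "feed-fetcher"}  # placeholder user agent
    if force:
        return headers  # skip validators to force a full 200 response
    if feed.etag:
        headers["If-None-Match"] = feed.etag
    if feed.last_modified:
        # Stored naive UTC datetime -> RFC 1123 date for If-Modified-Since
        headers["If-Modified-Since"] = format_datetime(
            feed.last_modified.replace(tzinfo=timezone.utc), usegmt=True
        )
    return headers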
def test_feed_create_secret(session):
    feed = Feed(topic="test", hub="testhub")
    feed.create_secret()
    assert feed.secret is not None
def create_feed_from_feed_info(feed_info, hub=None, feed_format=None):
    if not hub:
        if feed_info.hub:
            hub = feed_info.hub
        else:
            hub = app.config.get("DEFAULT_HUB")
    feed = Feed.query.filter_by(topic=feed_info.url).first()
    if not feed:
        feed = Feed(topic=feed_info.url, hub=hub)
        feed.create_unique_url()
        feed.create_secret()
    feed.feed_format = feed_format if feed_format else feed.feed_format
    if not feed.callback_url:
        feed.callback_url = get_public_url(feed)
    feed.site_url = feed_info.site_url
    feed.description = feed_info.description
    feed.site_name = feed_info.site_name
    feed.title = feed_info.title
    feed.site_icon_link = feed_info.site_icon_link
    return feed
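# Illustrative call (the feed_info shape is inferred from the attribute access
# above; SimpleNamespace stands in for the real FeedInfo object):
from types import SimpleNamespace

info = SimpleNamespace(
    url="https://example.com/feed.xml",
    hub=None,
    site_url="https://example.com/",
    description="Posts about examples",
    site_name="Example",
    title="Example Blog",
    site_icon_link="https://example.com/favicon.ico",
)
feed = create_feed_from_feed_info(info, feed_format="atom")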