def test_updated_no_date():
    """Entry.updated stays falsy when the JSON item carries no date."""
    json_item = {"id": "123"}
    entry = Entry()
    entry.updated = JsonItemParser(json_item).updated(json_item)
    assert not entry.updated
def test_entry_authors_added_on_update(session, feed):
    """Parsing an item against an existing entry attaches its authors and title."""
    existing = Entry(
        title="Title", feed=feed, link="http://test.com/entry1", guid="qwertyuiop"
    )
    # Entry starts with no authors and an empty author string.
    assert existing.create_author_string() == ""
    assert len(existing.authors) == 0

    new_author = {"name": "Test Author2", "email": "*****@*****.**"}
    rss_item = {"authors": [new_author], "title": "Title 2", "link": existing.link}
    assert existing.title != rss_item["title"]

    parsed_entry, _ = RssItemParser(rss_item, feed, entry=existing).parse()
    assert parsed_entry is not None
    assert len(parsed_entry.authors) == 1
    assert parsed_entry.authors[0].name == new_author["name"]
    assert parsed_entry.authors[0].email == new_author["email"]

    # The existing entry itself is updated in place.
    assert existing.create_author_string() == new_author["name"]
    assert existing.title == rss_item["title"]
def test_updated_no_date(self):
    """updated() returns None when the item carries no date field."""
    json_item = {}
    entry = Entry()
    entry.updated = JsonItemParser(json_item, self.feed).updated(json_item)
    self.assertIsNone(entry.updated)
def clean_html(self, ids):
    """Re-run image and title cleaning over the Entries with the given ids."""
    selected = Entry.query.filter(Entry.id.in_(ids)).all()
    joined = u", ".join(map(str, selected))
    app.logger.info(u"Admin Cleaning HTML for Entries: {0}".format(joined))
    for item in selected:
        item.content = Entry.clean_images(item.content)
        item.summary = Entry.clean_images(item.summary)
        item.title = Entry.clean_title(item.title)
        db.session.add(item)
    db.session.commit()
def test_updated():
    """A unix-timestamp 'updated' field parses to the expected datetime."""
    json_item = {"updated": "1448928000"}
    entry = Entry()
    entry.updated = JsonItemParser(json_item).updated(json_item)
    assert entry.updated
    expected = datetime(2015, 12, 1)
    TestCase().assertAlmostEqual(entry.updated, expected, delta=timedelta(seconds=1))
def test_published_no_date():
    """A missing 'published' field falls back to (approximately) now."""
    json_item = {"id": "123"}
    entry = Entry()
    entry.published = JsonItemParser(json_item).published(json_item)
    assert entry.published
    now = datetime.utcnow()
    TestCase().assertAlmostEqual(entry.published, now, delta=timedelta(seconds=1))
def test_published_no_date(self):
    """published() falls back to roughly the current UTC time."""
    json_item = {}
    entry = Entry()
    entry.published = JsonItemParser(json_item, self.feed).published(json_item)
    self.assertIsNotNone(entry.published)
    self.assertAlmostEqual(
        entry.published, datetime.utcnow(), delta=timedelta(seconds=1)
    )
def test_published_updated(self):
    """When only an 'updated' timestamp exists, published() uses it."""
    json_item = {"updated": "1448928000"}
    entry = Entry()
    entry.published = JsonItemParser(json_item, self.feed).published(json_item)
    self.assertIsNotNone(entry.published)
    self.assertAlmostEqual(
        entry.published, datetime(2015, 12, 1), delta=timedelta(seconds=1)
    )
def test_parser_base_new_entries(session):
    """new_entries() keeps only entries not flagged as updates."""
    updated_entry = Entry()
    updated_entry.is_update = True
    fresh_entry = Entry()
    fresh_entry.is_update = False
    untouched_entry = Entry()

    result = RssParser().new_entries({updated_entry, fresh_entry, untouched_entry})

    assert len(result) == 2
    assert fresh_entry in result
    assert untouched_entry in result
def clean_html(self, ids):
    """Admin action: re-clean content/summary/title HTML for the chosen entries.

    Flashes a success message, or an error message when the view exception
    handler decides the failure should not be re-raised.
    """
    try:
        selected = Entry.query.filter(Entry.id.in_(ids)).all()
        app.logger.info(
            "Admin Cleaning HTML for Entries: %s", stringify_list(selected)
        )
        for item in selected:
            item.content = Entry.clean_content(item.content)
            item.summary = Entry.clean_content(item.summary)
            item.title = Entry.clean_title(item.title)
            item.create_summary()
            db.session.add(item)
        db.session.commit()
        flash(f"HTML was successfully cleaned for {len(ids)} Entries.", "success")
    except Exception as ex:
        # Let unexpected errors propagate when the admin view says so.
        if not self.handle_view_exception(ex):
            raise
        flash(f"Failed to clean Entry HTML. {ex}", "error")
def title(self, item):
    """
    Gets the title of an item.

    :param item: deserialized JSON item
    :type item: dict
    :return: str, or None when the item has no title
    """
    title = item.get('title', None)
    # Only clean real text; avoids handing None to Entry.clean_title
    # (matches the guarded typed variant of this method).
    if title:
        title = Entry.clean_title(title)
    return title
def title(self, item: Dict) -> str:
    """
    Gets the title of an item.

    :param item: Feedparser entry
    :type item: Dict
    :return: str
    """
    return Entry.clean_title(item.get("title", ""))
def title(self, item):
    """
    Gets the title of an item.

    :param item: Feedparser entry
    :type item: dict
    :return: str, or None when the item has no title
    """
    title = item.get('title', None)
    # Only clean real text; avoids handing None to Entry.clean_title
    # (matches the guarded typed variant of this method).
    if title:
        title = Entry.clean_title(title)
    return title
def title(self, item: Dict) -> str:
    """
    Gets the title of an item.

    :param item: deserialized JSON item
    :type item: Dict
    :return: str
    """
    raw = item.get("title", "")
    return Entry.clean_title(raw) if raw else raw
def summary(self, item):
    """
    Gets the summary of an item.

    :param item: Feedparser entry
    :type item: dict
    :return: str
    """
    raw = item.get('summary', None)
    return Entry.clean_images(raw) if raw else raw
def test_parse_wapo_feed(feed, rss_parser, wapo_rss):
    """Parsing the WaPo fixture creates new entries and updates a seeded one."""
    seeded_guid = (
        "https://www.washingtonpost.com/business/economy/trump-rewrites-gop-playbook-in-his"
        "-own-image/2018/02/11/8505873c-0dec-11e8-8890-372e2047c935_story.html"
    )
    first_published = datetime(2018, 1, 1)
    seeded = Entry(feed=feed, guid=seeded_guid, published=first_published)
    seeded.save()
    assert not seeded.updated
    assert seeded.published == first_published
    assert len(seeded.authors) == 0

    rss_parser.feed = feed
    rss_parser.data = wapo_rss
    rss_parser.parse()
    entries = rss_parser.entries
    authors = rss_parser.authors
    assert len(entries) == 3
    assert len(authors) == 3

    # A freshly created entry carries its author and publication date.
    clarke_entry = next(
        (e for e in entries if e.authorstring == "Liz Clarke"), None
    )
    assert clarke_entry
    assert clarke_entry.link
    assert clarke_entry.published
    assert not clarke_entry.updated
    assert len(clarke_entry.authors) == 1
    assert clarke_entry.authors[0].name == "Liz Clarke"

    # The seeded entry is updated, not recreated.
    refreshed = next((e for e in entries if e.guid == seeded_guid), None)
    assert refreshed
    assert refreshed.link
    assert refreshed.published == first_published
    assert refreshed.updated
    assert refreshed.updated != first_published
    assert len(refreshed.authors) == 2
def test_entry_authors_updated(session, feed):
    """A parsed item appends its author to an entry's existing author list."""
    first_author = AuthorFactory()
    existing = Entry(
        title="Title", feed=feed, link="http://test.com", guid="asdfghjkl"
    )
    existing.authors.append(first_author)
    original_string = existing.create_author_string()

    second_author = {"name": "Jane Doe", "email": "*****@*****.**"}
    rss_item = {"authors": [second_author], "title": "Title 2", "link": existing.link}
    parsed_entry, _ = RssItemParser(rss_item, feed, entry=existing).parse()

    assert parsed_entry is not None
    assert len(parsed_entry.authors) == 2
    assert parsed_entry.authors[0] == first_author
    assert parsed_entry.authors[1].name == second_author["name"]
    assert len(existing.authors) == 2
    # The author string now joins both names.
    assert existing.create_author_string() == (
        original_string + " and " + second_author["name"]
    )
    assert existing.title == rss_item["title"]
def content(self, item):
    """
    Gets the content of the item. If content is None, try using the summary.

    :param item: deserialized JSON item
    :type item: dict
    :return: str
    """
    body = item.get('content', None)
    if body is None:
        body = item.get('summary', None)
    return Entry.clean_images(body) if body else body
def content(self, item):
    """
    Gets the content of the item. If content is None, try using the summary.

    :param item: Feedparser entry
    :type item: dict
    :return: str, or None when neither content nor summary is usable
    """
    raw = item.get('content')
    if raw is None:
        content = item.get('summary', None)
    else:
        try:
            # Feedparser exposes content as a list of dicts with a 'value' key.
            content = raw[0].get('value')
        except (IndexError, TypeError, AttributeError):
            # Narrowed from a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit; malformed content still yields None.
            content = None
    if content:
        content = Entry.clean_images(content)
    return content
def test_clean_content(client):
    """Entry.clean_content reduces the XML fixture to the expected markup."""
    with open(TEST_FILES_DIR + "entry_content.xml", "r") as fixture:
        raw = fixture.read()
    result = Entry.clean_content(raw, parser=bs4_parser())
    assert result.replace("\n", "").strip() == "<div><p>Testing</p></div>"
def test_entry_remove_empty_tags(client):
    """Empty tags are stripped from the parsed document."""
    markup = "<div><p>Testing<span>\n<o:p></o:p></span></p></div>"
    soup = BeautifulSoup(markup, bs4_parser())
    Entry.remove_empty_tags(soup)
    assert str(soup) == "<div><p>Testing</p></div>"
def test_entry_remove_comments(client):
    """HTML comments are removed wherever they appear."""
    markup = "<div><p><!-- Comment -->Testing</p></div><!--Comment-->"
    soup = BeautifulSoup(markup, bs4_parser())
    Entry.remove_comments(soup)
    assert str(soup) == "<div><p>Testing</p></div>"
def test_entry_remove_style(client):
    """Style attributes, classes and <style> tags are all stripped."""
    markup = '<div style="font-size:50px;" class="testing"><p style="color:blue;">Testing<style>Hello</style></p></div>'
    soup = BeautifulSoup(markup, bs4_parser())
    Entry.remove_unwanted_elements(soup)
    assert str(soup) == "<div><p>Testing</p></div>"
def handle_notification(self, feed, data):
    """Handles PuSH notifications in RSS and Atom format.

    Updates the feed's title/description from the payload, then upserts an
    Entry per item (matched by guid). Collected entries are appended to
    ``self.entries`` and added to the session; the caller commits.
    """
    app.logger.info(u'Handling RSS notification for {0}'.format(feed))
    if data.feed is not None:
        if data.feed.get('title'):
            feed.title = data.feed.get('title')
        # Atom uses 'subtitle'; RSS uses 'description'.
        if data.feed.get('subtitle'):
            feed.description = data.feed.get('subtitle')
        elif data.feed.get('description'):
            feed.description = data.feed.get('description')
        db.session.add(feed)
    if data.entries is None:
        app.logger.warning(u'No entries in notification for {0}'
                           .format(feed))
        return
    for item in data.entries:
        # dict.get never raises KeyError, so the old try/except fallback
        # was unreachable; fall back to the link explicitly instead.
        itemId = item.get('id')
        if itemId is None:
            itemId = item.get('link')
        if itemId is None:
            app.logger.warning(u'Could not get itemId for item {0} in '
                               'feed {1}'.format(item, feed))
            continue
        entry = Entry.query.filter_by(guid=itemId).first()
        if entry is None:
            entry = Entry(feed=feed)
        # Date parsing is best-effort: malformed/missing dates are skipped.
        try:
            entry.published = datetime.fromtimestamp(
                mktime(item.get('published_parsed')))
        except Exception:
            pass
        try:
            entry.updated = datetime.fromtimestamp(
                mktime(item.get('updated_parsed')))
        except Exception:
            pass
        entry.title = item.get('title')
        entry.guid = itemId
        entry.link = item.get('link')
        # Feedparser exposes content as a list of dicts with a 'value' key.
        if item.get('content') is None:
            entry.content = item.get('summary')
        else:
            content = item.get('content')[0]
            entry.content = content.get('value')
        entry.summary = item.get('summary')
        self.add_authors(entry, item.get('author'))
        entry.get_wordcount()
        app.logger.info(u'Adding entry <{0}>'.format(entry))
        self.entries.append(entry)
        db.session.add(entry)
    return
def create_dev_data(user):
    """Seed the development database for *user*.

    Creates a feed, two entries with fake content, two authors, a
    subscription with all periods, recommendations and a daily email,
    then commits everything in one transaction.
    """
    from faker import Factory as FakerFactory

    faker = FakerFactory.create()

    feed = Feed(
        topic="http://test.com/feed",
        hub="http://push.hub.com",
        site_url="http://test.com",
        title="Test Feed",
        description="A test feed",
        site_name="TestFeed.com",
        user=user,
    )
    entry1 = Entry(
        title="Test Entry",
        guid="http://test.com/feed/12345345234",
        content=list_to_html_paragraphs(faker.paragraphs(nb=5)),
        published=datetime(2017, 1, 1),
        site="TestFeed.com",
    )
    entry2 = Entry(
        title="Another Test Entry",
        guid="http://test.com/feed/346546gsdfgd",
        content=list_to_html_paragraphs(faker.paragraphs()),
        published=datetime(2017, 2, 1),
        site="TestFeed.com",
    )
    author1 = Author(
        givenname="Testy",
        familyname="McTesterson",
        name="Testy McTesterson",
        email="*****@*****.**",
        url="http://test.com/authors/testy",
    )
    author2 = Author(
        givenname="John",
        familyname="Doe",
        name="John Doe",
        email="*****@*****.**",
        url="http://test.com/authors/johndoe",
    )
    db.session.add_all([feed, entry1, entry2, author1, author2])

    entry1.add_authors([author1])
    entry2.add_authors([author1, author2])

    subscription = Subscription(user=user, author=author1, active=True)
    for period_name in (
        PERIOD.DAILY,
        PERIOD.IMMEDIATE,
        PERIOD.WEEKLY,
        PERIOD.MONTHLY,
    ):
        subscription.add_period(period_name)
    db.session.add(subscription)

    db.session.add_all(
        [
            Recommended(author=author1, active=True),
            Recommended(author=author2, active=True),
        ]
    )

    daily = Period.query.filter_by(name=PERIOD.DAILY).first()
    email = Email(
        user=user,
        period=daily,
        authors=[author1, author2],
        entries=[entry1, entry2],
        address=user.email,
    )
    db.session.add(email)
    db.session.commit()
def test_entry_clean_title(self):
    """clean_title preserves the leading unicode right-quote."""
    cleaned = Entry.clean_title("\u2019 Test Title")
    self.assertEqual(cleaned, "\u2019 Test Title")
def test_entry_clean_title(client):
    """clean_title leaves the unicode apostrophe untouched."""
    cleaned = Entry.clean_title("\u2019 Test Title")
    assert cleaned == "\u2019 Test Title"
def handle_json_notification(self, feed, data):
    """Handles PuSH notifications in JSON format.

    Updates the feed's title/description from the payload, then upserts an
    Entry per item (matched by guid). Collected entries are appended to
    ``self.entries`` and added to the session; the caller commits.
    """
    app.logger.info(u'Handling JSON notification for {0}'.format(feed))
    if data.get('title'):
        feed.title = data.get('title')
    if data.get('description'):
        feed.description = data.get('description')
    db.session.add(feed)
    if data.get('items') is None:
        app.logger.warning(u'No entries in notification for {0}'
                           .format(feed))
        return
    for item in data.get('items'):
        # dict.get never raises KeyError, so the old try/except fallback
        # was unreachable; fall back to the link explicitly instead.
        itemId = item.get('id')
        if itemId is None:
            itemId = item.get('link')
        if itemId is None:
            app.logger.warning(u'Could not get itemId for item {0} in '
                               'feed {1}'.format(item, feed))
            continue
        entry = Entry.query.filter_by(guid=itemId).first()
        if entry is None:
            entry = Entry(feed=feed)
        entry.title = item.get('title')
        entry.guid = itemId
        # Date parsing is best-effort: malformed/missing timestamps are skipped.
        try:
            entry.published = datetime.fromtimestamp(item.get('published'))
        except Exception:
            pass
        try:
            entry.updated = datetime.fromtimestamp(item.get('updated'))
        except Exception:
            pass
        if item.get('content') is None:
            entry.content = item.get('summary')
        else:
            entry.content = item.get('content')
        entry.summary = item.get('summary')
        entry.link = item.get('permalinkUrl')
        # NOTE(review): a literal 'actor.displayname' key is looked up here
        # (not a nested path) — confirm the JSON feed really uses this key.
        self.add_authors(entry, item.get('actor.displayname'))
        entry.get_wordcount()
        app.logger.info(u'Adding entry <{0}>'.format(entry))
        self.entries.append(entry)
        db.session.add(entry)
    return
def test_entry_remove_feedflare(client):
    """The FeedBurner 'feedflare' div is dropped from the document."""
    markup = '<div><p>Testing</p></div><div class="feedflare"><p>Feedflare</p></div>'
    soup = BeautifulSoup(markup, bs4_parser())
    Entry.remove_feedflare(soup)
    assert str(soup) == "<div><p>Testing</p></div>"