def main():
    """Scrape protip links and titles from ``html_main`` and write feed.xml.

    Relies on module-level globals: ``html_main`` (parsed page),
    ``get_description``, FEED_TITLE/FEED_DESCRIPTION/FEED_LINK.
    """
    links, titles = [], []
    for tag in html_main.find_all("div", {"class": "mt-2 pt-2 text-right"}):
        # Extract the id between "aptm.in/protip/" and the trailing " »".
        # Renamed from `id`, which shadowed the builtin.
        protip_id = tag.get_text().split("aptm.in/protip/")[1].split(" »")[0]
        links.append(f"https://aptmasterclass.com/protip/{protip_id}")
    for heading in html_main.find_all("h1", {"class": "heading mb-3"}):
        titles.append(heading.get_text().split("\n")[1].strip())
    # zip() pairs links with titles directly (replaces the range(len())
    # index loop); items are produced in order, so insert(i, ...) was
    # just a roundabout append.
    items = [
        rfeed.Item(title=title, link=link, description=get_description(link))
        for link, title in zip(links, titles)
    ]
    feed = rfeed.Feed(
        title=FEED_TITLE,
        description=FEED_DESCRIPTION,
        link=FEED_LINK,
        language="pl",
        lastBuildDate=datetime.now(),
        items=items,
    )
    with open("feed.xml", "w") as f:
        f.write(feed.rss())
def generate_rss(blog_container: BlogContainer):
    """Generate RSS feed using rfeed"""
    # Pyramid-style request: gives access to registry settings and URL helpers.
    request = blog_container.request
    blog_title = request.registry.settings.get("blog.title")
    blog_email = request.registry.settings.get("blog.rss_feed_email", "*****@*****.**")
    items = []
    for post_resource in blog_container.get_posts():
        post = post_resource.post
        item = rfeed.Item(
            title=post.title,
            link=request.resource_url(post_resource),
            # NOTE(review): placeholder text copied from the rfeed example —
            # presumably this should be the post's own summary; confirm intent.
            description="This is the description of the first article",
            author=blog_email,
            creator=post.author,
            guid=rfeed.Guid(str(post.id)),
            pubDate=post.published_at,
            # NOTE(review): ContentItem presumably embeds the full post body
            # as a content extension — verify against its definition.
            extensions=[ContentItem(post_resource)])
        items.append(item)
    feed = rfeed.Feed(title=blog_title,
                      link=request.resource_url(blog_container, "rss"),
                      description="",
                      language="en-US",
                      lastBuildDate=now(),
                      items=items,
                      extensions=[Content()])
    return feed
async def mangarock_manga_feed(oid):
    """Return an RSS response listing the chapters of MangaRock manga `oid`.

    Returns a (text_response, 404) tuple when the manga cannot be found.
    """
    base_url = 'https://api.mangarockhd.com/query/web401/info'
    async with aiohttp.request('GET', base_url, params={'oid': oid}) as resp:
        if resp.status != 200:
            # Bug fix: the message used {id}, interpolating the *builtin*
            # function `id` instead of the `oid` parameter.
            return text_response(f'No manga with ID {oid} could be found'), 404
        data = await resp.json()
        if data.get('code') != 0:
            return text_response(f'No manga with ID {oid} could be found'), 404
        else:
            data = data.get('data', {})
    # Bug fix: query-string values arrive as strings; islice requires an int.
    max_items = int(request.args.get('max', 20))
    chapters = [
        rfeed.Item(
            title=c.get('name'),
            link=f'https://mangarock.com/manga/{oid}/chapter/{c["oid"]}',
            # pubDate = datetime.fromtimestamp(c['updatedAt']),
            # Ignore pubDate so that chapters will be displayed
            # in the right order.
        )
        for c in islice(reversed(data.get('chapters', ())), max_items)]
    updated_time = datetime.fromtimestamp(data['last_update'])
    return rss_response(rfeed.Feed(
        title=data.get('name'),
        link=f'https://mangarock.com/manga/{oid}',
        description=data.get('description'),
        lastBuildDate=updated_time,
        pubDate=updated_time,
        generator=generator_name,
        items=chapters))
def tweets_to_rss(user, limit):
    """Build an rfeed Feed of up to `limit` recent tweets by `user`."""
    tweet_items = [format_tweet(t, user) for t in get_user_tweets(user, limit)]
    heading = "Tweets from @{0}".format(user)
    return rf.Feed(
        title=heading,
        description="Tweets from @{0}, generated by ProxyTweet".format(user),
        link="https://twitter.com/{0}".format(user),
        language="en-us",
        ttl=15,
        items=tweet_items,
        extensions=[TwitterFeed()],
    )
def root():
    """Render the Docfeed RSS document from the local SQLite database."""
    db = sqlite3.connect('feed.sqlite3')
    try:
        cur = db.cursor()
        items = []
        # Rows are ordered oldest-first; the truncated description doubles
        # as the item title.
        for d, link, desc, uuid in cur.execute(
                'select d, link, desc, uuid from feed order by d asc'):
            items.append(rfeed.Item(title=desc[:30],
                                    link=link,
                                    description=desc,
                                    guid=rfeed.Guid(uuid)))
        feed = rfeed.Feed(title='Docfeed',
                          link='http://rje.li',
                          description='Docfeed',
                          items=items)
        return feed.rss()
    finally:
        # Bug fix: the connection was previously leaked on every request.
        db.close()
def create_feed(profile_html, url, max_items=10):
    """Build an rfeed.Feed from the HTML of an Instagram profile page.

    Parses the embedded ``window._sharedData`` JSON blob, then turns up to
    `max_items` of the newest timeline posts into RSS items.
    """
    # Parse html
    soup = BeautifulSoup(profile_html, features="html5lib")
    # Raw strings fix the invalid-escape warnings from the old non-raw '\s'
    # patterns; the pattern is also hoisted out of the loop (it was
    # re-assigned on every iteration).
    pat = r'^\s*window._sharedData\s*='
    # Extract the data object
    data = None
    for script in soup.find_all('script'):
        text = str(script.string)
        if re.match(pat, text):
            data = json.loads(re.sub(r';\s*$', '', re.sub(pat, '', text)))
    # Select relevant data and build feed
    try:
        user = data['entry_data']['ProfilePage'][0]['graphql']['user']
        timeline = user['edge_owner_to_timeline_media']
    except (KeyError, TypeError):
        print(data, file=sys.stderr)
        raise
    items = []
    for edge in timeline['edges'][:max_items]:
        node = edge['node']
        link = 'https://www.instagram.com/p/' + node['shortcode']
        # Robustness: the caption key is sometimes absent; .get() folds the
        # missing-key case into the existing None handling.
        caption = node.get('accessibility_caption')
        if caption is not None:
            # First sentence becomes the title, the rest the description.
            parts = caption.split('. ')
            caption_begin = parts[0]
            caption_end = '. '.join(parts[1:])
        else:
            caption_begin = ''
            caption_end = ''
        items.append(
            rfeed.Item(title=caption_begin,
                       link=link,
                       description=caption_end,
                       author=user['full_name'],
                       guid=rfeed.Guid(node['id']),
                       pubDate=datetime.datetime.fromtimestamp(
                           node['taken_at_timestamp'])))
    feed = rfeed.Feed(title=user['full_name'],
                      link=url,
                      description=soup.title.text.strip(),
                      language="en-US",
                      lastBuildDate=datetime.datetime.now(),
                      items=items)
    return feed
def create_random_feed(filename, feed_length, title_length):
    """Build and print a sample feed from `feed_length` rows of `filename`."""
    entries = []
    for row in get_rows(filename, feed_length):
        text = row.strip()
        # Each row becomes one item; the title is a truncated copy of it.
        entries.append(rfeed.Item(title=truncate(text, title_length),
                                  description=text))
    feed = rfeed.Feed(title="Sample RSS Feed",
                      link="https://github.com/madhuri2k/fantastic-spoon",
                      description="A Random selection of items",
                      language="en-US",
                      items=entries)
    print("Feed is {}".format(feed.rss()))
    return feed
async def fanfic_feed(id):
    """Return an RSS response listing the chapters of fanfiction.net story `id`.

    Returns a (text_response, 404) tuple when the story cannot be found.
    """
    story_url = f'https://www.fanfiction.net/s/{id}'
    async with aiohttp.request('GET', story_url) as resp:
        if resp.status != 200:
            return text_response(f'No story with ID {id} could be found'), 404
        story_html = await resp.content.read()
    soup = BeautifulSoup(story_html, 'html.parser')
    header = soup.find(id='profile_top')
    if not header:
        return text_response(f'No story with ID {id} could be found'), 404
    # Walk the header siblings: <b>title</b>, then <a>author</a>, then
    # <div>summary</div>, then the <span data-xutime> update timestamp.
    title = header.find('b')
    author = title.find_next_sibling('a')
    description = author.find_next_sibling('div')
    updated = description.find_next_sibling('span').find('span', attrs={
        'data-xutime': True
    })
    updated_time = datetime.fromtimestamp(int(updated['data-xutime']))
    chapter_select = soup.find(id='chap_select')
    if not chapter_select:
        return text_response(f'No story with ID {id} could be found'), 404
    # Bug fix: query-string values arrive as strings; islice requires an int.
    max_items = int(request.args.get('max', 20))
    chapters = []
    for option in islice(reversed(chapter_select.find_all('option')), max_items):
        # Option text looks like "3. Chapter title" — drop the leading number.
        chap_title = option.find(
            text=True, recursive=False).split('.', 1)[1].strip()
        i = option['value']
        chapters.append(rfeed.Item(
            title=chap_title,
            link=f'{story_url}/{i}',))
    return rss_response(rfeed.Feed(
        title=f'{title.text} by {author.text}',
        link=story_url,
        description=description.text,
        lastBuildDate=updated_time,
        pubDate=updated_time,
        generator=generator_name,
        items=chapters))
def rss(self):
    """Serialize this feed to an RSS XML string (de-AT language tag)."""
    # Build the feed link from the non-empty URL components only.
    candidates = [
        self.baseUrl,
        'rss',
        urllib.parse.quote(self.subdir.encode('utf-8')),
        urllib.parse.quote(self.filename.encode('utf-8')),
    ]
    link = '/'.join(part for part in candidates if part)
    feed_image = rfeed.Image(
        url='/'.join([self.baseUrl, 'images', 'default.png']),
        title=self.title,
        link=link)
    feed = rfeed.Feed(title=self.title,
                      link=link,
                      description=self.title,
                      language="de-AT",
                      lastBuildDate=datetime.datetime.now(),
                      image=feed_image,
                      items=self.items)
    return feed.rss()
def main():
    """Validate docfx include files and print them as one RSS feed."""
    print("This is the initial validation and RSS convertor script.")
    # Each include file: read -> strip docs-repo metadata -> RSS item.
    out_items = [
        convert_rss_obj(
            clear_docs_repo_metadata(get_text_from_file(path), d_format="txt"))
        for path in get_files("./docfx_project/includes")
    ]
    feed = rss.Feed(
        title="Example RSS Feed",
        link="http://docs.microsoft.com/azure-stack/rss",
        description="Release notes for Azure Stack Hub.",
        language="en-US",
        lastBuildDate=datetime.datetime.now(),
        items=out_items,
    )
    print(feed.rss())
def posts_to_feed(self, posts, title, description):
    """Convert raw post dicts into an rfeed.Feed (uses the global `request`)."""
    items = []
    for post_data in posts:
        post_id = post_data["id"]
        # Fetch the full post so its text body is available.
        post = self.fetch_post(request, post_id)
        global_id = self.make_posting_post_url(request, post_id)
        item = rfeed.Item(title=post_data["title"])
        # Use the resource URL as the "link" when one exists.
        if "resource_url" in post_data:
            item.link = post_data["resource_url"]
        item.description = (
            "Post created by user " + post_data["username"]
            + " on " + post_data["date"]
            + " in " + post_data["community"]
            + "\n" + post["text"])
        item.author = post_data["username"]
        # No url in the original scheme, so just use the global ID.
        item.guid = rfeed.Guid(global_id)
        item.pubDate = self.database_date_to_datetime_object(post_data["date"])
        items.append(item)
    return rfeed.Feed(title=title,
                      link=request.url,
                      description=description,
                      language="en-US",
                      lastBuildDate=datetime.datetime.now(),
                      items=items)
def generate_publications_html():
    """Regenerate publications.html and publications.rss from zx-papers.bib."""
    with open('zx-papers.bib', encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    bib_data, rss_items = library_to_html(bib_database)
    # Render the main page from its template.
    with open('html/publications_base.html') as f:
        template = f.read()
    rendered = template.replace("THE_CONTENT_HERE", bib_data)
    with open("publications.html", 'wb') as f:
        f.write(rendered.encode('utf-8'))
    # Emit the matching RSS feed.
    feed = rfeed.Feed(
        title="ZX-calculus publications",
        link="http://zxcalculus.com/publications.rss",
        description="An up to date list of the newest publications related to the ZX-calculus",
        language="en-US",
        lastBuildDate=datetime.datetime.now(),
        items=rss_items)
    with open("publications.rss", 'w', encoding='utf-8') as f:
        f.write(feed.rss())
def newFeed(rname, rdescription, debug=False):
    """
    :param rname: name for the RSS feed
    :param rdescription: description of the RSS feed
    :return: Will return a new (empty publication) feed with
             "localhost:5000/rss/NAME_OF_FEED.xml" as link
    """
    print("____________________________\n"
          "Creating a new rss feed")
    # Spaces in the feed name become underscores in the file name.
    nameOfFeed = "_".join(rname.split(" "))
    if debug:
        feedLink = Path("superform/static/rss/" + nameOfFeed + ".xml")
    else:
        RSS_DIR = request.url_root + "static/rss/"
        feedLink = RSS_DIR + nameOfFeed + ".xml"
    feed = rfeed.Feed(title=rname,
                      link=feedLink,
                      description=rdescription,
                      lastBuildDate=datetime.datetime.now(),
                      docs=None,
                      items=[])
    print("Link to the feed: ", feedLink)
    return feed, nameOfFeed
def root():
    """Serve code.kiwi.com calendar events as an RSS feed."""
    ical_url = (
        'https://calendar.google.com/calendar/ical/'
        + 'skypicker.com_dq9oupgj7ngbo0j41b0smoc0dk%40group.calendar.google.com'
        + '/public/basic.ics')
    r = requests.get(ical_url, timeout=10)
    r.raise_for_status()
    events = []
    # The feed holds a reference to `events`; items appended below are
    # included when feed.rss() is rendered.
    feed = rfeed.Feed(
        title='code.kiwi.com events',
        link='https://goo.gl/aCCGCB',
        description='code.kiwi.com events',
        items=events,
    )
    gcal = icalendar.Calendar.from_ical(r.text)
    for component in gcal.walk():
        if component.name != "VEVENT":
            continue
        uid_part = component['UID'].split('@')[0]
        url = ('https://www.google.com/calendar/event?eid='
               + base64.b64encode(
                   (uid_part + ' skypicker.com_dq9oupgj7ngbo0j41b0smoc0dk@g'
                    ).encode()).decode())
        description = component.get('description')
        if description:
            # NOTE(review): the stripped value is never used below — the item
            # description re-reads the raw field; preserved as-is.
            description = re.sub('Tato událost.*\nPřipojit se.*', '',
                                 description).strip()
        day_label = component.get('dtstart').dt.strftime('%e %B').strip()
        events.append(
            rfeed.Item(
                title=day_label + ' | ' + str(component.get('summary')),
                link=url,
                description=str(component.get('description')),
                guid=rfeed.Guid(url),
            ))
    return Response(feed.rss(), mimetype='application/rss+xml')
def generate(self):
    """Render this scraper's collected jobs as an RSS 2.0 document string."""
    author = 'Workable'
    entries = []
    for job in self.jobs:
        # "<position>, <company>" makes the item title; the job link doubles
        # as the GUID.
        entries.append(rfeed.Item(
            title=job.get_position() + ', ' + job.get_company(),
            link=job.get_link(),
            description=job.get_description(),
            author=author,
            guid=rfeed.Guid(job.get_link()),
            pubDate=job.get_date()))
    feed = rfeed.Feed(title="Workable's missing RSS feed",
                      link='http://mylk.wtf/workable-missing-rss',
                      description="Workable's missing RSS feed",
                      language='el-GR',
                      lastBuildDate=datetime.now(),
                      items=entries)
    return feed.rss()
async def make_feed():
    """Build an rfeed.Feed of Awful Hospital archive pages, newest first."""
    base_url = 'http://www.bogleech.com/awfulhospital/archive.html'
    async with aiohttp.request('GET', base_url) as resp:
        if resp.status != 200:
            # Cleanup: dropped the stray f-prefix — no placeholders here.
            raise NotFound('Awful Hospital seems to be down right now')
        story_html = await resp.text()
        last_modified = resp.headers.get('Last-Modified')
    if last_modified:
        last_modified = datetime.strptime(last_modified,
                                          '%a, %d %b %Y %H:%M:%S %Z')
    else:
        last_modified = datetime.now()
    # The HTML is so gunked up even BeautifulSoup won't cut it
    # I'm pretty sure Bogleech updates it directly in Notepad
    # May the old gods forgive me
    layers = []
    for match in re.finditer(r'<a href="(.*?)">(.*?)<br>', story_html):
        link, title = match.group(1, 2)
        # Bug fix: the old re.sub replacement callable returned None for any
        # tag other than </a>, and re.sub raises TypeError on a None
        # replacement.  Strip the closing </a> directly instead; any other
        # markup in a title is left as-is rather than crashing the feed.
        title = html.unescape(title.replace('</a>', ''))
        layers.append(rfeed.Item(title=title, link=link))
    layers.reverse()
    return rfeed.Feed(title='Awful Hospital',
                      description='Seriously the worst ever.',
                      link='http://www.bogleech.com/awfulhospital/',
                      lastBuildDate=last_modified,
                      pubDate=last_modified,
                      generator=generator_name,
                      items=layers)
# NOTE(review): fragment — the enclosing loop/function headers of this SMBC
# scraper fall outside this view; `link`, `comic_img`, `title`, `votey_img`,
# `page_title`, `clines`, `pub_time`, `item_list`, `limit`, `config` and
# `github_url` are all defined earlier.
print(f"\nLink: {link}")
print(f" - Comic: {comic_img}")
print(f" - Title: {title}")
print(f" - Votey: {votey_img}")
# One RSS item per comic page; the permalink doubles as the GUID.
item = rfeed.Item(
    title=page_title,
    link=link,
    description='\n'.join(clines),
    guid=rfeed.Guid(link),
    pubDate=pub_time,
)
item_list.append(item)
limit -= 1
out_feed = rfeed.Feed(
    title="Saturday Morning Breakfast Cereal",
    link='https://www.smbc-comics.com/',
    description="RSS feed for Saturday Morning Breakfast Cereal",
    language="en-US",
    lastBuildDate=datetime.now(),
    items=item_list,
    generator=f"smbc-rss.py {github_url}"
)
# Write the finished feed into the configured feed directory.
out_feed_path = os.path.join(config.get('feed_dir'), "smbc-rss.xml")
with open(out_feed_path, 'w') as out_feed_file:
    out_feed_file.write(out_feed.rss())
def build_website(in_path,
                  ignore_empty_posts=True,
                  index_template="templates/index.html",
                  post_template="templates/post.html",
                  css_and_assets_path="templates",
                  extension="md",
                  index_paste_where="<!--###POSTS_LIST###-->",
                  post_paste_where="<!--###POST_CONTENT###-->",
                  title_paste_where="<!--###POSTPAGE_TITLE###-->",
                  ul_class="postlist",
                  post_wrapper="postcontent",
                  headerseparator="---",
                  obligatory_header=['title'],
                  optional_header=['author', 'timestamp', 'tags', 'excerpt'],
                  excerpt_type="chars",
                  excerpt_len="500",
                  excerpts_on=False,
                  readmore="Read more >>",
                  posts_per_page=0,
                  pages_in_multiple_files=False,
                  postlist_date_format="%d %b '%y",
                  rss_feed_on=True,
                  rss_feed_url="rss",
                  blurb_is_manual_excerpt=False,
                  rss_max_posts_number=10,
                  blog_domain="",
                  rss_feed_description='',
                  rss_feed_title="My blog's RSS feed"):
    """End-to-end static-site build: parse posts, render index and post pages,
    optionally emit an RSS feed, then copy css/assets/lib into site/.

    Each stage is wrapped in its own try/except that prints the error and
    continues — a failure in one stage cascades into the next (e.g. an
    undefined `fresh_posts`), which is preserved here as-is.

    NOTE(review): the list defaults (obligatory_header, optional_header) are
    mutable and shared across calls — confirm no callee mutates them.
    """
    # Call everything
    try:
        fresh_posts = generate_posts(in_path, extension)
    except Exception as e:
        print(
            "Could not generate posts. Did you provide correct path to the post folder?"
        )
        print(str(e))
    try:
        filtered_posts = filter_bad_dates(fresh_posts)
    except Exception as e:
        print("Could not filter posts. Dunno why.")
        print(str(e))
    try:
        ordered_posts = order(filtered_posts)
    except Exception as e:
        print("Could not order posts. It's impossible.")
        print(str(e))
    try:
        # Parse headers, prettify dates and build excerpts for every post.
        for post in ordered_posts:
            post.get_content(headerseparator=headerseparator,
                             obligatory=obligatory_header,
                             optional=optional_header)
            post.build_pretty_date(date_format=postlist_date_format)
            post.get_excerpt(len_type=excerpt_type, excerpt_len=excerpt_len)
    except Exception as e:
        print(
            "Something went wrong with generating content and prettyfying dates. WHY?"
        )
        print(str(e))
    # Delete target folder so it can be rebuilt without conflicts
    try:
        shutil.rmtree("site", ignore_errors=True)
    except Exception as e:
        print(
            "Could not delete previous site folder. Check file permissions for the script."
        )
        print(str(e))
    try:
        build_site_folders()
    except Exception as e:
        print("Folders could not be built. Check file permissions.")
        print(str(e))
    try:
        build_index_page(ordered_posts,
                         index_template,
                         ignore_empty=ignore_empty_posts,
                         paste_where=index_paste_where,
                         ul_class=ul_class,
                         excerpts_on=excerpts_on,
                         readmore=readmore,
                         posts_per_page=posts_per_page,
                         pages_in_multiple_files=pages_in_multiple_files)
    except Exception as e:
        print("Could not build index page. Did you provide a template?")
        print(str(e))
    try:
        build_posts_folder(ordered_posts,
                           post_template,
                           ignore_empty=ignore_empty_posts,
                           in_path=in_path,
                           extension=extension,
                           paste_where=post_paste_where,
                           paste_where_title=title_paste_where,
                           wrapper_class=post_wrapper)
    except Exception as e:
        print("Could not build post pages. Did you provide a template?")
        print(str(e))
    try:
        # Build RSS Feed
        if rss_feed_on == True:
            # Loop to make items for the rfeed feed object
            rss_item_list = []
            # NOTE(review): the "- 1" makes the slice below drop the last
            # post whenever there are fewer posts than rss_max_posts_number —
            # looks like an off-by-one; confirm intent before changing.
            last_post_index = min(rss_max_posts_number, len(ordered_posts) - 1)
            for post in ordered_posts[0:last_post_index]:
                rss_item_list.append(
                    rfeed.Item(title=post.title,
                               link=blog_domain + "/posts/" + post.filename + ".html",
                               description=post.excerpt,
                               author=post.author,
                               guid=rfeed.Guid(blog_domain + "/posts/" +
                                               post.filename + ".html"),
                               pubDate=post.original_ugly_date))
            rss_feed = rfeed.Feed(title=rss_feed_title,
                                  link=blog_domain + "/" + rss_feed_url,
                                  description=rss_feed_description,
                                  language=locale.getlocale()[0],
                                  lastBuildDate=datetime.datetime.now(),
                                  items=rss_item_list)
            # Writing the RSS feed to a file at the specified location
            with open("site/" + rss_feed_url, 'w+') as rss_target:
                rss_target.write(rss_feed.rss())
    except Exception as e:
        print(
            "Could not generate the RSS feed or decide whether it should be generated at all"
        )
        print(str(e))
    # Copy all css, assets and lib
    try:
        copytree(css_and_assets_path + "/css", "site/css")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/css folder but the folder does not exist! Make one, even empty!")
        print(str(e))
    try:
        copytree(css_and_assets_path + "/assets", "site/assets")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/assets folder but the folder does not exist! Make one, even empty!"
        )
        print(str(e))
    try:
        copytree(css_and_assets_path + "/lib", "site/lib")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/lib folder but the folder does not exist! Make one, even empty!")
        print(str(e))
def parse(self, soup):
    """Collect items from every configured item source and wrap them in a Feed."""
    collected = []
    for source in self.item_sources:
        collected.extend(source.get_items(soup))
    return rfeed.Feed(**self.feed_template, items=collected)
# NOTE(review): fragment — this resumes inside a try-block within `fun`; the
# enclosing def/for/try headers fall outside this view, so the indentation
# below reconstructs the visible structure only.
        tor_tag = post_soup.find(
            'a', attrs={'title': re.compile('Download attachment')})
        mag_link = mag_tag.attrs['href']
        tor_link = tor_tag.attrs['href']
        data.append((post_name, post_url, tor_link, mag_link))
    # NOTE(review): bare except silently swallows every error for a post;
    # the post is then recorded with no magnet/torrent links.
    except:
        mag_link = None
        tor_link = None
        #print("%s:%s => %s"%(name,post_name,mag_link))
    return (name, data)

# Scrape all names in parallel, then flatten the results into RSS items.
with Pool(16) as p:
    for name, data in p.map(fun, nms):
        movie_list[name] = data
        for ptype, purl, torurl, magurl in data:
            # The magnet link is attached as an enclosure with a fixed
            # placeholder length of 10000 bytes.
            item_list.append(
                rfeed.Item(title=name + ' : ' + ptype,
                           link=purl,
                           enclosure=rfeed.Enclosure(
                               magurl, 10000, 'application/x-bittorrent')))
#print(len(movie_list))
feed = rfeed.Feed(title="TR RSS Feed",
                  description="Unofficial RSS feed for TR",
                  link="localhost",
                  lastBuildDate=datetime.datetime.now(),
                  items=item_list)
print(feed.rss())
# NOTE(review): fragment — this resumes inside a `feedItems.append(rfeed.Item(`
# call whose opening (and the loop over `item` dicts) falls outside this view.
            # NOTE(review): operator precedence makes the whole concatenation
            # the "if" branch — when "version" is missing the title collapses
            # to just "new version" with no app name; confirm intended.
            title = item["title"] + " updated to " + item["version"] if "version" in item else "new version",
            link = "https://db.universal-team.net/" + webName(item["systems"][0]) + "/" + webName(item["title"]),
            description = (item["version_title"] if "version_title" in item else item["version"]) + (("<hr />" + item["update_notes"] if "update_notes" in item else "")),
            author = item["author"],
            guid = rfeed.Guid("https://db.universal-team.net/" + webName(item["systems"][0]) + "/" + webName(item["title"])),
            pubDate = parser.parse(item["updated"]),
            categories = item["systems"],
            # NOTE(review): when "image" is absent this passes [None] as the
            # extensions list — verify rfeed tolerates a None extension, and
            # that Enclosure is accepted as an extension at all.
            extensions = [
                rfeed.Enclosure(
                    url = item["image"],
                    length = len(requests.get(item["image"]).content),
                    type = "image/png"
                ) if "image" in item else None
            ]
        ))

# Only regenerate the feed file when there are items and something new.
if len(feedItems) > 0 and latestUpdate > oldUpdate:
    feed = rfeed.Feed(
        title = "Universal-DB",
        link = "https://db.universal-team.net",
        description = "A database of DS and 3DS homebrew",
        language = "en-US",
        lastBuildDate = datetime.datetime.now(),
        pubDate = datetime.datetime.now(),
        items = feedItems,
        image = rfeed.Image(title = "Universal-DB", url = "https://universal-team.net/images/icons/universal-team.png", link = "https://db.universal-team.net"),
    )
    with open(os.path.join("..", "index.rss"), "w", encoding="utf8") as file:
        file.write(feed.rss())
# NOTE(review): fragment — this resumes inside a per-comic loop; the loop and
# function headers (defining `the_date`, `title`, `url`, `clines`, `entry`,
# `slug`, `root_url`, `github_url`, `feed_dir`, `expires`, `cache_dir`) are
# not visible here, and the final `if` is truncated mid-statement.
    # Midnight of the strip's date, normalized to UTC, becomes pubDate.
    pubtime = datetime.combine(the_date, datetime.min.time())
    pubtime = pubtime.replace(tzinfo=pytz.UTC)
    item = rfeed.Item(title=title,
                      link=url,
                      description='\n'.join(clines),
                      guid=rfeed.Guid(url),
                      pubDate=pubtime)
    item_list.append(item)

# Start building the feed
feed = rfeed.Feed(
    title=entry.get('name'),
    link="{}/{}".format(root_url, slug),
    description="RSS feed for {}".format(entry.get('name')),
    language='en-US',
    lastBuildDate=datetime.now(),
    items=item_list,
    generator="comics-rss.py ({})".format(github_url),
)
feed_path = os.path.join(feed_dir, "{}.xml".format(slug))
with open(feed_path, "w") as feed_file:
    feed_file.write(feed.rss())
# Prune cached strip images older than the expiry window.
if (expires > 0):
    to_prune = []
    candidates = glob.glob("{}/{}-*.gif".format(cache_dir, slug))
    for img in candidates:
        match = re.search(r'(\d{4}-\d{2}-\d{2})', img)
        # NOTE(review): `match.group is None` tests the bound method (always
        # False) — and raises AttributeError when match is None; presumably
        # `match is None` was meant.  Body truncated past this view.
        if (match.group is None):
def generateFeed(self, podcast, feedName):
    """Write the rendered RSS for *podcast* to <self.directory>/<feedName>.xml."""
    feed_items = [self.__createItem(entry) for entry in podcast]
    feed = rfeed.Feed(
        title='Parliament of Australia Podcast (Non-Official)',
        description='Parliament of Australia Podcast (Non-Official)',
        link='https://parlpod.datapunch.net',
        items=feed_items)
    target = os.path.join(self.directory, feedName + '.xml')
    with open(target, 'w') as f:
        f.write(feed.rss())
''' This is the example of the RSS library.'''
import datetime

import rfeed as rss

# (title, link, description, publication time) for each sample article.
article_specs = [
    ("First article",
     "http://www.example.com/articles/1",
     "This is the description of the first article",
     datetime.datetime(2014, 12, 29, 10, 0)),
    ("Second article",
     "http://www.example.com/articles/2",
     "This is the description of the second article",
     datetime.datetime(2014, 12, 30, 14, 15)),
]

# The article link doubles as its GUID.
items = [
    rss.Item(title=title,
             link=link,
             description=description,
             author="Santiago L. Valdarrama",
             guid=rss.Guid(link),
             pubDate=published)
    for title, link, description, published in article_specs
]

feed = rss.Feed(
    title="Sample RSS Feed",
    link="http://www.example.com/rss",
    description="This is an example of how to use rfeed to generate an RSS 2.0 feed",
    language="en-US",
    lastBuildDate=datetime.datetime.now(),
    items=items)

print(feed.rss())