def __init__(self,
             title=None,        # string
             link=None,         # URL as string
             description=None,  # string
             author=None,       # email address as string
             categories=None,   # list of string or Category
             comments=None,     # URL as string
             enclosure=None,    # an Enclosure
             guid=None,         # a unique string
             pubDate=None,      # a datetime
             source=None,       # a Source
             icon_url=None):    # an icon to display
    if guid is not None:
        guid = Guid(guid)
    # Initialise the base class.
    RSSItem.__init__(self, title, link, description, author, categories,
                     comments, enclosure, guid, pubDate, source)
    # Add the media namespace (for item icons).
    self.icon = None
    if icon_url is not None:
        self.icon = MediaContentImage(icon_url)
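# A minimal usage sketch for the subclass above. The class name
# IconRSSItem is an assumption (the snippet only shows __init__); Guid
# and MediaContentImage come from the surrounding code:
item = IconRSSItem(title='Hello',
                   link='https://example.org/hello',
                   description='First post',
                   guid='https://example.org/hello',
                   icon_url='https://example.org/icon.png')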
def __init__(self, **kwargs):
    self.dc = 'http://purl.org/dc/elements/1.1/'
    # Pop the Dublin Core extensions before delegating to RSSItem,
    # which does not accept them.
    self.content = kwargs.pop('content', None)
    self.creator = kwargs.pop('creator', None)
    RSSItem.__init__(self, **kwargs)
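# A sketch of the publish_extensions hook that usually accompanies such
# a subclass (PyRSS2Gen calls it while serialising each item). The
# dc:creator element name follows the namespace stored above; the exact
# serialisation is an assumption, not shown in the original:
def publish_extensions(self, handler):
    if self.creator is not None:
        handler.startElement('dc:creator', {})
        handler.characters(self.creator)
        handler.endElement('dc:creator')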
def refresh(self):
    """Re-fetches the source of this feed, updates the RSS feed
    representation to match, outputs a new RSS feed in XML format, and
    pickles the new state of the feed."""
    try:
        response = self.fetch()
        headers = response.info()
        body = response.read()
        self.lastBuildDate = datetime.datetime.now()
        try:
            self.HTML2RSS(headers, body)
        except Exception as e:
            # Put the exception into the RSS feed itself.
            exception = traceback.format_tb(e.__traceback__)
            description = (
                "<p>Unable to finish scraping this webpage into a feed. "
                "Please get the person in charge of maintaining the "
                "scraped feed (<i>not</i> the person in charge of the "
                "original website) to fix this.</p> "
                "<p>Stack trace:</p> <pre>%s%s</pre>"
                % ('\n'.join(exception), e))
            self.pushRSSItem(
                RSSItem(link=self.url + '#' + str(time.time()),
                        title='Error scraping this feed',
                        description=description))
        self.writeRSS()
        self.pickle()
    except urllib.error.HTTPError as e:
        if e.code == 304:
            # The page hasn't been modified, so doing nothing is exactly
            # the right thing to do.
            pass
        else:
            raise
def toRSSItem(self): title = self.repo.tagname if self.message and len(self.message) > 50: title += " - " + str(Markup.escape(self.message[:50])) + "..." elif self.message: title += " - " + str(Markup.escape(self.message)) if self.dbkeywords: title += " - " + ",".join(self.dbkeywords) description = "<pre>" description += str(self.getpprint(True)) description += "</pre>" if type(title) != unicode: title = unicode(title, 'utf-8') if type(description) != unicode: description = unicode(description, 'utf-8') title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore') description = unicodedata.normalize('NFKD', description).encode( 'ascii', 'ignore') guid = Config.rooturl + "/commit/" + self.repo.tagname + "/" + self.uniqueid link = '' if self.repo.viewlink: link = self.repo.viewlink.replace('%ID', self.uniqueid) else: link = guid item = RSSItem(title=title, link=link, description=description, guid=Guid(guid, isPermaLink=0), pubDate=unixToDatetime(self.date)) return item
def toRSSItem(self): title = self.repo.tagname if self.message and len(self.message) > 50: title += " - " + self.message[:50] + "..." elif self.message: title += " - " + self.message if self.dbkeywords: title += " - " + ",".join(self.dbkeywords) description = "<pre>" description += self.getpprint() description += "</pre>" title = unicodedata.normalize('NFKD', unicode(title, 'utf-8')).encode( 'ascii', 'ignore') description = unicodedata.normalize('NFKD', unicode(description, 'utf-8')).encode( 'ascii', 'ignore') link = '' if self.repo.viewlink: link = self.repo.viewlink.replace('%ID', self.uniqueid) item = RSSItem(title=title, link=link, description=description, guid=Config.rooturl + "/commit/" + self.repo.tagname + "/" + self.uniqueid, pubDate=unixToDatetime(self.date)) return item
def posts_feed():
    base_url = url_for('general.index', _external=True)
    items = []
    posts = Post.get_published(num=10).all()
    for post in posts:
        post_url = urljoin(base_url, post.url)
        # TODO: Add a real description
        item = RSSItem(title=post.title,
                       link=post_url,
                       description=post.body.split('\r\n', 1)[0],
                       author='{} ({})'.format(post.author.email,
                                               post.author.full_name),
                       categories=[tag.name for tag in post.tags],
                       guid=Guid(post_url),
                       pubDate=post.pub_date)
        items.append(item)
    feed_config = current_app.config['BLOG_POSTS_FEED']
    rss2_feed = RSS2(title=feed_config['title'],
                     link=base_url,
                     description=feed_config['description'],
                     language='en-us',
                     webMaster=feed_config['webmaster'],
                     lastBuildDate=posts[0].pub_date if posts else None,
                     ttl=1440,
                     items=items)
    return current_app.response_class(rss2_feed.to_xml(encoding='utf-8'),
                                      mimetype='application/rss+xml')
def event_to_rssitem(self, event):
    """Convert an event (Python dict) to an RSSItem object."""
    title = (CrabStatus.get_name(event['status']) + ': ' +
             event['user'] + ' @ ' + event['host'])
    if event['command'] is not None:
        title += ': ' + event['command']

    link = self.base + '/job/' + str(event['id'])
    if event['finishid'] is not None:
        link += '/output/' + str(event['finishid'])

    output = ''
    if event['stdout']:
        output += event['stdout']
    if event['stderr']:
        if event['stdout']:
            output += '\n\nStandard Error:\n\n'
        output += event['stderr']

    date = self.store.parse_datetime(event['datetime'])
    guid = ':'.join(['crab', self.fqdn, str(event['id']),
                     str(calendar.timegm(date.timetuple())),
                     str(event['status'])])

    info = {}
    if output != '':
        info['description'] = '<pre>' + output + '</pre>'

    return RSSItem(title=title,
                   link=link,
                   pubDate=date,
                   guid=Guid(guid, isPermaLink=False),
                   **info)
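# A hypothetical event dict illustrating the keys event_to_rssitem
# reads; every value here is invented for the example:
example_event = {
    'status': 0,
    'user': 'alice',
    'host': 'node1.example.org',
    'command': 'backup.sh',
    'id': 42,
    'finishid': None,
    'stdout': 'backup complete',
    'stderr': '',
    'datetime': '2024-01-01 00:00:00',
}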
def generate_rss():
    messages, _ = get_xugubao_news()
    items = [
        RSSItem(
            # Fall back to the content image when a message has no title.
            title=msg.get("title") or
                  f'<img src="data:image/png;base64,{msg.get("content_image")}" />',
            link=f'https://xuangubao.cn/article/{msg.get("id")}',
            description=generate_content(msg),
            # PyRSS2Gen expects a datetime here, not a pre-formatted
            # string (assumes "import datetime").
            pubDate=datetime.datetime.fromtimestamp(msg.get("created_at")))
        for msg in messages
    ]
    return items
def generate_item(msg):
    content_template = """
    {{msg.get("content")}}
    <div><a href="{{msg.get("link")}}">阅读原文</a></div>
    """  # "阅读原文" means "read the original article"
    content = Template(content_template).render(msg=msg)
    return RSSItem(title=msg.get("title"),
                   link=msg.get("link"),
                   description=content,
                   pubDate=msg.get("pubdate"),
                   enclosure=msg.get("enclosure"))
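# Usage sketch: PyRSS2Gen expects the enclosure value to be an
# Enclosure instance, so a msg dict would be assembled roughly like
# this (all values invented; assumes "import datetime" and PyRSS2Gen's
# Enclosure):
msg = {"title": "Episode 1",
       "link": "https://example.org/ep1",
       "content": "Show notes...",
       "pubdate": datetime.datetime(2024, 1, 1),
       "enclosure": Enclosure("https://example.org/ep1.mp3",
                              123456, "audio/mpeg")}
item = generate_item(msg)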
def create_rss():
    'Generate XML file'
    title = u'Dealabs, tous les deals hots - Filtrés'  # "Dealabs, all the hot deals - Filtered"
    dest = "/var/www/example.org/lab/rss"
    url = "https://lab.example.org/rss/"
    filename = 'dealabs.xml'
    rss = RSS2(title=title.encode('utf-8'),
               # Note: os.path.join happens to work for URLs on POSIX,
               # but urljoin would be the proper tool here.
               link=os.path.join(url, filename),
               description=title.encode('utf-8'),
               lastBuildDate=datetime.now(),
               items=[RSSItem(**article) for article in filter_deals()])
    with open(os.path.join(dest, filename), "w") as output:
        rss.write_xml(output, encoding='utf-8')
def rss(self, language=None, *args, **kwargs):
    if language:
        l = models.get_language(language)
    conf = cherrypy.request.app.config['ligiloj']
    query = models.Link().select(models.Link, models.Language).join(models.Language)
    if language:
        query = query.where(models.Link.language == l)
    cherrypy.response.headers['Content-Type'] = 'application/xml'
    return RSS2(
        title=u'{0} - {1}'.format(conf['site_title'],
                                  l.name if language else conf['global_title_text']),
        link=conf['rss_site_url'],
        description=conf['rss_description'],
        language=language or conf['rss_default_language'],
        items=[RSSItem(title=link.title if language
                       else u"{0}: {1}".format(link.language.name, link.title),
                       link=link.url,
                       # pubDate should ideally be a datetime; PyRSS2Gen
                       # emits this ISO string verbatim.
                       pubDate=link.published.isoformat(),
                       # Guid's second argument is isPermaLink, so any
                       # non-empty id string here means "true".
                       guid=Guid(link.url, str(link.id)))
               for link in query]).to_xml('utf-8')
def generate_rss(self):
    if 'rss_title' not in self.config or 'rss_description' not in self.config:
        return
    RSS2(title=self.config['rss_title'],
         link=self.root_url,
         description=self.config['rss_description'],
         lastBuildDate=datetime.datetime.now(),
         items=[RSSItem(title=entry['title'],
                        link=self.root_url + entry['link'],
                        description=entry['html'],
                        guid=Guid(self.root_url + entry['link']),
                        pubDate=datetime.datetime.strptime(
                            entry['date'][:10], '%Y-%m-%d'))
                for entry in self.entries]
         ).write_xml(open(self.out + 'feed.xml', 'wb'), encoding='utf-8')
def read_rss():
    rss = []
    count = 0
    while exists(RSS_FILE + str(count)):
        with open(RSS_FILE + str(count)) as source:
            title = source.readline()
            url = source.readline()
            # The stored date line is a Python tuple literal; for
            # untrusted input, ast.literal_eval would be safer here.
            date = eval(source.readline())
            description = ''.join(source.readlines())
            rss.append(RSSItem(title=title,
                               link=BASE_URL + url,
                               description=description,
                               guid=Guid(url),
                               pubDate=datetime(*date[0:5])))
        count += 1
    return rss
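# The loop above implies each RSS_FILE<N> record has this layout (a
# sketch inferred from the readline() calls; the values are invented):
#
#   My post title
#   /posts/my-post
#   (2024, 1, 1, 12, 0)
#   <p>Body HTML, possibly spanning
#   several lines...</p>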
def compile_rss(posts, conf, outpath):
    """Compile a list of Posts to the specified outpath."""
    items = [RSSItem(title=p['title'],
                     # os.path.join happens to work for URLs on POSIX,
                     # but urljoin would be the proper tool here.
                     link=os.path.join(conf['SITE_URL'], p['category'], p['slug']),
                     description=p['html'],
                     pubDate=p['published_at'])
             for p in posts]
    rss = RSS2(title=conf['SITE_NAME'],
               link=conf['SITE_URL'],
               description=conf['SITE_DESC'],
               lastBuildDate=datetime.now(),
               items=items)
    with open(outpath, 'w') as outfile:
        rss.write_xml(outfile)
def main():
    fourtwenty = next420()
    place = random.choice(fourtwenty['places'])
    if len(fourtwenty['places']) > 1:
        title = "Get ready for #fourtwenty at {0} and other fine places.".format(place)
    else:
        title = "Get ready for #fourtwenty at {0}.".format(place)
    with open('420.rss', 'w') as f:
        f.write(RSS2(title='Global 4:20 clock',
                     link='https://zzzen.com/420',
                     description='Keep up with where #FourTwenty happens next',
                     language='en',
                     items=[RSSItem(title=title,
                                    link='https://zzzen.com/420#{0}'.format(
                                        urllib2.quote(place)))]
                     ).to_xml('utf-8'))
def menu():
    resource = urllib2.urlopen("http://www.ufrgs.br/ufrgs/ru")
    page = BeautifulSoup(resource)
    items = []
    for ru in page.find_all("div", "ru"):
        ru_name = ru.h3.contents[0]
        desc = ', '.join([(item or '').strip()
                          for item in ru.div.contents
                          if not hasattr(item, 'contents')])
        items.append(RSSItem(
            title='%s - %s' % (ru_name, date.today().strftime('%d/%m/%Y')),
            link='http://www.ufrgs.br/ufrgs/ru',
            description=desc,
            guid=Guid(ru_name + date.today().isoformat()),
        ))
    feed = RSS2(
        title=u"Cardápio do RU-UFRGS - diário",  # "Daily RU-UFRGS menu"
        link='http://www.ufrgs.br/ufrgs/ru',
        # "Today's menu at the UFRGS university restaurant"
        description=u"Cardápio do dia no Restaurante Universitário da UFRGS",
        pubDate=datetime.today(),
        items=items,
    )
    return feed.to_xml()
def HTML2RSS(self, headers, body):
    soup = BeautifulSoup(body, 'html.parser')
    items = []
    for item in soup.find_all("tr", "titlebg2"):
        # The last anchor in the row links to the topic itself.
        a = item.find_all("a")[-1]
        link = a['href']
        subject = a.text
        if 'ICO' not in subject:
            continue
        logging.info(subject)
        items.append(RSSItem(author='x',
                             title=subject,
                             link=link,
                             pubDate='',
                             guid=Guid(link),
                             description=''))
    self.addRSSItems(items)
def HTML2RSS(self, unused_headers, body):
    html = Dispatch('htmlfile')
    html.writeln(body)
    items = []
    # Locate the fourth <UL> element in the document.
    count = 0
    for item in html.body.all:
        if item.tagName == 'UL':
            count += 1
            if count == 4:
                break
    theUL = item.all
    for item in theUL:
        if item.tagName == 'LI':
            title = item.childNodes[0].innerText
            link = item.childNodes[0].outerHTML
            if item.childNodes.length >= 2:
                description = item.innerText
            else:
                description = ''
            items.append(RSSItem(title=title,
                                 description=description,
                                 link=link))
    self.addRSSItems(items)
def __init__(self, **kwargs):
    # Pop the media:content payload before delegating to RSSItem.
    self.media_content = kwargs.pop('media_content')
    RSSItem.__init__(self, **kwargs)
links = []
links_found = {}
for link in all_links:
    href = link.get('href', None)
    if href and href.startswith('/comics/oots'):
        link_uri = base_uri + href
        # Skip links that have already been processed.
        if link_uri in links_found:
            continue
        image = get_image_for_uri(link_uri)
        links.append((link_uri,
                      RSSItem(title=link.contents[0],
                              link=link_uri,
                              description=image,
                              pubDate=datetime.now())))
        links_found[link_uri] = True
        # If we already found 10 entries, let's get out of this thing.
        if len(links_found) > 10:
            break
links.reverse()

# Do we have a cache already?
try:
    cache_file = open('links.cache')
    cached_data = pickle.load(cache_file)
    cache_file.close()
except IOError:
    cached_data = {}
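# A sketch of the matching cache write-back, which the fragment above
# implies but does not show (the 'links.cache' path is reused; whether
# and when the original writes it back is an assumption):
with open('links.cache', 'wb') as cache_file:
    pickle.dump(cached_data, cache_file)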
def __init__(self, sparkle_release_note_link=None, **args):
    self.sparkle_release_note_link = sparkle_release_note_link
    RSSItem.__init__(self, **args)
def __init__(self, geo_lat=None, geo_long=None, *args, **kwargs):
    RSSItem.__init__(self, *args, **kwargs)
    self.geo_lat = geo_lat
    self.geo_long = geo_long
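# A minimal usage sketch for the geo-aware subclass above (the class
# name GeoRSSItem is an assumption -- the snippet does not show it):
item = GeoRSSItem(geo_lat=48.8584, geo_long=2.2945,
                  title='Eiffel Tower',
                  link='https://example.org/eiffel')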
def __init__(self, **kwargs):
    # Pop the extension field before delegating to RSSItem.
    self.content = kwargs.pop('content', None)
    RSSItem.__init__(self, **kwargs)
hit_list = []
for name in series:
    name = urllib.quote(name)
    feed = feedparser.parse(
        'http://ezrss.it/search/index.php?show_name=%s&quality=720p&mode=rss' % name)
    if len(feed['items']) == 0:
        feed = feedparser.parse(
            'http://ezrss.it/search/index.php?show_name=%s&mode=rss' % name)
    print feed.url
    hit_list.append(feed)

# Get the feeds and join them in one big list.
feeds = hit_list
print "Found", len(feeds), "feeds."
entries = []
for feed in feeds:
    entries.extend(feed['items'])

# Sort the entries newest-first (decorate-sort-undecorate).
decorated = [(entry["date_parsed"], entry) for entry in entries]
decorated.sort()
decorated.reverse()
entries = [entry for (date, entry) in decorated]

items = [RSSItem(**item) for item in entries]
feeds = RSS2(title="My series feed",
             description="This feed is an aggregation of various feeds",
             link="",
             items=items)
f = open('feed.xml', 'w')
f.write(feeds.to_xml())
f.close()
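# Caveat sketch: feedparser entries carry many keys that RSSItem's
# __init__ does not accept, so RSSItem(**item) above will typically
# raise TypeError; reducing each entry to supported fields first is one
# way around that (the field mapping here is an assumption):
items = [RSSItem(title=entry.get('title'),
                 link=entry.get('link'),
                 description=entry.get('summary'))
         for entry in entries]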
link = urljoin(BASE_URL, a.attrib["href"])
guid = link
if "rel" in a.attrib:
    author = a.text  # NOT COMPLIANT
description = work.xpath(".//blockquote[@class='userstuff summary']")[0].text_content().strip()
category = work.xpath(".//a[@class='tag']")[0].text
try:
    comments = urljoin(BASE_URL, work.xpath(".//dd[@class='comments']/a/@href")[0])
except Exception:
    comments = None
pubDate = dateparser.parse(work.xpath(".//p[@class='datetime']")[0].text)
item = RSSItem(title=title,
               link=link,
               description=description,
               guid=Guid(guid),
               pubDate=pubDate,
               comments=comments)
items.append(item)
print(title, link, author, description, category, comments, pubDate)
# Reset per-work state before the next iteration.
item = None
link = None
description = None
guid = None
pubDate = None
comments = None
rss = RSS2(