def aggregator(feeds, max_entries=5):
    """
    ## Embeds a feed aggregator

    - ``feeds`` is a list of feed urls or a comma separated string of feed
      urls (e.g. http://rss.cbc.ca/lineup/topstories.xml)
    - ``max_entries`` is the max number of displayed entries per feed (default=5)
    """
    import gluon.contrib.feedparser as feedparser
    # accept either a list of urls or a comma separated string
    lfeeds = feeds.split(",") if isinstance(feeds, (str, unicode)) else feeds
    content = DIV(_id='web2py_aggregator')
    for feed in lfeeds:
        d = feedparser.parse(feed)
        # channel header (title linking to the channel) followed by its entries
        content.append(A(d.channel.title,
                         _href=d.channel.link,
                         _rel=d.channel.description))
        entries = UL()
        for entry in d.entries[:max_entries]:
            entries.append(LI(A(entry.title, ' ', SPAN(entry.updated),
                                _href=entry.link,
                                _rel=entry.description,
                                _class='web2py_aggregator_link')))
        content.append(entries)
    return content
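A minimal usage sketch for the aggregator() helper above, assuming it is defined in a web2py model file; the action name and the second feed url are illustrative placeholders:

def news():
    # hypothetical controller action; render the widget in a view with {{=widget}}
    widget = aggregator("http://rss.cbc.ca/lineup/topstories.xml,"
                        "http://rss.slashdot.org/Slashdot/slashdot/to",
                        max_entries=3)
    return dict(widget=widget)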
def group_feed_reader(group, mode='div', counter='5'):
    """parse group feeds"""
    url = "http://groups.google.com/group/%s/feed/rss_v2_0_topics.xml?num=%s" % \
          (group, counter)
    from gluon.contrib import feedparser
    g = feedparser.parse(url)
    if mode == 'div':
        html = XML(TAG.BLOCKQUOTE(
            UL(*[LI(A(entry['title'] + ' - ' +
                      entry['author'][entry['author'].rfind('('):],
                      _href=entry['link'], _target='_blank'))
                 for entry in g['entries']]),
            _class="boxInfo",
            _style="padding-bottom:5px;"))
    else:
        html = XML(UL(*[LI(A(entry['title'] + ' - ' +
                             entry['author'][entry['author'].rfind('('):],
                             _href=entry['link'], _target='_blank'))
                        for entry in g['entries']]))
    return html
def get_feeds(data):
    session.forget()
    import gluon.contrib.feedparser as feedparser
    pub = CAT()
    for f in data:
        try:
            d = feedparser.parse(f.link)
            entradas = [
                DIV(A(entry.title[:50] + '...',
                      _href=entry.link, _class="noticia_link"),
                    ' ',
                    DIV(prettydate(guess_updated(entry.updated)) or entry.updated,
                        _class="noticia_meta"),
                    _class="noticia_contenido")
                for entry in d.entries[:3]]
            pub.append(
                LI(DIV(H5(A(str(f.title).capitalize(),
                            _href=f.link,
                            _style="white-space:normal !important;")),
                       CAT(entradas),
                       _class="thumbnail well"),
                   _class="span2"))
        except Exception as e:
            pub.append(SPAN("!", _class="badge"))
    return pub
def crawler_rss(link, records=10):
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse(link)
    i = 0
    div = DIV()
    for entry in d.entries:
        if i == records:
            break
        ul = UL(_class='contentul row_' + str(i + 1))
        if find_imageURL_in_content(entry.description) != '':
            print(entry.description)
            li = LI(_class='image')
            a = A(_href=entry.link, _target='_blank')
            img = IMG(_src=find_imageURL_in_content(entry.description))
            a.append(img)
            li.append(a)
            ul.append(li)
            li1 = LI(_class='name')
        else:
            li1 = LI(_class='name_no_img')
        a1 = A(_href=entry.link, _target='_blank')
        a1.append(entry.title)
        li1.append(a1)
        ul.append(li1)
        div.append(ul)
        i += 1
    return div
def _u2d(feedlink):
    import gluon.contrib.feedparser as feedparser
    import urllib2
    fidx = feedlink[0]
    flink = feedlink[1]
    feed = feedparser.parse(flink)
    maxfeeds = 4
    limite = 0
    #print('%s: %s' % (request.now.now(), db.feed[fidx].title))
    for e in feed.entries:
        # check whether the fetched article is already in the db
        #edata = db((db.noticia.feed == fidx) & (db.noticia.title == XML(e.title))).select(db.noticia.id)
        if limite == maxfeeds:
            break
        try:
            xurl_api = choice(xurl_service)
            xurl = urllib2.urlopen("%(api)s=%(longurl)s" %
                                   dict(api=xurl_api, longurl=e.link)).read()
        except Exception as err:
            print('Could not shorten the url: %s' % err)
            continue
        #last8news = db(db.noticia).count() - 8
        no_existe = db(db.noticia.title.contains(XML(e.title))).isempty()
        # if nothing is found, insert into the db; otherwise do nothing
        if no_existe:
            print('\t%s' % xurl)
            try:
                actualizado = e.updated
            except:
                actualizado = request.now.now()
            try:
                DESCRIPTION = e.description
            except:
                DESCRIPTION = e.link
            try:
                db.noticia.insert(title=XML(e.title),
                                  link=e.link,
                                  description=XML(DESCRIPTION),
                                  updated=actualizado,
                                  created_on=request.now.now(),
                                  feed=fidx,
                                  shorturl=xurl)
                db.commit()
            except Exception as err:
                print('Error saving news item: %s' % err)
        limite += 1
def identica(self):
    import gluon.contrib.feedparser as feedparser
    identica_user = self.user
    limite = 0
    if self.timeline == 'friends':
        identica_feed = 'friends_timeline'
        link2src = A('Timeline Contactos',
                     _href='http://identi.ca/%s/all' % identica_user,
                     _class='title',
                     _title='Timeline público de mi red de contactos')
    elif self.timeline == 'user':
        identica_feed = 'user_timeline'
        link2src = A('@' + identica_user,
                     _href='http://identi.ca/' + identica_user,
                     _class='title',
                     _title='Mi microblog en identi.ca')
    urlfeed = 'http://identi.ca/api/statuses/%(tl)s/%(user)s.rss' % \
              dict(user=identica_user, tl=identica_feed)
    feed = feedparser.parse(urlfeed)
    identica = DIV(link2src, _class='microblog')
    dents = UL(_class='dents')
    for dent in feed.entries:
        if limite == self.limit:
            break
        limite = limite + 1
        if self.timeline:
            try:
                #autor = XML(B(str(dent.title).split(':')[0]))
                autor = dent.title.split(':')[0] + ': '
                dents.append(LI(B(autor), XML(dent.description)))
            except:
                self.timeline = None
                #redirect(URL(f='microblog'))
        else:
            dents.append(LI(XML(dent.description)))
    identica.insert(len(identica), dents)
    '''
    import urllib2
    #import re
    u = urllib2.urlopen(atom).read()
    meta = TAG(u)
    dents = UL()
    for dent in meta.elements('content', _type='html'):
        dents.append(LI(XML(str(dent).replace('&lt;', '<').replace('&gt;', '>'))))
    '''
    return XML(identica)
def planet():
    FILTER = 'web2py'

    import datetime
    import re
    import gluon.contrib.rss2 as rss2
    import gluon.contrib.feedparser as feedparser

    # filter for general (not categorized) feeds
    regex = re.compile(FILTER, re.I)

    # select all feeds
    feeds = db(db.feed).select()
    entries = []
    for feed in feeds:
        # fetch and parse feeds
        d = feedparser.parse(feed.url)
        for entry in d.entries:
            # filter feed entries
            if not feed.general or regex.search(entry.description):
                # extract entry attributes
                entries.append({
                    'feed': {'author': feed.author,
                             'link': feed.link,
                             'url': feed.url,
                             'name': feed.name},
                    'title': entry.title,
                    'link': entry.link,
                    'description': entry.description,
                    'author': hasattr(entry, 'author_detail')
                              and entry.author_detail.name
                              or feed.author,
                    'date': datetime.datetime(*entry.date_parsed[:6])
                })

    # sort entries by date, descending
    entries.sort(key=lambda x: x['date'], reverse=True)
    now = datetime.datetime.now()

    # aggregate rss2 feed with parsed entries
    rss = rss2.RSS2(
        title="Planet web2py",
        link=URL("planet").encode("utf8"),
        description="planet author",
        lastBuildDate=now,
        items=[rss2.RSSItem(title=entry['title'],
                            link=entry['link'],
                            description=entry['description'],
                            author=entry['author'],
                            # guid=rss2.Guid('unknown'),
                            pubDate=entry['date'])
               for entry in entries])

    # return new rss feed xml
    response.headers['Content-Type'] = 'application/rss+xml'
    return rss2.dumps(rss)
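planet() reads rows from a db.feed table that is defined elsewhere; a minimal sketch of a compatible model follows. Only the field names (name, author, url, link, general) come from the code above; the types and validators are assumptions.

# hypothetical model definition matching the fields planet() reads
db.define_table('feed',
    Field('name'),
    Field('author'),
    Field('url', requires=IS_URL()),            # feed url fetched by feedparser
    Field('link', requires=IS_URL()),           # site link shown in the output
    Field('general', 'boolean', default=True))  # general feeds are filtered by FILTER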
def aggregator(feed, max_entries=5):
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse(feed)
    title = d.channel.title
    link = d.channel.link
    description = d.channel.description
    div = DIV(A(B(title), _href=link))
    created_on = request.now
    for entry in d.entries[0:max_entries]:
        div.append(A(entry.title, ' - ', entry.updated, _href=entry.link))
    div.append(DIV(description))
    return div
def get_planet_rss(arg):
    import datetime
    import re
    import gluon.contrib.rss2 as rss2
    import gluon.contrib.feedparser as feedparser

    # filter for general (not categorized) feeds
    regex = re.compile('web2py', re.I)
    feeds = db(db.feed.id > 0).select()
    entries = []
    for feed in feeds:
        # fetch and parse feeds
        d = feedparser.parse(feed.url)
        for entry in d.entries:
            if not feed.general or regex.search(entry.description):
                # extract entry attributes
                entries.append({
                    'feed': {'author': feed.author, 'link': feed.link,
                             'url': feed.url, 'name': feed.name},
                    'title': entry.title,
                    'link': entry.link,
                    'description': entry.description,
                    'author': hasattr(entry, 'author_detail')
                              and entry.author_detail.name or feed.author,
                    'date': datetime.datetime(*entry.date_parsed[:6])
                })
    # sort entries by date, descending
    entries.sort(key=lambda x: x['date'], reverse=True)
    now = datetime.datetime.now()
    # aggregate rss2 feed with parsed entries
    rss = rss2.RSS2(title=PLANET_TITLE,
                    link=URL(r=request, c="default", f="planet"),
                    description=PLANET_DESCRIPTION,
                    lastBuildDate=now,
                    items=[rss2.RSSItem(title=entry['title'],
                                        link=entry['link'],
                                        description=entry['description'],
                                        author=entry['author'],
                                        # guid=rss2.Guid('unknown'),
                                        pubDate=entry['date'])
                           for entry in entries])
    return rss
def aggregator():
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse("http://www.republicain-lorrain.fr/sports/rss")
    return dict(title=d.channel.title,
                link=d.channel.link,
                description=d.channel.description,
                created_on=request.now,
                entries=[dict(title=entry.title,
                              link=entry.link,
                              description=entry.description,
                              created_on=request.now)
                         for entry in d.entries])
def rss(url='http://rss.slashdot.org/Slashdot/slashdot/to'):
    """read and display RSS feed from url <url>"""
    import datetime
    import gluon.contrib.rss2 as rss2
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse(url)
    rss = rss2.RSS2(title=d.channel.title,
                    link=d.channel.link,
                    description=d.channel.description,
                    lastBuildDate=datetime.datetime.now(),
                    items=[rss2.RSSItem(title=entry.title,
                                        link=entry.link,
                                        description=entry.description,
                                        pubDate=datetime.datetime.now())
                           for entry in d.entries])
    response.headers['Content-Type'] = 'application/rss+xml'
    return rss2.dumps(rss)
def get_benfica_rss():
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse("http://feeds.record.xl.pt/?idcanal=11")
    return dict(title=d.channel.title,
                link=d.channel.link,
                description=d.channel.description,
                created_on=request.now,
                entries=[dict(title=entry.title,
                              link=entry.link,
                              description=entry.description,
                              created_on=request.now)
                         for entry in d.entries])
def search():
    classification = request.args(0) or redirect(URL('index'))
    form = SQLFORM.factory(Field('search', default=request.get_vars.search),
                           _method='GET')
    import urllib
    from gluon.contrib.feedparser import parse
    # note: from here on URL refers to the arXiv API string,
    # shadowing the web2py URL() helper inside this function
    URL = 'http://export.arxiv.org/api/query?search_query=%s'
    if form.accepts(request.get_vars):
        query = urllib.quote(' AND '.join(
            ['cat:' + classification] +
            ['all:' + k for k in form.vars.search.strip().split()]))
        entries = parse(URL % query)['entries']
    else:
        entries = []
    return dict(form=form, entries=entries)
def feed():
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse("http://feeds.feedburner.com/observatoire-securite/VxVR")
    title = d.channel.title
    link = d.channel.link
    description = d.channel.description
    created_on = request.now
    entries = []
    for entry in d.entries:
        if entry.title and entry.link:
            entries.append(DIV(A(entry.title, _href=entry.link),
                               _class="gray_gradient"))
    # entries = [dict(title=entry.title, link=entry.link,
    #                 description=entry.description, created_on=request.now)
    #            for entry in d.entries]
    return DIV(*entries[:5])
def plugin_feedreader(name, source='google-group'):
    """parse group feeds"""
    from gluon.contrib import feedparser
    if source == 'google-group':
        URL = "http://groups.google.com/group/%(name)s/feed/rss_v2_0_msgs.xml"
    elif source == 'google-code':
        URL = "http://code.google.com/feeds/p/%(name)s/hgchanges/basic"
    else:
        URL = source
    url = URL % dict(name=name)
    g = feedparser.parse(url)
    html = UL(*[LI(A(entry['title'], _href=entry['link']))
                for entry in g['entries'][0:5]])
    return XML(html)
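A usage sketch for plugin_feedreader(); the action name is an assumption, and the group/project name 'web2py' simply follows the FILTER used elsewhere in these snippets:

def community():
    # hypothetical controller action; render with {{=group_msgs}} / {{=commits}}
    return dict(group_msgs=plugin_feedreader('web2py', source='google-group'),
                commits=plugin_feedreader('web2py', source='google-code'))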
def rss_aggregator():
    import datetime
    import gluon.contrib.rss2 as rss2
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse('http://rss.slashdot.org/Slashdot/slashdot/to')
    rss = rss2.RSS2(title=d.channel.title,
                    link=d.channel.link,
                    description=d.channel.description,
                    lastBuildDate=datetime.datetime.now(),
                    items=[rss2.RSSItem(title=entry.title,
                                        link=entry.link,
                                        description=entry.description,
                                        pubDate=datetime.datetime.now())
                           for entry in d.entries])
    response.headers['Content-Type'] = 'application/rss+xml'
    return rss.to_xml(encoding='utf-8')
def search(): classification = request.args(0) or redirect(URL("index")) form = SQLFORM.factory(Field("search", default=request.get_vars.search), _method="GET") import urllib from gluon.contrib.feedparser import parse URL = "http://export.arxiv.org/api/query?search_query=%s" if form.accepts(request.get_vars): query = urllib.quote( " AND ".join(["cat:" + classification] + ["all:" + k for k in form.vars.search.strip().split()]) ) entries = parse(URL % query)["entries"] # title, else: entries = [] return dict(form=form, entries=entries)
def code_feed_reader(project, mode='div'):
    """parse code feeds"""
    url = "http://code.google.com/feeds/p/%s/hgchanges/basic" % project
    from gluon.contrib import feedparser
    g = feedparser.parse(url)
    if mode == 'div':
        html = XML(DIV(UL(*[LI(A(entry['title'], _href=entry['link'],
                                 _target='_blank'))
                            for entry in g['entries'][0:5]]),
                       _class="boxInfo",
                       _style="padding-bottom:5px;"))
    else:
        html = XML(UL(*[LI(A(entry['title'], _href=entry['link'],
                             _target='_blank'))
                        for entry in g['entries'][0:5]]))
    return html
def crawler_rss_more(link):
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse(link)
    i = 0
    div = DIV()
    for entry in d.entries:
        ul = UL(_class='contentul row_' + str(i + 1))
        li1 = LI(_class='name field_2')
        a1 = A(_href=entry.link, _target='_blank')
        a1.append(entry.title)
        li1.append(a1)
        ul.append(li1)
        li2 = LI(_class='heading field_3')
        li2.append(XML(entry.description))
        ul.append(li2)
        div.append(ul)
        i += 1
    return div
def crawler_rss_from_bao_moi(keyword):
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse('http://www.baomoi.com/Rss/RssFeed.ashx?ph=' + keyword + '&s=')
    i = 0
    div = DIV()
    for entry in d.entries:
        ul = UL(_class='contentul row_' + str(i + 1))
        li1 = LI(_class='name field_2')
        a1 = A(_href=entry.link, _target='_blank')
        a1.append(entry.title)
        li1.append(a1)
        ul.append(li1)
        li2 = LI(_class='heading field_3')
        li2.append(XML(entry.description))
        ul.append(li2)
        div.append(ul)
        i += 1
    return div
def update_feed(feed):
    # assumes module-level imports: gluon.contrib.feedparser as feedparser,
    # datetime from datetime, and mktime from time
    status_log = ["Checking Feed: %s" % feed.link]
    try:
        d = feedparser.parse(feed.link)
        if d.channel.has_key("link"):
            link = d.channel.link
        else:
            for link in d.channel.links:
                if link.rel == "self":
                    link = link.href
                    break
            else:
                link = "#"
        # Update Feed Data
        feed.update_record(
            title=d.channel.title,
            description=d.channel.description
                if d.channel.has_key("description") and d.channel.description != ""
                else d.channel.title,
            updated=request.now,
            base_link=link)
        for entry in d.entries:
            entry_feed = db((db.rss_entry.link == entry.link) &
                            (db.rss_entry.feed == feed)).select().first()
            if entry_feed:
                if entry_feed.updated != datetime.fromtimestamp(mktime(entry.updated_parsed)):
                    status_log.append("Updating Entry: %s" % entry.title)
                    # update_record (not update) so the change is written to the db
                    entry_feed.update_record(
                        title=entry.title,
                        description=entry.description or entry.title,
                        updated=datetime.fromtimestamp(mktime(entry.updated_parsed)))
            else:
                status_log.append("Adding Entry: %s" % entry.title)
                db.rss_entry.insert(
                    title=entry.title,
                    feed=feed,
                    link=entry.link,
                    description=entry.description,
                    updated=datetime.fromtimestamp(mktime(entry.updated_parsed)))
    #except Exception as err:
    #    status_log.append("ERROR: %s" % err)
    except:
        status_log.append("ERROR")
    return status_log
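update_feed() depends on db.feed and db.rss_entry tables defined elsewhere; the sketch below lists only the fields the function touches, with assumed types.

# hypothetical table definitions inferred from update_feed(); types are assumptions
db.define_table('feed',
    Field('link', requires=IS_URL()),   # feed url passed to feedparser
    Field('title'),
    Field('description', 'text'),
    Field('base_link'),
    Field('updated', 'datetime'))

db.define_table('rss_entry',
    Field('feed', 'reference feed'),
    Field('link', requires=IS_URL()),
    Field('title'),
    Field('description', 'text'),
    Field('updated', 'datetime'))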
def rss_aggregator():
    import datetime
    import gluon.contrib.rss2 as rss2
    import gluon.contrib.feedparser as feedparser
    d = feedparser.parse("http://rss.slashdot.org/Slashdot/slashdot/to")
    rss = rss2.RSS2(title=d.channel.title,
                    link=d.channel.link,
                    description=d.channel.description,
                    lastBuildDate=datetime.datetime.now(),
                    items=[rss2.RSSItem(title=entry.title,
                                        link=entry.link,
                                        description=entry.description,
                                        pubDate=datetime.datetime.now())
                           for entry in d.entries])
    response.headers["Content-Type"] = "application/rss+xml"
    return rss2.dumps(rss)
request_dict["type"] = usha_cats[sms_dict["categorization"]] return request_dict import datetime import gluon.contrib.feedparser as feedparser url_base = "http://server.domain/rss.php?key=keyrequired" N = 100 start = 0 done = False while done == False: url = url_base + "&limit=" + str(start) + "," + str(N) d = feedparser.parse(url) for entry in d.entries: rec, locd = rss2record(entry) # Don't import duplicates if db(db.rms_sms_request.ush_id == rec['ush_id']).count() == 0: locid = None if locd != {}: # Calculate WKT for display on Map locd['wkt'] = 'POINT(%f %f)' % (locd['lon'], locd['lat']) locid = db.gis_location.insert(**locd) rec["location_id"] = locid smsid = db.rms_sms_request.insert(**rec)
#request_dict["actionable" ] = sms_dict["actionable" ] if sms_dict["categorization"] in usha_cats : request_dict["type"] = usha_cats[sms_dict["categorization"]] return request_dict import datetime import gluon.contrib.feedparser as feedparser url_base = "http://server.domain/rss.php?key=keyrequired" N = 100 start = 0 done = False while done == False: url = url_base + "&limit=" + str(start) + "," + str(N) d = feedparser.parse(url) for entry in d.entries: rec, locd = rss2record(entry) # Don't import duplicates if db(db.rms_sms_request.ush_id == rec['ush_id']).count() == 0: locid = None if locd != {}: # Calculate WKT for display on Map locd['wkt'] = 'POINT(%f %f)' % (locd['lon'], locd['lat']) locid = db.gis_location.insert(**locd) rec["location_id"] = locid smsid = db.rms_sms_request.insert(**rec)