def purge_reload(feed_uid):
  reload(transform)
  feed_uid = int(feed_uid)
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  from singleton import db
  c = db.cursor()
  try:
    # refresh filtering rules
    filters.load_rules(db, c)
    c.execute("delete from fm_items where item_feed_uid=? and item_rating=0",
              [feed_uid])
    c.execute("""delete from fm_tags where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    c.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    c.execute("select feed_xml from fm_feeds where feed_uid=?", [feed_uid])
    feed_xml = c.fetchone()[0]
    db.commit()
    f = feedparser.parse(feed_xml)
    if not f.feed:
      raise ParseError
    normalize.normalize_feed(f)
    clear_errors(db, c, feed_uid, f)
    filters.load_rules(db, c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
  finally:
    c.close()

def apply(self, content, *args, **kwargs):
  item = args[1]
  if self.link_substr in item['link']:
    try:
      # check if this item has not already been loaded before
      guid = item['id']
      from singleton import db, sqlite
      c = db.cursor()
      if sqlite.paramstyle == 'qmark':
        c.execute("select item_link from fm_items where item_guid=?",
                  [guid])
      elif sqlite.paramstyle == 'pyformat':
        c.execute("select item_link from fm_items where item_guid=%(guid)s",
                  {'guid': guid})
      link = c.fetchone()
      c.close()
      if link:
        print >> param.log, 'not dereferencing', guid, '->', link[0]
        item['link'] = link[0]
        return content
      # we haven't seen this article before, buck up and load it
      deref = urllib2.urlopen(item['link']).read()
      m = self.re.search(deref)
      if m and m.groups():
        item['link'] = m.groups()[0]
    except:
      util.print_stack()
  return content

def update_feed_xml(feed_uid, feed_xml):
  """Update a feed URL and fetch the feed. Returns the number of new items"""
  feed_uid = int(feed_uid)
  f = feedparser.parse(feed_xml)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)
  from singleton import db
  c = db.cursor()
  clear_errors(db, c, feed_uid, f)
  try:
    try:
      c.execute("update fm_feeds set feed_xml=?, feed_html=? where feed_uid=?",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
    filters.load_rules(db, c)
    # process_parsed_feed returns (num_added, num_filtered), see add_feed
    num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
  finally:
    c.close()

def run(self):
  from singleton import db
  c = db.cursor()
  while True:
    item_uid, rating = self.in_q.get()
    try:
      c.execute("""update fm_items
      set item_rating=?, item_rated=julianday('now')
      where item_uid=?""", [rating, item_uid])
      fb_token = param.settings.get('fb_token', None)
      if rating == 1 and fb_token:
        c.execute("""select feed_uid, item_link, item_title, feed_private
        from fm_items, fm_feeds
        where item_uid=? and feed_uid=item_feed_uid""", [item_uid])
        feed_uid, url, title, private = c.fetchone()
      # commit unconditionally so the rating update is never lost
      db.commit()
      if rating == 1 and fb_token and not private:
        callout = random.choice(
          ['Interesting: ', 'Notable: ', 'Recommended: ', 'Thumbs-up: ',
           'Noteworthy: ', 'FYI: ', 'Worth reading: '])
        try:
          social.fb_post(fb_token, callout + title, url)
        except social.ExpiredToken:
          notification(db, c, feed_uid, 'Service notification',
                       'The Facebook access token has expired',
                       link='/settings#facebook')
    except:
      util.print_stack()
  # this will never be reached
  c.close()

def title_url(feed_uid):
  feed_uid = int(feed_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("select feed_title, feed_html from fm_feeds where feed_uid=?",
              [feed_uid])
    return c.fetchone()
  finally:
    c.close()

def catch_up(feed_uid):
  feed_uid = int(feed_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("""update fm_items set item_rating=-1
    where item_feed_uid=? and item_rating=0""", [feed_uid])
    db.commit()
  finally:
    c.close()

def link_already(url):
  from singleton import db
  print >> param.activity, 'checking for deja-vu for', url,
  c = db.cursor()
  c.execute("select count(*) from fm_items where item_link like ?",
            [url + '%'])
  l = c.fetchone()
  c.close()
  print >> param.log, l and l[0]
  return l and l[0]

def update_item(item_uid, link, title, content):
  item_uid = int(item_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("""update fm_items set item_link=?, item_title=?,
    item_content=? where item_uid=?""", [link, title, content, item_uid])
    db.commit()
  finally:
    c.close()

def cleanup(db=None, c=None):
  """garbage collection - see param.py

  This is done only once a day, between 3 and 4 AM, as it is quite intensive
  and could interfere with user activity. It can also be invoked by running
  temboz --clean
  """
  if not db:
    from singleton import db
    c = db.cursor()
  from singleton import sqlite_cli
  if getattr(param, 'garbage_contents', False):
    c.execute("""update fm_items set item_content=''
    where item_rating < 0 and item_created < julianday('now')-?""",
              [param.garbage_contents])
    db.commit()
  if getattr(param, 'garbage_items', False):
    c.execute("""delete from fm_items where item_uid in (
      select item_uid from fm_items, fm_feeds
      where item_created < min(julianday('now')-?, feed_oldest-7)
      and item_rating<0 and feed_uid=item_feed_uid)""",
              [param.garbage_items])
    db.commit()
  singleton.snr_mv(db, c)
  c.execute("""delete from fm_tags where not exists(
    select item_uid from fm_items where item_uid=tag_item_uid
  )""")
  db.commit()
  c.execute('vacuum')
  # we still hold the PseudoCursor lock, this is a good opportunity to backup
  try:
    os.mkdir('backups')
  except OSError:
    pass
  prune_feed_guid_cache()
  os.system((sqlite_cli + ' rss.db .dump | %s > backups/daily_'
             + time.strftime('%Y-%m-%d') + '%s') % param.backup_compressor)
  # rotate the log
  os.rename(param.log_filename, 'backups/log_' + time.strftime('%Y-%m-%d'))
  param.log.close()
  param.log = open(param.log_filename, 'a', 0)
  os.dup2(param.log.fileno(), 1)
  os.dup2(param.log.fileno(), 2)
  # delete old backups
  backup_re = re.compile(
    'daily_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.')
  log_re = re.compile(
    'log_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]')
  for fn in os.listdir('backups'):
    if backup_re.match(fn) or log_re.match(fn):
      elapsed = time.time() - os.stat('backups/' + fn).st_ctime
      if elapsed > 86400 * param.daily_backups:
        try:
          os.remove('backups/' + fn)
        except OSError:
          pass

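# For reference, a sketch of the param.py settings cleanup() reads. The
# names are taken from the code above; the values are illustrative
# assumptions, not the project's actual defaults.
backup_hour = 3            # hour of day when update() triggers cleanup()
garbage_contents = 7       # blank item_content of filtered items after N days
garbage_items = 180        # delete filtered items after N days
daily_backups = 14         # days of backups and rotated logs to keep
# (command, suffix) pair substituted into the two %s slots of the backup
# pipeline built by cleanup() above
backup_compressor = ('bzip2 -9', '.bz2')
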
def update_feed_private(feed_uid, private):
  feed_uid = int(feed_uid)
  private = int(bool(private))
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_private=? where feed_uid=?",
              [private, feed_uid])
    db.commit()
  finally:
    c.close()

def set_status(feed_uid, status):
  feed_uid = int(feed_uid)
  status = int(status)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_status=? where feed_uid=?",
              [status, feed_uid])
    db.commit()
  finally:
    c.close()

def hard_purge(feed_uid):
  feed_uid = int(feed_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("delete from fm_items where item_feed_uid=?", [feed_uid])
    c.execute("delete from fm_rules where rule_feed_uid=?", [feed_uid])
    c.execute("delete from fm_feeds where feed_uid=?", [feed_uid])
    db.commit()
  finally:
    c.close()
  filters.invalidate()

def update_feed_html(feed_uid, feed_html):
  """Update a feed HTML link"""
  feed_uid = int(feed_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_html=? where feed_uid=?",
              [feed_html, feed_uid])
    db.commit()
  finally:
    c.close()

def update_feed_desc(feed_uid, feed_desc):
  """Update a feed description"""
  feed_uid = int(feed_uid)
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_desc=? where feed_uid=?",
              [feed_desc, feed_uid])
    db.commit()
  finally:
    c.close()

def update_feed_dupcheck(feed_uid, dupcheck):
  feed_uid = int(feed_uid)
  dupcheck = int(bool(dupcheck))
  # XXX run a dupcheck pass retroactively here if dupcheck == 1
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_dupcheck=? where feed_uid=?",
              [dupcheck, feed_uid])
    db.commit()
  finally:
    c.close()

def update_feed_exempt(feed_uid, exempt):
  feed_uid = int(feed_uid)
  exempt = int(bool(exempt))
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_exempt=? where feed_uid=?",
              [exempt, feed_uid])
    if exempt:
      filters.exempt_feed_retroactive(db, c, feed_uid)
    db.commit()
  finally:
    c.close()

def run():
  # force loading of the database so we don't have to wait an hour to detect
  # a database format issue
  from singleton import db
  c = db.cursor()
  update.load_settings(db, c)
  c.close()
  logging.getLogger().setLevel(logging.INFO)
  server = Server((getattr(param, 'bind_address', ''), param.port), Handler)
  pidfile = open('temboz.pid', 'w')
  print >> pidfile, os.getpid()
  pidfile.close()
  server.serve_forever()

def import_opml(opml_file):
  tree = parse_opml(opml_file)
  from singleton import db
  c = db.cursor()
  ok = 0
  dup = 0
  for feed in tree:
    feed['feed_etag'] = ''
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc)
      values (:xmlUrl, :feed_etag, :htmlUrl, :title, :desc)""", feed)
      ok += 1
    except sqlite.IntegrityError, e:
      # a feed_xml uniqueness violation means the feed is already subscribed
      if 'feed_xml' not in str(e):
        raise
      dup += 1
  # commit the inserts and report the counts to the caller
  db.commit()
  c.close()
  return ok, dup

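# import_opml() expects parse_opml() to yield one dict per OPML <outline>,
# carrying at least the named parameters bound in the insert above. A sample
# element, with made-up values, would look like this:
sample_feed = {
  'xmlUrl': 'http://example.com/atom.xml',   # feed URL, unique in fm_feeds
  'htmlUrl': 'http://example.com/',          # site home page
  'title': 'Example feed',
  'desc': 'A sample feed description',
}
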
def update(where_clause=''):
  from singleton import db
  c = db.cursor()
  # refresh filtering rules
  filters.load_rules(db, c)
  # at 3AM by default, perform house-cleaning
  if time.localtime()[3] == param.backup_hour:
    cleanup(db, c)
  # create worker threads and the queues used to communicate with them
  work_q = Queue.Queue()
  process_q = Queue.Queue()
  workers = []
  for i in range(param.feed_concurrency):
    workers.append(FeedWorker(i + 1, work_q, process_q))
    workers[-1].start()
  # assign work
  c.execute("""select feed_uid, feed_xml, feed_etag, feed_dupcheck,
  strftime('%s', feed_modified) from fm_feeds where feed_status=0 """
            + where_clause)
  for feed_uid, feed_xml, feed_etag, feed_dupcheck, feed_modified in c:
    if feed_modified:
      feed_modified = float(feed_modified)
      feed_modified = time.localtime(feed_modified)
    else:
      feed_modified = None
    work_q.put((feed_uid, feed_xml, feed_etag, feed_modified, feed_dupcheck))
  # None is an indication for workers to stop
  for i in range(param.feed_concurrency):
    work_q.put(None)
  workers_left = param.feed_concurrency
  while workers_left > 0:
    feed_info = process_q.get()
    # exited worker
    if not feed_info:
      workers_left -= 1
    else:
      try:
        update_feed(db, c, *feed_info)
      except:
        util.print_stack()
      db.commit()
      # give reader threads an opportunity to get their work done
      time.sleep(1)
  c.close()

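# FeedWorker itself is not part of this excerpt; the sketch below only
# illustrates the queue contract update() relies on: drain work_q until the
# None sentinel, hand results back on process_q (update() applies them via
# update_feed(db, c, *feed_info)), and post None when exiting so the
# dispatcher can decrement workers_left. fetch_and_parse is a hypothetical
# placeholder, not a real function in the codebase.
class FeedWorkerSketch(threading.Thread):
  def __init__(self, n, work_q, process_q):
    threading.Thread.__init__(self)
    self.setDaemon(True)
    self.work_q, self.process_q = work_q, process_q
  def run(self):
    while True:
      job = self.work_q.get()
      if job is None:
        # signal the dispatcher this worker is done
        self.process_q.put(None)
        return
      feed_uid, feed_xml, feed_etag, feed_modified, feed_dupcheck = job
      # fetch and parse in the worker thread, serialize DB writes in update()
      self.process_q.put(fetch_and_parse(feed_uid, feed_xml, feed_etag,
                                         feed_modified, feed_dupcheck))
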
def update_feed_filter(feed_uid, feed_filter):
  """Update a feed's filtering expression"""
  feed_uid = int(feed_uid)
  feed_filter = feed_filter.strip()
  if feed_filter:
    # check syntax
    compile(filters.normalize_rule(feed_filter), 'web form', 'eval')
    val = feed_filter
  else:
    val = None
  from singleton import db
  c = db.cursor()
  try:
    c.execute("update fm_feeds set feed_filter=? where feed_uid=?",
              [val, feed_uid])
    db.commit()
    filters.invalidate()
  finally:
    c.close()

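# How the syntax check above fails: compile() in 'eval' mode raises
# SyntaxError for a malformed expression, and update_feed_filter() lets it
# propagate so the caller (the web form) can report it. The rule text below
# is a hypothetical example, bypassing filters.normalize_rule() for brevity.
try:
  compile("'sponsored' in title.lower(", 'web form', 'eval')
except SyntaxError, e:
  print >> param.log, 'invalid filter expression:', e
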
def process_request(self):
  try:
    if self.path in ['', '/']:
      self.browser_output(301, None, 'This document has moved.',
                          ['Location: /view'])
      return
    path, query_string = urlparse.urlparse(self.path)[2:5:2]
    vars = []
    if query_string:
      # parse_qsl does not comply with RFC 3986, we have to decode UTF-8
      query_list = [(n, v.decode('UTF-8'))
                    for n, v in urlparse.parse_qsl(query_string, 1)]
      self.input.update(dict(query_list))
    if param.debug:
      logging.info((self.command, self.path, self.request_version, vars))
    if path.endswith('.gif') and path[1:] in self.images:
      self.browser_output(200, 'image/gif', self.images[path[1:]],
                          http_headers=no_expire)
      return
    if path.endswith('.js') and path[1:] in self.rsrc:
      self.browser_output(200, 'text/javascript', self.rsrc[path[1:]],
                          http_headers=no_expire)
      return
    if path.startswith('/tiny_mce'):
      # guard against attempts to subvert security using ../
      path = os.path.normpath('.' + path)
      assert path.startswith('tiny_mce')
      self.set_mime_type(path)
      self.browser_output(200, self.mime_type, open(path).read(),
                          http_headers=no_expire)
      return
    if path.count('favicon.ico') > 0:
      self.favicon()
      return
    if path.endswith('.css'):
      path = path.replace('.css', '_css')
      tmpl = path.split('/', 1)[1].strip('/')
      self.use_template(tmpl, [self.input])
    # all the handlers below require authentication
    if not self.require_auth(param.auth_dict):
      return
    if path.startswith('/redirect/'):
      from singleton import db
      c = db.cursor()
      item_uid = int(path[10:])
      c.execute('select item_link from fm_items where item_uid=%d'
                % item_uid)
      redirect_url = c.fetchone()[0]
      c.close()
      self.browser_output(301, None, 'This document has moved.',
                          ['Location: ' + redirect_url])
      return
    if path.startswith('/threads'):
      frames = sys._current_frames()
      row = 0
      out = []
      if singleton.c_opened:
        out.append('<h1>Open Cursors</h1>\n')
        for curs, tb in singleton.c_opened.iteritems():
          if curs not in singleton.c_closed:
            row += 1
            if row % 2:
              color = '#ddd'
            else:
              color = 'white'
            out.append('<div style="background-color: ' + color
                       + '">\n<pre>')
            out.append(curs.replace('<', '&lt;').replace('>', '&gt;') + '\n')
            out.append('\n'.join(tb[:-2]))
            out.append('</pre></div>\n')
      out.append('<h1>Threads</h1>\n')
      row = 0
      for thread_id, frame in sorted(frames.iteritems()):
        if thread_id == threading.currentThread()._Thread__ident:
          continue
        row += 1
        if row % 2:
          color = '#ddd'
        else:
          color = 'white'
        out.append('<div style="background-color: ' + color + '">\n<pre>')
        out.append('Thread %s (%d refs)\n'
                   % (thread_id, sys.getrefcount(frame)))
        out.append(''.join(traceback.format_stack(frame)).replace(
          '&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
        out.append('\n<hr>\n')
        out.append(pprint.pformat(frame.f_locals).replace(
          '&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
        out.append('\n</pre>\n</div>\n')
      del frames
      self.browser_output(200, 'text/html', ''.join(out))
      return
    if path.startswith('/xmlfeedback/'):
      op, item_uid = path.split('/')[2::2]
      item_uid = item_uid.split('.')[0]
      # for safety, these operations should be idempotent
      if op in ['promote', 'demote', 'basic', 'yappi']:
        if op != 'yappi':
          item_uid = int(item_uid)
        getattr(self, 'op_' + op)(item_uid)
      self.xml()
      return
    if path.startswith('/stem'):
      txt = self.input['q']
      stem = ' '.join(normalize.stem(normalize.get_words(txt)))
      self.browser_output(200, 'text/plain', stem)
      return
    if path.startswith('/add_kw_rule'):
      from singleton import db
      c = db.cursor()
      try:
        filters.add_kw_rule(db, c, **self.input)
      except:
        util.print_stack()
      db.commit()
      c.close()
      self.xml()
      return
    if path.startswith('/del_kw_rule'):
      from singleton import db
      c = db.cursor()
      try:
        filters.del_kw_rule(db, c, **self.input)
      except:
        util.print_stack()
      db.commit()
      c.close()
      self.xml()
      return
    if path.startswith('/stats'):
      from singleton import db
      c = db.cursor()
      c.execute("""select date(item_loaded) as date,
      count(*) as articles,
      sum(case when item_rating=1 then 1 else 0 end) as interesting,
      sum(case when item_rating=0 then 1 else 0 end) as unread,
      sum(case when item_rating=-1 then 1 else 0 end) as filtered
      from fm_items
      where item_loaded > julianday('now') - 30
      group by 1 order by 1""")
      csvfile = cStringIO.StringIO()
      out = csv.writer(csvfile, dialect='excel', delimiter=',')
      out.writerow([col[0].capitalize() for col in c.description])
      for row in c:
        out.writerow(row)
      self.browser_output(200, 'text/csv', csvfile.getvalue())
      csvfile.close()
      c.close()
      return
    if path.endswith('.css'):
      path = path.replace('.css', '_css')
      tmpl = path.split('/', 1)[1].strip('/')
      self.use_template(tmpl, [self.input])
  except TembozTemplate.Redirect, e:
    redirect_url = e.args[0]
    self.browser_output(301, None, 'This document has moved.',
                        ['Location: ' + redirect_url])
    return

import sys, os
sys.path.append(os.getcwd())
sys.path.append('..')
from singleton import db

c = db.cursor()

def escape(str):
  return str.replace("'", "''")

c.execute("""select item_uid, item_link, item_feed_uid, item_guid
from fm_items where item_link != item_guid""")
l = c.fetchall()
for uid, link, feed, guid in l:
  # escape embedded quotes in the link, as is already done for the guid
  c.execute("""select item_uid from fm_items
  where item_link='%s' and item_feed_uid=%s""" % (escape(link), feed))
  ll = c.fetchall()
  ll = [x[0] for x in ll]
  assert uid in ll
  if len(ll) > 2:
    print 'could not resolve link', link,
    print 'more than 2 instances:', ', '.join(map(str, ll))
    continue
  if len(ll) < 2:
    continue
  ll.remove(uid)
  old_uid = ll[0]
  c.execute("delete from fm_items where item_uid=%s" % uid)
  c.execute("update fm_items set item_guid='%s' where item_uid=%s"
            % (escape(guid), old_uid))
db.commit()

def add_feed(feed_xml):
  """Try to add a feed. Returns a tuple
  (feed_uid, feed_title, num_added, num_filtered)"""
  from singleton import db
  c = db.cursor()
  feed_xml = feed_xml.replace('feed://', 'http://')
  try:
    # verify the feed
    f = feedparser.parse(feed_xml)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # some feeds have multiple links, one for self and one for PuSH
      if f.feed and 'link' not in f.feed and 'links' in f.feed:
        try:
          for l in f.feed['links']:
            if l['rel'] == 'self':
              f.feed['link'] = l['href']
        except KeyError:
          pass
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      f = feedparser.parse(feed_xml)
      if not f.feed:
        raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': f.get('etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
    }
    for key, value in feed.items():
      if type(value) == str:
        feed[key] = value
    filters.load_rules(db, c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
  finally:
    c.close()

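# A hedged usage sketch for add_feed(), illustration only: it exercises the
# return tuple and the exceptions raised above. subscribe() is a
# hypothetical helper, not part of the codebase.
def subscribe(url):
  try:
    feed_uid, title, num_added, num_filtered = add_feed(url)
    print >> param.log, 'added feed %d (%s): %d new items, %d filtered' % (
      feed_uid, title, num_added, num_filtered)
  except FeedAlreadyExists:
    print >> param.log, 'already subscribed:', url
  except (ParseError, AutodiscoveryParseError):
    print >> param.log, 'could not parse or autodiscover:', url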