Пример #1
0
def purge_reload(feed_uid):
  """Purge a feed's unrated items and reload the feed from its stored URL.

  Reloads the transform module, forgets the cached GUIDs for the feed,
  deletes the feed's items that have item_rating=0 together with their tags,
  resets feed_modified and feed_etag to NULL, then parses the feed again and
  hands the result to process_parsed_feed.

  feed_uid -- feed identifier, anything int() accepts

  Raises ParseError when feedparser returns no feed data.
  """
  reload(transform)
  feed_uid = int(feed_uid)
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  from singleton import db
  c = db.cursor()
  try:
    # refresh filtering rules
    filters.load_rules(db, c)
    # the tags have to go first: the subquery finds them through the unrated
    # items, so it would match nothing once those items are deleted
    c.execute("""delete from fm_tags
    where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    c.execute("delete from fm_items where item_feed_uid=? and item_rating=0",
              [feed_uid])
    # forget the HTTP validators so the next fetch is unconditional
    c.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    c.execute("select feed_xml from fm_feeds where feed_uid=?", [feed_uid])
    feed_xml = c.fetchone()[0]
    db.commit()
    f = feedparser.parse(feed_xml)
    if not f.feed:
      raise ParseError
    normalize.normalize_feed(f)
    clear_errors(db, c, feed_uid, f)
    filters.load_rules(db, c)
    process_parsed_feed(db, c, f, feed_uid)
    db.commit()
  finally:
    c.close()
Пример #2
0
 def apply(self, content, *args, **kwargs):
   """Rewrite an item's link by dereferencing it, then return content.

   args[1] is the item, a mapping with at least 'link' and 'id'. Only items
   whose link contains self.link_substr are touched. If fm_items already
   holds a row for the item's GUID, the stored link is reused; otherwise the
   current link is fetched and the first group matched by self.re becomes
   the new link.

   content is always returned unchanged. Any error is reported through
   util.print_stack() and otherwise ignored (best effort).
   """
   item = args[1]
   if self.link_substr in item['link']:
     try:
       # check if this item has not already been loaded before
       guid = item['id']
       from singleton import db, sqlite
       c = db.cursor()
       try:
         if sqlite.paramstyle == 'qmark':
           c.execute("select item_link from fm_items where item_guid=?",
                     [guid])
         elif sqlite.paramstyle == 'pyformat':
           # the placeholder needs both parentheses: %(guid)s
           c.execute(
             "select item_link from fm_items where item_guid=%(guid)s",
             {'guid': guid})
         link = c.fetchone()
       finally:
         # release the cursor even when the lookup fails
         c.close()
       if link:
         print >> param.log, 'not dereferencing', guid, '->', link[0]
         item['link'] = link[0]
         return content
       # we haven't seen this article before, buck up and load it
       deref = urllib2.urlopen(item['link']).read()
       m = self.re.search(deref)
       if m and m.groups():
         item['link'] = m.groups()[0]
     except Exception:
       # best effort: a failed lookup or fetch must not break the caller
       util.print_stack()
   return content
Пример #3
0
def update_feed_xml(feed_uid, feed_xml):
  """Update a feed URL and fetch the feed.

  feed_uid -- feed identifier, anything int() accepts
  feed_xml -- new feed URL; it is parsed before anything is written

  Returns the result of process_parsed_feed for the freshly parsed feed.

  Raises ParseError when feedparser returns no feed data, FeedAlreadyExists
  when the update fails with an IntegrityError mentioning feed_xml, and
  UnknownError for any other IntegrityError.
  """
  feed_uid = int(feed_uid)

  f = feedparser.parse(feed_xml)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)

  from singleton import db
  c = db.cursor()
  try:
    clear_errors(db, c, feed_uid, f)
    try:
      c.execute("update fm_feeds set feed_xml=?, feed_html=? where feed_uid=?",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite.IntegrityError as e:
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
    filters.load_rules(db, c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
  finally:
    # always release the cursor, whichever way we leave
    c.close()
Пример #4
0
  def run(self):
    """Consume (item_uid, rating) pairs from self.in_q forever.

    Each pair is written to fm_items and committed. When the rating is 1 and
    a 'fb_token' setting exists, the item is also posted through
    social.fb_post unless its feed is private; an expired token is reported
    with notification(). Errors are printed with util.print_stack() and the
    loop moves on to the next pair.
    """
    from singleton import db
    cursor = db.cursor()
    callouts = ['Interesting: ', 'Notable: ', 'Recommended: ', 'Thumbs-up: ',
                'Noteworthy: ', 'FYI: ', 'Worth reading: ']
    while True:
      item_uid, rating = self.in_q.get()
      try:
        cursor.execute("""update fm_items
        set item_rating=?, item_rated=julianday('now')
        where item_uid=?""", [rating, item_uid])
        fb_token = param.settings.get('fb_token', None)
        share = rating == 1 and fb_token
        if share:
          # details needed for the post, read before committing
          cursor.execute("""select feed_uid, item_link, item_title, feed_private
          from fm_items, fm_feeds
          where item_uid=? and feed_uid=item_feed_uid""",
                         [item_uid])
          feed_uid, url, title, private = cursor.fetchone()
        db.commit()
        if not share or private:
          continue
        prefix = random.choice(callouts)
        try:
          social.fb_post(fb_token, prefix + title, url)
        except social.ExpiredToken:
          notification(db, cursor, feed_uid, 'Service notification',
                       'The Facebook access token has expired',
                       link='/settings#facebook')
      except:
        util.print_stack()
    # this will never be reached
    cursor.close()
Пример #5
0
def title_url(feed_uid):
  """Fetch the (feed_title, feed_html) row of a feed.

  feed_uid -- feed identifier, coerced with int()

  Returns whatever fetchone() yields for that feed. The cursor is closed
  before returning.
  """
  feed_key = int(feed_uid)
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "select feed_title, feed_html from fm_feeds where feed_uid=?",
      [feed_key])
    row = cursor.fetchone()
    return row
  finally:
    cursor.close()
Пример #6
0
def catch_up(feed_uid):
  """Set item_rating to -1 on every item of the feed still rated 0.

  feed_uid -- feed identifier, coerced with int()

  The update is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  from singleton import db
  cursor = db.cursor()
  params = [feed_key]
  try:
    cursor.execute("""update fm_items set item_rating=-1
    where item_feed_uid=? and item_rating=0""", params)
    db.commit()
  finally:
    cursor.close()
Пример #7
0
def link_already(url):
  """Tell whether an item whose link starts with url is already stored.

  url -- link prefix to look for in fm_items.item_link

  Returns the matching row count, which is truthy when at least one item
  matches. The check and its result are written to param.activity and
  param.log respectively.

  NOTE(review): url is used as a LIKE pattern, so any '%' or '_' it contains
  acts as a wildcard.
  """
  from singleton import db
  print >> param.activity, 'checking for deja-vu for', url,
  c = db.cursor()
  try:
    c.execute("select count(*) from fm_items where item_link like ?",
              [url + '%'])
    l = c.fetchone()
  finally:
    # close the cursor even if the query fails, like the other helpers do
    c.close()
  seen = l and l[0]
  print >> param.log, seen
  return seen
Пример #8
0
def update_item(item_uid, link, title, content):
  """Overwrite the stored link, title and content of one item.

  item_uid -- item identifier, coerced with int()
  link, title, content -- new values for item_link, item_title and
                          item_content

  The change is committed; the cursor is closed in all cases.
  """
  item_key = int(item_uid)
  from singleton import db
  cursor = db.cursor()
  new_values = [link, title, content, item_key]
  try:
    cursor.execute("""update fm_items set item_link=?, item_title=?, item_content=?
    where item_uid=?""", new_values)
    db.commit()
  finally:
    cursor.close()
Пример #9
0
def cleanup(db=None, c=None):
  """Garbage collection, backup and log rotation - see param.py

  This is done only once a day between 3 and 4 AM as this is quite intensive
  and could interfere with user activity.
  It can also be invoked by running temboz --clean

  db, c -- database handle and cursor to work with; when db is not given,
           the singleton database is used and a new cursor is opened on it

  NOTE(review): a cursor opened here is not closed by this function.
  """
  if not db:
    from singleton import db
    c = db.cursor()
  from singleton import sqlite_cli
  # blank the content of negatively rated items older than
  # param.garbage_contents days
  if getattr(param, 'garbage_contents', False):
    c.execute("""update fm_items set item_content=''
    where item_rating < 0 and item_created < julianday('now')-?""",
              [param.garbage_contents])
    db.commit()
  # delete negatively rated items created before both
  # now - param.garbage_items days and feed_oldest - 7 days
  if getattr(param, 'garbage_items', False):
    c.execute("""delete from fm_items where item_uid in (
      select item_uid from fm_items, fm_feeds
      where item_created < min(julianday('now')-?, feed_oldest-7)
      and item_rating<0 and feed_uid=item_feed_uid)""", [param.garbage_items])
    db.commit()
  singleton.snr_mv(db, c)
  # drop tags whose item no longer exists
  c.execute("""delete from fm_tags
  where not exists(
    select item_uid from fm_items where item_uid=tag_item_uid
  )""")
  db.commit()
  c.execute('vacuum')
  # we still hold the PseudoCursor lock, this is a good opportunity to backup
  try:
    os.mkdir('backups')
  except OSError:
    # mkdir failed, e.g. because the directory already exists; carry on
    pass
  prune_feed_guid_cache()
  # dump rss.db through the compressor into backups/daily_YYYY-MM-DD<suffix>;
  # param.backup_compressor fills both %s slots, so it is presumably a
  # (command, file suffix) pair - TODO confirm against param.py
  os.system((sqlite_cli + ' rss.db .dump | %s > backups/daily_' \
             + time.strftime('%Y-%m-%d') + '%s') % param.backup_compressor)
  # rotate the log
  os.rename(param.log_filename, 'backups/log_' + time.strftime('%Y-%m-%d'))
  param.log.close()
  # reopen the log in append mode with buffering set to 0
  param.log = open(param.log_filename, 'a', 0)
  # point file descriptors 1 and 2 (stdout, stderr) at the new log file
  os.dup2(param.log.fileno(), 1)
  os.dup2(param.log.fileno(), 2)
  # delete old backups
  backup_re = re.compile(
    'daily_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\.')
  log_re = re.compile(
    'log_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]')
  for fn in os.listdir('backups'):
    if backup_re.match(fn) or log_re.match(fn):
      # age in seconds, measured from the file's st_ctime
      elapsed = time.time() - os.stat('backups/' + fn).st_ctime
      if elapsed > 86400 * param.daily_backups:
        try:
          os.remove('backups/' + fn)
        except OSError:
          # best effort: a file that cannot be removed is left in place
          pass
Пример #10
0
def update_feed_private(feed_uid, private):
  """Store a feed's private flag as 0 or 1.

  feed_uid -- feed identifier, coerced with int()
  private -- any value; its truthiness decides the stored flag

  The change is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  flag = 1 if private else 0
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_private=? where feed_uid=?",
      [flag, feed_key])
    db.commit()
  finally:
    cursor.close()
Пример #11
0
def set_status(feed_uid, status):
  """Store a new feed_status value for a feed.

  feed_uid, status -- both coerced with int() before use

  The change is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  new_status = int(status)
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_status=? where feed_uid=?",
      [new_status, feed_key])
    db.commit()
  finally:
    cursor.close()
Пример #12
0
def hard_purge(feed_uid):
  """Delete a feed along with all of its items and rules.

  feed_uid -- feed identifier, coerced with int()

  The three deletes are committed together. Whether or not they succeed,
  the cursor is closed and filters.invalidate() is called.
  """
  feed_key = int(feed_uid)
  from singleton import db
  cursor = db.cursor()
  # run in this order: items, then rules, then the feed row itself
  statements = (
    "delete from fm_items where item_feed_uid=?",
    "delete from fm_rules where rule_feed_uid=?",
    "delete from fm_feeds where feed_uid=?",
  )
  try:
    for sql in statements:
      cursor.execute(sql, [feed_key])
    db.commit()
  finally:
    cursor.close()
    filters.invalidate()
Пример #13
0
def update_feed_html(feed_uid, feed_html):
  """Store a new feed_html value (the feed's HTML link) for a feed.

  feed_uid -- feed identifier, coerced with int()
  feed_html -- value written to fm_feeds.feed_html

  The change is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_html=? where feed_uid=?",
      [feed_html, feed_key])
    db.commit()
  finally:
    cursor.close()
Пример #14
0
def update_feed_desc(feed_uid, feed_desc):
  """Store a new feed_desc value (the feed's description) for a feed.

  feed_uid -- feed identifier, coerced with int()
  feed_desc -- value written to fm_feeds.feed_desc

  The change is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_desc=? where feed_uid=?",
      [feed_desc, feed_key])
    db.commit()
  finally:
    cursor.close()
Пример #15
0
def update_feed_dupcheck(feed_uid, dupcheck):
  """Store a feed's dupcheck flag as 0 or 1.

  feed_uid -- feed identifier, coerced with int()
  dupcheck -- any value; its truthiness decides the stored flag

  The change is committed; the cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  flag = 1 if dupcheck else 0
  # XXX run a dupcheck pass retroactively here if dupcheck == 1
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_dupcheck=? where feed_uid=?",
      [flag, feed_key])
    db.commit()
  finally:
    cursor.close()
Пример #16
0
def update_feed_exempt(feed_uid, exempt):
  """Store a feed's exempt flag as 0 or 1.

  feed_uid -- feed identifier, coerced with int()
  exempt -- any value; its truthiness decides the stored flag

  When the flag is set, filters.exempt_feed_retroactive() is also called
  for the feed before the commit. The cursor is closed in all cases.
  """
  feed_key = int(feed_uid)
  flag = 1 if exempt else 0
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_exempt=? where feed_uid=?",
      [flag, feed_key])
    if flag:
      filters.exempt_feed_retroactive(db, cursor, feed_key)
    db.commit()
  finally:
    cursor.close()
Пример #17
0
def run():
  """Load the settings, write temboz.pid and serve requests forever.

  The server listens on (param.bind_address or '', param.port) and handles
  requests with Handler. This call does not return while the server runs.
  """
  # force loading of the database so we don't have to wait an hour to detect
  # a database format issue
  from singleton import db
  c = db.cursor()
  try:
    update.load_settings(db, c)
  finally:
    # release the cursor even when the settings cannot be loaded
    c.close()

  logging.getLogger().setLevel(logging.INFO)
  server = Server((getattr(param, 'bind_address', ''), param.port), Handler)
  # record our pid; the file is closed even if the write fails
  with open('temboz.pid', 'w') as pidfile:
    pidfile.write('%d\n' % os.getpid())
  server.serve_forever()
Пример #18
0
def import_opml(opml_file):
  """Insert every feed listed in an OPML file into fm_feeds.

  opml_file -- passed as is to parse_opml(), which yields one mapping per
               feed; each mapping supplies the xmlUrl, htmlUrl, title and
               desc values for the insert

  A feed rejected with an IntegrityError mentioning feed_xml counts as a
  duplicate and is skipped; any other IntegrityError is re-raised. The
  inserts are committed once all feeds have been processed.

  Returns a tuple (ok, dup): feeds inserted, feeds skipped as duplicates.
  """
  tree = parse_opml(opml_file)
  from singleton import db
  c = db.cursor()
  ok = 0
  dup = 0
  try:
    for feed in tree:
      feed['feed_etag'] = ''
      try:
        c.execute("""insert into fm_feeds
        (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
        (:xmlUrl, :feed_etag, :htmlUrl, :title, :desc)""", feed)
        ok += 1
      except sqlite.IntegrityError as e:
        if 'feed_xml' not in str(e):
          raise
        dup += 1
    # without this commit the inserts above would never be saved
    db.commit()
  finally:
    c.close()
  return ok, dup
Пример #19
0
def update(where_clause=''):
  """Refresh the feeds that have feed_status=0.

  where_clause -- SQL text appended verbatim to the feed selection query,
                  to narrow down which feeds are refreshed

  param.feed_concurrency FeedWorker threads are started. They receive feed
  descriptions through work_q and report on process_q; every report is
  passed to update_feed() in this thread and committed. When the current
  hour equals param.backup_hour, cleanup() runs first.

  NOTE(review): the cursor is only closed when the function runs to the
  end; an exception raised outside the inner try leaves it open.
  """
  from singleton import db
  c = db.cursor()
  # refresh filtering rules
  filters.load_rules(db, c)
  # at 3AM by default, perform house-cleaning
  if time.localtime()[3] == param.backup_hour:
    cleanup(db, c)
  # create worker threads and the queues used to communicate with them
  work_q = Queue.Queue()
  process_q = Queue.Queue()
  workers = []
  for i in range(param.feed_concurrency):
    workers.append(FeedWorker(i + 1, work_q, process_q))
    workers[-1].start()
  # assign work
  c.execute("""select feed_uid, feed_xml, feed_etag, feed_dupcheck,
  strftime('%s', feed_modified) from fm_feeds where feed_status=0 """
            + where_clause)
  for feed_uid, feed_xml, feed_etag, feed_dupcheck, feed_modified in c:
    # feed_modified arrives as the text produced by strftime('%s', ...);
    # turn it into a local time tuple, or None when there is no value
    if feed_modified:
      feed_modified = float(feed_modified)
      feed_modified = time.localtime(feed_modified)
    else:
      feed_modified = None
    work_q.put((feed_uid, feed_xml, feed_etag, feed_modified, feed_dupcheck))
  # None is an indication for workers to stop
  for i in range(param.feed_concurrency):
    work_q.put(None)
  workers_left = param.feed_concurrency
  while workers_left > 0:
    feed_info = process_q.get()
    # exited worker
    if not feed_info:
      workers_left -= 1
    else:
      try:
        update_feed(db, c, *feed_info)
      except:
        # a failure on one feed is logged and must not stop the others
        util.print_stack()
      db.commit()
    # give reader threads an opportunity to get their work done
    time.sleep(1)
  c.close()
Пример #20
0
def update_feed_filter(feed_uid, feed_filter):
  """Set or clear the filter expression stored for a feed.

  feed_uid -- feed identifier, coerced with int()
  feed_filter -- filter text; surrounding whitespace is stripped, and an
                 empty result stores NULL instead

  A non-empty filter is first compiled (after filters.normalize_rule) so
  that a syntax error is raised before anything is written. After the
  commit, filters.invalidate() is called. The cursor is always closed.
  """
  feed_key = int(feed_uid)
  rule = feed_filter.strip()
  if not rule:
    rule = None
  else:
    # check syntax
    compile(filters.normalize_rule(rule), 'web form', 'eval')
  from singleton import db
  cursor = db.cursor()
  try:
    cursor.execute(
      "update fm_feeds set feed_filter=? where feed_uid=?",
      [rule, feed_key])
    db.commit()
    filters.invalidate()
  finally:
    cursor.close()
Пример #21
0
  def process_request(self):
    """Dispatch the current HTTP request based on self.path.

    The routes are tried in this order:
      '' or '/'        redirect to /view
      *.gif, *.js      bundled images and scripts (self.images, self.rsrc)
      /tiny_mce...     static files from the tiny_mce directory
      favicon, *.css   served before authentication is checked
      /redirect/<uid>  redirect to the stored link of an item
      /threads         HTML dump of open cursors and thread stacks
      /xmlfeedback/    promote / demote / basic / yappi operations
      /stem            stemmed form of the 'q' parameter
      /add_kw_rule, /del_kw_rule   keyword rule maintenance
      /stats           CSV statistics for the last 30 days
      anything else    rendered as a template named after the path

    Query string parameters are decoded from UTF-8 and merged into
    self.input. A TembozTemplate.Redirect raised along the way is turned
    into a 301 response.

    NOTE(review): the favicon and first .css branches do not return, so
    processing continues with the authentication check - confirm that this
    is intended.
    """
    try:
      if self.path in ['', '/']:
        self.browser_output(301, None, 'This document has moved.',
                            ['Location: /view'])
        return
      # elements 2 and 4 of the parse result are the path and the query
      path, query_string = urlparse.urlparse(self.path)[2:5:2]
      vars = []
      if query_string:
        # parse_qsl does not comply with RFC 3986, we have to decode UTF-8
        query_list = [(n, v.decode('UTF-8'))
                      for n, v in urlparse.parse_qsl(query_string, 1)]
        self.input.update(dict(query_list))

      if param.debug:
        logging.info((self.command, self.path, self.request_version, vars))

      if path.endswith('.gif') and path[1:] in self.images:
        self.browser_output(200, 'image/gif', self.images[path[1:]],
                            http_headers=no_expire)
        return

      if path.endswith('.js') and path[1:] in self.rsrc:
        self.browser_output(200, 'text/javascript', self.rsrc[path[1:]],
                            http_headers=no_expire)
        return

      if path.startswith('/tiny_mce'):
        # guard against attempts to subvert security using ../
        path = os.path.normpath('.' + path)
        # raised explicitly instead of using an assert statement, so that
        # the guard is still enforced when Python runs with -O
        if not path.startswith('tiny_mce'):
          raise AssertionError('path outside tiny_mce: %r' % (path,))
        self.set_mime_type(path)
        with open(path) as static_file:
          static_body = static_file.read()
        self.browser_output(200, self.mime_type, static_body,
                            http_headers=no_expire)
        return

      if path.count('favicon.ico') > 0:
        self.favicon()

      if path.endswith('.css'):
        path = path.replace('.css', '_css')
        tmpl = path.split('/', 1)[1].strip('/')
        self.use_template(tmpl, [self.input])

      if not self.require_auth(param.auth_dict):
        return

      if path.startswith('/redirect/'):
        from singleton import db
        # parse the uid before opening a cursor so bad input cannot leak one
        item_uid = int(path[10:])
        c = db.cursor()
        try:
          c.execute('select item_link from fm_items where item_uid=?',
                    [item_uid])
          redirect_url = c.fetchone()[0]
        finally:
          c.close()
        self.browser_output(301, None, 'This document has moved.',
                            ['Location: ' + redirect_url])
        return

      if path.startswith('/threads'):
        frames = sys._current_frames()
        row = 0
        out = []
        if singleton.c_opened:
          out.append('<h1>Open Cursors</h1>\n')
          for curs, tb in singleton.c_opened.iteritems():
            if curs not in singleton.c_closed:
              row += 1
              # alternate the background colour from one entry to the next
              if row % 2:
                color = '#ddd'
              else:
                color = 'white'
              out.append('<div style="background-color: ' + color + '">\n<pre>')
              out.append(curs.replace('<', '&lt;').replace('>', '&gt;') + '\n')
              out.append('\n'.join(tb[:-2]))
              out.append('</pre></div>\n')
        out.append('<h1>Threads</h1>\n')
        row = 0
        for thread_id, frame in sorted(frames.iteritems()):
          # skip the thread that is building this page
          if thread_id == threading.currentThread()._Thread__ident:
            continue
          row += 1
          if row % 2:
            color = '#ddd'
          else:
            color = 'white'
          out.append('<div style="background-color: ' + color + '">\n<pre>')
          out.append('Thread %s (%d refs)\n'
                     % (thread_id, sys.getrefcount(frame)))
          out.append(''.join(traceback.format_stack(frame)).replace(
            '&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
          out.append('\n<hr>\n')
          out.append(pprint.pformat(frame.f_locals).replace(
            '&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
          out.append('\n</pre>\n</div>\n')
        del frames
        self.browser_output(200, 'text/html', ''.join(out))
        return

      if path.startswith('/xmlfeedback/'):
        # path components 2 and 4: the operation and the item
        op, item_uid = path.split('/')[2::2]
        item_uid = item_uid.split('.')[0]
        # for safety, these operations should be idempotent
        if op in ['promote', 'demote', 'basic', 'yappi']:
          if op != 'yappi':
            item_uid = int(item_uid)
          getattr(self, 'op_' + op)(item_uid)
        self.xml()
        return

      if path.startswith('/stem'):
        txt = self.input['q']
        stem = ' '.join(normalize.stem(normalize.get_words(txt)))
        self.browser_output(200, 'text/plain', stem)
        return

      if path.startswith('/add_kw_rule'):
        from singleton import db
        c = db.cursor()
        try:
          try:
            filters.add_kw_rule(db, c, **self.input)
          except Exception:
            # best effort: log the failure, still answer the request
            util.print_stack()
          db.commit()
        finally:
          c.close()
        self.xml()
        return

      if path.startswith('/del_kw_rule'):
        from singleton import db
        c = db.cursor()
        try:
          try:
            filters.del_kw_rule(db, c, **self.input)
          except Exception:
            # best effort: log the failure, still answer the request
            util.print_stack()
          db.commit()
        finally:
          c.close()
        self.xml()
        return

      if path.startswith('/stats'):
        from singleton import db
        c = db.cursor()
        try:
          c.execute("""select date(item_loaded) as date, count(*) as articles,
          sum(case when item_rating=1 then 1 else 0 end) as interesting,
          sum(case when item_rating=0 then 1 else 0 end) as unread,
          sum(case when item_rating=-1 then 1 else 0 end) as filtered
          from fm_items
          where item_loaded > julianday('now') - 30
          group by 1 order by 1""")
          csvfile = cStringIO.StringIO()
          out = csv.writer(csvfile, dialect='excel', delimiter=',')
          # header row: the capitalized column names of the query
          out.writerow([col[0].capitalize() for col in c.description])
          for row in c:
            out.writerow(row)
          self.browser_output(200, 'text/csv', csvfile.getvalue())
          csvfile.close()
        finally:
          c.close()
        return

      if path.endswith('.css'):
        path = path.replace('.css', '_css')

      tmpl = path.split('/', 1)[1].strip('/')
      self.use_template(tmpl, [self.input])
    except TembozTemplate.Redirect as e:
      redirect_url = e.args[0]
      self.browser_output(301, None, 'This document has moved.',
                          ['Location: ' + redirect_url])
      return
Пример #22
0
# One-off maintenance script: merges items that were stored twice, once
# under their link and once under their GUID.
import sys, os
sys.path.append(os.getcwd())
sys.path.append('..')
from singleton import db
c = db.cursor()

def escape(str):
  """Double the single quotes in str, as required inside a SQL literal.

  No longer used by this script, whose queries are parameterized; kept for
  backward compatibility.
  """
  return str.replace("'", "''")

# candidates: items whose link differs from their GUID
c.execute("""select item_uid, item_link, item_feed_uid, item_guid
from fm_items
where item_link != item_guid""")
l = c.fetchall()
for uid, link, feed, guid in l:
  # values are bound as parameters, so quotes in a link or GUID cannot
  # break the statement
  c.execute("""select item_uid from fm_items
  where item_link=? and item_feed_uid=?""", [link, feed])
  ll = [x[0] for x in c.fetchall()]
  assert uid in ll
  if len(ll) > 2:
    print('could not resolve link', link, end=' ')
    print('more than 2 instances:', ', '.join(map(str, ll)))
    continue
  if len(ll) < 2: continue
  # exactly two rows share this link: drop the current one and hand its
  # GUID over to the other
  ll.remove(uid)
  old_uid = ll[0]
  c.execute("delete from fm_items where item_uid=?", [uid])
  c.execute("update fm_items set item_guid=? where item_uid=?",
            [guid, old_uid])
  db.commit()
Пример #23
0
# One-off maintenance script: merges items that were stored twice, once
# under their link and once under their GUID.
import sys, os
sys.path.append(os.getcwd())
sys.path.append('..')
from singleton import db
c = db.cursor()

def escape(str):
  """Double the single quotes in str, as required inside a SQL literal.

  No longer used by this script, whose queries are parameterized; kept for
  backward compatibility.
  """
  return str.replace("'", "''")

# candidates: items whose link differs from their GUID
c.execute("""select item_uid, item_link, item_feed_uid, item_guid
from fm_items
where item_link != item_guid""")
l = c.fetchall()
for uid, link, feed, guid in l:
  # values are bound as parameters, so quotes in a link or GUID cannot
  # break the statement
  c.execute("""select item_uid from fm_items
  where item_link=? and item_feed_uid=?""", [link, feed])
  ll = [x[0] for x in c.fetchall()]
  assert uid in ll
  if len(ll) > 2:
    # a single parenthesized argument prints the same text on Python 2 and 3
    print('could not resolve link %s more than 2 instances: %s'
          % (link, ', '.join(map(str, ll))))
    continue
  if len(ll) < 2: continue
  # exactly two rows share this link: drop the current one and hand its
  # GUID over to the other
  ll.remove(uid)
  old_uid = ll[0]
  c.execute("delete from fm_items where item_uid=?", [uid])
  c.execute("update fm_items set item_guid=? where item_uid=?",
            [guid, old_uid])
  db.commit()
Пример #24
0
def add_feed(feed_xml):
  """Try to add a feed.

  feed_xml -- URL of the feed, or of a page to run autodiscovery on;
              a feed:// prefix is replaced with http://

  Returns a tuple (feed_uid, feed_title, num_added, num_filtered).

  Raises ParseError when no usable feed is found, AutodiscoveryParseError
  when the fallback autodiscovery fails, FeedAlreadyExists when the insert
  fails with an IntegrityError mentioning feed_xml, and UnknownError for any
  other IntegrityError.
  """
  from singleton import db
  c = db.cursor()
  feed_xml = feed_xml.replace('feed://', 'http://')
  try:
    # verify the feed
    f = feedparser.parse(feed_xml)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # some feeds have multiple links, one for self and one for PuSH
      if f.feed and 'link' not in f.feed and 'links' in f.feed:
        try:
          for l in f.feed['links']:
            if l['rel'] == 'self':
              f.feed['link'] = l['href']
        except KeyError:
          pass
    # checked again, as the block above may have supplied the missing link
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except Exception:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      f = feedparser.parse(feed_xml)
      if not f.feed:
        raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': f.get('etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
      }
    filters.load_rules(db, c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite.IntegrityError as e:
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
  finally:
    c.close()