示例#1
0
 def get_mentions(self, cursor):
     """Run every configured search query and collect the hits as Mentions.

     The *values* of ``self.config`` are used as the query strings; the
     keys are ignored. Each query is sent to ``self.api_endpoint`` and the
     JSON reply's ``responseData.results`` list is turned into Mention
     objects.

     cursor -- unused by this implementation; accepted so the signature
               matches callers that pass one.

     Returns a list of Mention objects, all stamped with the time at which
     this call started.
     """
     # ignore the keys here, they're just for show
     at = time.time()
     mentions = []
     for query in self.config.values():
         uri = "%s?%s" % (
             self.api_endpoint,
             urllib.urlencode({
                 'v': '1.0',
                 'q': query,
             }),
         )
         rq = urllib2.Request(
             uri,
             headers={'User-Agent': user_agent_string},
         )
         f = urllib2.urlopen(rq)
         try:
             obj = simplejson.loads(f.read())
         finally:
             # release the connection even if reading or parsing fails
             f.close()
         # responseData may be missing or null (e.g. when the service
         # reports an error for this query); treat that as "no hits" so
         # one bad query does not discard the results of the others.
         response_data = obj.get('responseData') or {}
         for result in response_data.get('results') or []:
             # a fresh sanitizer per result so no state carries over
             p = MySanitizer('utf-8')
             p.feed(result['content'])
             mentions.append(Mention(
                 query,
                 result['unescapedUrl'],
                 p.output(),
                 at,
             ))
     return mentions
示例#2
0
    def get_mentions(self, cursor):
        """Fetch every configured feed and return its entries as Mentions.

        For each key in ``self.config`` a feed URL is built with
        ``self.url_from_config``. Any etag / last_modified previously stored
        for that URL in the ``mentions_feeds`` table is passed to
        ``feedparser.parse`` as ``etag`` / ``modified``, and the table is
        updated (or a row inserted) with the values found on the new parse
        result. ``self.sleep()`` is called after each feed.

        cursor -- database cursor used to read and write ``mentions_feeds``;
                  queries use ``%s`` parameter placeholders.

        Returns a list of Mention objects, one per feed entry, built from the
        config key, the entry link, the sanitized body and the entry date
        converted with ``calendar.timegm``.
        """
        mentions = []
        # keys here describe what we're searching for
        for key, settings in self.config.items():
            feed = self.url_from_config(key, settings)

            cursor.execute(
                "SELECT last_modified, etag FROM mentions_feeds WHERE uri=%s",
                (feed,)
            )
            row = cursor.fetchone()
            feed_seen = row is not None

            kwargs = {}
            if feed_seen:
                kwargs['etag'] = row[1]
                kwargs['modified'] = row[0]
            kwargs['agent'] = user_agent_string
            kwargs['handlers'] = self.auth_handlers(settings)
            f = feedparser.parse(feed, **kwargs)

            for mentry in f.entries:
                # body/summary detection taken from toreadless.com (newspan)
                #
                # note that we're looking for the shortest thing that works
                # rather than the most content
                p = MySanitizer(f.encoding)
                content = getattr(mentry, 'content', None)
                summary = getattr(mentry, 'summary', None)
                if content:
                    body1 = content[0].value
                else:
                    body1 = ''
                body2 = summary or ''
                # prefer the summary only when it exists and is shorter
                if body2 and len(body1) > len(body2):
                    body = body2
                else:
                    body = body1
                p.feed(body)
                mentions.append(Mention(
                    key,
                    mentry.link,
                    unicode(p.output(), f.encoding),
                    calendar.timegm(mentry.date_parsed),
                ))

            etag = getattr(f, 'etag', None) or None
            last_modified = None
            # The original tested ``f.last_modified`` but formatted
            # ``f.modified``; test the value that is actually formatted.
            # NOTE(review): presumably newer feedparser releases expose the
            # time tuple as ``modified_parsed`` and a plain string as
            # ``modified`` -- confirm against the installed version.
            modified = (getattr(f, 'modified_parsed', None)
                        or getattr(f, 'modified', None))
            if modified:
                try:
                    last_modified = time.strftime("%Y-%m-%d %H:%M:%S", modified)
                except TypeError:
                    # not a time tuple; leave last_modified unset
                    last_modified = None

            if etag or last_modified:
                if feed_seen:
                    cursor.execute(
                        "UPDATE mentions_feeds SET etag=%s, last_modified=%s WHERE uri=%s",
                        (etag, last_modified, feed,)
                    )
                else:
                    cursor.execute(
                        "INSERT INTO mentions_feeds (uri, etag, last_modified) VALUES (%s, %s, %s)",
                        (feed, etag, last_modified,)
                    )
            self.sleep()

        return mentions