示例#1
0
 def _scan_comments(self, url, newest):
     """Scan one page of a comment listing and record keyword mentions.

     Fetches ``url`` as JSON through ``self.spider`` and walks the
     ``data.children`` entries in the order returned, stopping at the
     first comment whose id is not newer than ``newest``.  For each keyword
     that ``self._mentioned_keywords`` returns for the lower-cased comment
     body, a ``Mention`` is populated from the comment's fields.

     Args:
         url: Listing URL handed to ``self.spider._get_json``.
         newest: Id of the newest comment handled by an earlier scan.  A
             falsy value (``None``, ``''``) means there is no earlier scan,
             so no comment is treated as already seen.

     Returns:
         A ``(seen, after, next_newest)`` tuple.  ``seen`` is the number of
         entries that follow the comment which stopped the scan (0 when
         the scan reached the end of the page), ``after`` is the listing's
         ``after`` value, and ``next_newest`` is the id of the first entry
         on the page, or ``newest`` unchanged when the page is empty.
     """
     data = self.spider._get_json(url)
     listing = data['data']
     comments = listing['children']
     after = listing['after']

     # Ids are assumed to be Reddit base-36 strings, so a longer id is
     # always the newer one.  Plain string comparison would rank '10' below
     # '9'; comparing (length, text) keeps the numeric order.
     newest_key = (len(newest), newest) if newest else None

     seen = 0
     # An empty page has no first entry; keep the caller's marker instead
     # of leaving next_newest unbound.
     next_newest = comments[0]['data']['id'] if comments else newest

     for i, c in enumerate(comments):
         comment = c['data']
         comment_id = comment['id']
         id_key = (len(comment_id), comment_id)
         if newest_key is not None and id_key <= newest_key:
             # NOTE(review): this excludes the matching comment itself,
             # whereas _scan_posts also counts the matching post -- confirm
             # which of the two is intended.
             seen = len(comments) - i - 1
             break
         body = comment['body'].lower()
         for k in self._mentioned_keywords(body):
             # NOTE(review): the Mention is never explicitly added to
             # `session`; presumably the model registers itself when it is
             # constructed -- confirm.
             mention = Mention()
             mention.keyword_uid = k.uid
             # Drop the 3-character type prefix from link_id.
             mention.thread_id = comment['link_id'][3:]
             mention.comment_id = comment_id
             mention.author = comment['author']
             mention.subreddit = comment['subreddit']
             mention.created = unix_string(int(comment['created_utc']))
     session.commit()
     return (seen, after, next_newest)
示例#2
0
 def _scan_comments(self, url, newest):
     """Scan one page of a comment listing and record keyword mentions.

     Fetches ``url`` as JSON through ``self.spider`` and walks the
     ``data.children`` entries in the order returned, stopping at the
     first comment whose id is not newer than ``newest``.  For each keyword
     that ``self._mentioned_keywords`` returns for the lower-cased comment
     body, a ``Mention`` is populated from the comment's fields.

     Args:
         url: Listing URL handed to ``self.spider._get_json``.
         newest: Id of the newest comment handled by an earlier scan.  A
             falsy value (``None``, ``''``) means there is no earlier scan,
             so no comment is treated as already seen.

     Returns:
         A ``(seen, after, next_newest)`` tuple.  ``seen`` is the number of
         entries that follow the comment which stopped the scan (0 when
         the scan reached the end of the page), ``after`` is the listing's
         ``after`` value, and ``next_newest`` is the id of the first entry
         on the page, or ``newest`` unchanged when the page is empty.
     """
     data = self.spider._get_json(url)
     listing = data['data']
     comments = listing['children']
     after = listing['after']

     # Ids are assumed to be Reddit base-36 strings, so a longer id is
     # always the newer one.  Plain string comparison would rank '10' below
     # '9'; comparing (length, text) keeps the numeric order.
     newest_key = (len(newest), newest) if newest else None

     seen = 0
     # An empty page has no first entry; keep the caller's marker instead
     # of leaving next_newest unbound.
     next_newest = comments[0]['data']['id'] if comments else newest

     for i, c in enumerate(comments):
         comment = c['data']
         comment_id = comment['id']
         id_key = (len(comment_id), comment_id)
         if newest_key is not None and id_key <= newest_key:
             # NOTE(review): this excludes the matching comment itself,
             # whereas _scan_posts also counts the matching post -- confirm
             # which of the two is intended.
             seen = len(comments) - i - 1
             break
         body = comment['body'].lower()
         for k in self._mentioned_keywords(body):
             # NOTE(review): the Mention is never explicitly added to
             # `session`; presumably the model registers itself when it is
             # constructed -- confirm.
             mention = Mention()
             mention.keyword_uid = k.uid
             # Drop the 3-character type prefix from link_id.
             mention.thread_id = comment['link_id'][3:]
             mention.comment_id = comment_id
             mention.author = comment['author']
             mention.subreddit = comment['subreddit']
             mention.created = unix_string(int(comment['created_utc']))
     session.commit()
     return (seen, after, next_newest)
示例#3
0
 def _scan_posts(self, url, newest):
     """Scan one page of a post listing and record keyword mentions.

     Fetches ``url`` as JSON through ``self.spider`` and walks the
     ``data.children`` entries in the order returned, stopping at the
     first post whose id is not newer than ``newest``.  For each keyword
     that ``self._mentioned_keywords`` returns for the lower-cased title
     and selftext, a ``Mention`` is populated from the post's fields.

     Args:
         url: Listing URL handed to ``self.spider._get_json``.
         newest: Id of the newest post handled by an earlier scan.  A
             falsy value (``None``, ``''``) means there is no earlier scan,
             so no post is treated as already seen.

     Returns:
         A ``(seen, after, next_newest)`` tuple.  ``seen`` is the number of
         entries from the post which stopped the scan through the end of
         the page (0 when the scan reached the end of the page), ``after``
         is the listing's ``after`` value, and ``next_newest`` is the id of
         the first entry on the page, or ``newest`` unchanged when the page
         is empty.
     """
     data = self.spider._get_json(url)
     listing = data['data']
     posts = listing['children']
     after = listing['after']

     # Ids are assumed to be Reddit base-36 strings, so a longer id is
     # always the newer one.  Plain string comparison would rank '10' below
     # '9'; comparing (length, text) keeps the numeric order.
     newest_key = (len(newest), newest) if newest else None

     seen = 0
     # An empty page has no first entry; keep the caller's marker instead
     # of leaving next_newest unbound.
     next_newest = posts[0]['data']['id'] if posts else newest

     for i, c in enumerate(posts):
         post = c['data']
         post_id = post['id']
         id_key = (len(post_id), post_id)
         if newest_key is not None and id_key <= newest_key:
             # NOTE(review): this counts the matching post itself, whereas
             # _scan_comments excludes the matching comment -- confirm
             # which of the two is intended.
             seen = len(posts) - i
             break
         title = post['title'].lower()
         selftext = post['selftext'].lower()
         for k in self._mentioned_keywords(title, text2=selftext):
             # NOTE(review): the Mention is never explicitly added to
             # `session`; presumably the model registers itself when it is
             # constructed -- confirm.
             mention = Mention()
             mention.keyword_uid = k.uid
             mention.thread_id = post_id
             mention.author = post['author']
             mention.subreddit = post['subreddit']
             mention.created = unix_string(int(post['created_utc']))
     session.commit()
     return (seen, after, next_newest)
示例#4
0
 def _scan_posts(self, url, newest):
     """Scan one page of a post listing and record keyword mentions.

     Fetches ``url`` as JSON through ``self.spider`` and walks the
     ``data.children`` entries in the order returned, stopping at the
     first post whose id is not newer than ``newest``.  For each keyword
     that ``self._mentioned_keywords`` returns for the lower-cased title
     and selftext, a ``Mention`` is populated from the post's fields.

     Args:
         url: Listing URL handed to ``self.spider._get_json``.
         newest: Id of the newest post handled by an earlier scan.  A
             falsy value (``None``, ``''``) means there is no earlier scan,
             so no post is treated as already seen.

     Returns:
         A ``(seen, after, next_newest)`` tuple.  ``seen`` is the number of
         entries from the post which stopped the scan through the end of
         the page (0 when the scan reached the end of the page), ``after``
         is the listing's ``after`` value, and ``next_newest`` is the id of
         the first entry on the page, or ``newest`` unchanged when the page
         is empty.
     """
     data = self.spider._get_json(url)
     listing = data['data']
     posts = listing['children']
     after = listing['after']

     # Ids are assumed to be Reddit base-36 strings, so a longer id is
     # always the newer one.  Plain string comparison would rank '10' below
     # '9'; comparing (length, text) keeps the numeric order.
     newest_key = (len(newest), newest) if newest else None

     seen = 0
     # An empty page has no first entry; keep the caller's marker instead
     # of leaving next_newest unbound.
     next_newest = posts[0]['data']['id'] if posts else newest

     for i, c in enumerate(posts):
         post = c['data']
         post_id = post['id']
         id_key = (len(post_id), post_id)
         if newest_key is not None and id_key <= newest_key:
             # NOTE(review): this counts the matching post itself, whereas
             # _scan_comments excludes the matching comment -- confirm
             # which of the two is intended.
             seen = len(posts) - i
             break
         title = post['title'].lower()
         selftext = post['selftext'].lower()
         for k in self._mentioned_keywords(title, text2=selftext):
             # NOTE(review): the Mention is never explicitly added to
             # `session`; presumably the model registers itself when it is
             # constructed -- confirm.
             mention = Mention()
             mention.keyword_uid = k.uid
             mention.thread_id = post_id
             mention.author = post['author']
             mention.subreddit = post['subreddit']
             mention.created = unix_string(int(post['created_utc']))
     session.commit()
     return (seen, after, next_newest)