Пример #1
0
 def connect_to_reddit(self, user_agent):
   """Create the PRAW client for this instance and keep it on ``self.r``.

   user_agent -- handed straight to ``praw.Reddit``.

   Returns nothing.  Any ``Exception`` raised while connecting (or while
   writing the success log) is reported through ``print_log`` at the
   'error' level instead of being propagated.
   """
   try:
     session = praw.Reddit(user_agent)
     self.r = session
     message = ''.join(("Connected with user_agent '", user_agent, "'"))
     print_log('ok', self.name + '.connect_to_reddit', message)
   except Exception as err:
     print_log('error', self.name + '.connect_to_reddit', str(err))
Пример #2
0
 def open(self, URI):
   """Open a Mongo client for ``URI`` and keep it on ``self.client``.

   URI -- connection string handed to ``pymongo.MongoClient``.

   Returns nothing.  A failure to create the client is not raised; it is
   logged through ``print_log`` at the 'error' level together with the
   text of the underlying exception, so the cause is not lost.
   """
   try:
     client = pymongo.MongoClient(URI)
   except Exception as e:
     # Include the exception text: without it the log only says "failed".
     print_log('error', self.name, 'Connection to ' + URI + ' failed: ' + str(e))
   else:
     # Only reached when the client was created, so a logging problem can
     # no longer be misreported as a connection failure.
     self.client = client
     print_log('ok', self.name, 'Connection opened to ' + URI)
Пример #3
0
 def remove_redditor(self, username):
     """Remove every post authored by ``username`` plus the user record.

     Submissions by other users that were stored only because they contain
     a comment of this user are NOT removed.

     username -- the redditor's name.  The add_* methods shown with this
         one store ``author`` lower-cased, so posts are matched on both the
         name as given and its lower-cased form; matching only the name as
         given would silently remove nothing for a mixed-case name.

     Returns nothing; the number of matched posts is written to the log.
     """
     author_query = {'author': {'$in': [username, username.lower()]}}
     posts = self.m.db.posts
     # Count first: the same query drives both the log figure and the delete.
     count = posts.find(author_query).count()
     posts.remove(author_query)
     # The users collection is keyed by the name exactly as it was added.
     self.m.db.users.remove({'username': username})
     print_log(
         'ok', self.name + '.remove_redditor:',
         'Removed ' + str(count) + ' entries authored by ' + username + '.')
Пример #4
0
 def connect_to_reddit(self, user_agent):
     """Build a ``praw.Reddit`` client and store it as ``self.r``.

     user_agent -- forwarded unchanged to ``praw.Reddit``.

     Nothing is returned.  Errors are not propagated: any ``Exception``
     raised inside the attempt is written to the log at the 'error' level.
     """
     try:
         reddit_client = praw.Reddit(user_agent)
         self.r = reddit_client
         pieces = ("Connected with user_agent '", user_agent, "'")
         print_log('ok', self.name + '.connect_to_reddit', ''.join(pieces))
     except Exception as failure:
         print_log('error', self.name + '.connect_to_reddit', str(failure))
Пример #5
0
 def connect_to_database(self, UID, db):
     """Attach a Mongo wrapper to this instance as ``self.m``.

     Must be called before any method that reads ``self.m``.

     UID -- passed to the wrapper's ``open`` call.
     db  -- passed to the wrapper's ``set_db`` call.

     Returns nothing.  Any ``Exception`` raised while creating, opening or
     selecting the database is logged at the 'error' level, not raised.
     """
     try:
         store = mongo_class.mongoClass(self.name + '.mongoDB')
         self.m = store
         store.open(UID)
         store.set_db(db)
     except Exception as err:
         print_log('error', self.name + '.connect_to_database', str(err))
Пример #6
0
 def connect_to_database(self, UID, db):
   """Create the Mongo wrapper for this instance and store it as ``self.m``.

   Must be called before any method that reads ``self.m``.

   UID -- forwarded to the wrapper's ``open`` call.
   db  -- forwarded to the wrapper's ``set_db`` call.

   Returns nothing.  Failures are swallowed: any ``Exception`` from the
   three setup steps is logged at the 'error' level.
   """
   try:
     wrapper = mongo_class.mongoClass(self.name + '.mongoDB')
     self.m = wrapper
     wrapper.open(UID)
     wrapper.set_db(db)
   except Exception as failure:
     print_log('error', self.name + '.connect_to_database', str(failure))
Пример #7
0
 def add_submission(self, reddit_id, tabs = 0, verbose = False):
   """Fetch one Reddit submission and store it unless it is already stored.

   reddit_id -- Reddit id of the submission; also the 'id' field looked up
       in the posts collection.
   tabs      -- forwarded to ``print_log`` for the verbose messages.
   verbose   -- when true, log both the "added" and "already exists" cases.

   Returns ``[mongo_id, 1]`` when a new document was inserted,
   ``[existing_mongo_id, 0]`` when the post was already present, and
   ``None`` when the download from Reddit failed (the error is logged).
   """
   existing = self.m.db.posts.find_one({'id': reddit_id})
   if existing is not None:
     # Already stored: report it and hand back the stored document's id.
     if verbose:
       print_log('warning', self.name+'.add_submission', 'Post ID ' + reddit_id + ' already exists in the database.', tabs=tabs)
     return [existing['_id'], 0]

   try:
     post = self.r.get_submission(submission_id = reddit_id)
   except Exception as err:
     print_log('error', self.name + '.add_submission', str(err)+', Post ID '+reddit_id)
     return None

   record = {
     'id': post.id,
     'type': 'submission',
     'author': str(post.author).lower(),
     'created_utc': post.created_utc,
     'title': post.title,
     'subreddit': '%s'%post.subreddit,
     'domain': post.domain,
     'selftext': post.selftext,
     'ups': post.ups,
     'downs': post.downs,
     'gilded': post.gilded,
     'num_comments': post.num_comments,
   }
   new_id = self.m.db.posts.insert(record)
   if verbose:
     print_log('ok', self.name+'.add_submission', 'Added post ID ' + reddit_id + ' to the database.', tabs=tabs)
   return [new_id, 1]
Пример #8
0
 def add_submission(self, reddit_id, tabs=0, verbose=False):
     """Download a submission by id and insert it if it is not stored yet.

     reddit_id -- Reddit id of the submission; matched against the 'id'
         field of the posts collection.
     tabs      -- passed through to ``print_log`` for verbose messages.
     verbose   -- log the "added" / "already exists" outcome when true.

     Returns ``[mongo_id, 1]`` after inserting a new document,
     ``[existing_mongo_id, 0]`` if the post was already stored, or ``None``
     if fetching it from Reddit raised (the error is logged).
     """
     stored = self.m.db.posts.find_one({'id': reddit_id})
     if stored is not None:
         # Nothing to download; return the id of the document we have.
         if verbose:
             already_msg = ('Post ID ' + reddit_id +
                            ' already exists in the database.')
             print_log('warning', self.name + '.add_submission',
                       already_msg, tabs=tabs)
         return [stored['_id'], 0]

     try:
         post = self.r.get_submission(submission_id=reddit_id)
     except Exception as failure:
         print_log('error', self.name + '.add_submission',
                   str(failure) + ', Post ID ' + reddit_id)
         return None

     document = {
         'id': post.id, 'type': 'submission',
         'author': str(post.author).lower(),
         'created_utc': post.created_utc,
         'title': post.title,
         'subreddit': '%s' % post.subreddit,
         'domain': post.domain,
         'selftext': post.selftext,
         'ups': post.ups, 'downs': post.downs,
         'gilded': post.gilded,
         'num_comments': post.num_comments,
     }
     inserted_id = self.m.db.posts.insert(document)
     if verbose:
         added_msg = 'Added post ID ' + reddit_id + ' to the database.'
         print_log('ok', self.name + '.add_submission', added_msg,
                   tabs=tabs)
     return [inserted_id, 1]
Пример #9
0
    def add_redditor(self, username, verbose=False):
        """Download a redditor's submissions and comments into the database.

        Each post is looked up by its Reddit id first and only inserted when
        absent.  For every newly added comment whose ``link_url`` matches the
        expected thread URL, the parent submission is fetched afterwards (a
        "bonus" submission) and the comment document gets a
        ``submission_mongo_id`` field pointing at it.

        username -- redditor whose posts are fetched.
        verbose  -- when true, log one line per post; when false, keep a
            single self-overwriting progress line on stdout instead.

        Returns ``None`` if the submission or comment listing could not be
        obtained (the error is logged); otherwise a summary string with the
        added/skipped counts.
        """
        start_time = time.time()
        totals = {
            'subs_added': 0,
            'subs_skipped': 0,
            'coms_added': 0,
            'coms_skipped': 0,
            'bonus': 0,
        }
        posts = self.m.db.posts

        try:
            redditor = self.r.get_redditor(username)
            submissions = redditor.get_submitted(limit=None)
        except Exception as e:
            print_log('error', self.name + '.add_redditor.get_submitted',
                      str(e))
            return None
        # Parenthesised so the line is valid as a statement or a call.
        print(colored(self.name + '.add_Redditor:', 'green'))

        # Submissions authored by the redditor ------------------------------
        for post in submissions:
            if not verbose:
                self._write_redditor_progress(totals, start_time)
            if posts.find_one({'id': post.id}) is None:
                posts.insert(self._submission_record(post))
                totals['subs_added'] += 1
                if verbose:
                    print_log('ok', self.name + '.add_redditor:submission',
                              'Added post ID ' + post.id + ' to the database.')
            else:
                totals['subs_skipped'] += 1
                if verbose:
                    print_log(
                        'warning', self.name + '.add_redditor:submission',
                        'Post ID ' + post.id +
                        ' already exists in the database.')

        # Comments authored by the redditor ---------------------------------
        try:
            comments = redditor.get_comments(limit=None)
        except Exception as e:
            print_log('error', self.name + '.add_redditor.get_comments',
                      str(e))
            return None
        # (comment Mongo _id, parent submission Reddit id), in fetch order.
        comment_links = []
        for post in comments:
            if not verbose:
                self._write_redditor_progress(totals, start_time)
            if posts.find_one({'id': post.id}) is not None:
                totals['coms_skipped'] += 1
                if verbose:
                    print_log(
                        'warning', self.name + '.add_redditor:comment',
                        'Post ID ' + post.id +
                        ' already exists in the database.')
                continue
            comment_mongo_id = posts.insert({
                'id': post.id,
                'type': 'comment',
                'author': str(post.author).lower(),
                'created_utc': post.created_utc,
                'subreddit': '%s' % post.subreddit,
                'body': post.body,
                'ups': post.ups,
                'downs': post.downs,
                'gilded': post.gilded,
            })
            totals['coms_added'] += 1
            # Log before the URL check: the comment is stored either way.
            if verbose:
                print_log('ok', self.name + '.add_redditor:comment',
                          'Added post ID ' + post.id + ' to the database.')
            match = re.search(
                r'http://www.reddit.com/r/(\w+)/comments/(\w+)/',
                post.link_url)
            if match is not None:
                comment_links.append((comment_mongo_id, match.group(2)))

        # Parent submissions of the newly added comments ----------------------
        if comment_links:
            # Link by submission id instead of by position in the listing: a
            # positional index breaks as soon as the listing does not mirror
            # the request one-to-one (e.g. two comments in the same thread).
            comments_by_submission = {}
            for comment_mongo_id, submission_id in comment_links:
                comments_by_submission.setdefault(
                    submission_id, []).append(comment_mongo_id)
            # One 't3_<id>,' entry per comment, in fetch order.
            api_url = 'http://www.reddit.com/by_id/' + ''.join(
                't3_' + submission_id + ','
                for _, submission_id in comment_links)
            for post in self.r.get_content(api_url, limit=None):
                if not verbose:
                    self._write_redditor_progress(totals, start_time)
                existing = posts.find_one({'id': post.id})
                if existing is None:
                    submission_mongo_id = posts.insert(
                        self._submission_record(post))
                    totals['bonus'] += 1
                    if verbose:
                        print_log(
                            'ok', self.name + '.add_redditor:bonus submission',
                            'Added post ID ' + post.id + ' to the database.')
                else:
                    # Already stored: still link the comments to it.
                    submission_mongo_id = existing['_id']
                    if verbose:
                        print_log(
                            'warning',
                            self.name + '.add_redditor:bonus submission',
                            'Post ID ' + post.id +
                            ' already exists in the database.')
                for comment_mongo_id in comments_by_submission.get(
                        post.id, ()):
                    posts.update(
                        {'_id': comment_mongo_id},
                        {'$set': {
                            'submission_mongo_id': submission_mongo_id
                        }})

        self._write_redditor_progress(totals, start_time, finished=True)

        # If anything was added, make sure the user is registered as well.
        # IDEA: also include the time, so we can keep track of updating account info
        if totals['subs_added'] > 0 or totals['coms_added'] > 0:
            if self.m.db.users.find_one({'username': username}) is None:
                self.m.db.users.insert({'username': username})

        return ('Added ' + username + ': ' +
                str(totals['subs_added']) + '/' +
                str(totals['subs_skipped']) + ' submissions and ' +
                str(totals['coms_added']) + '/' +
                str(totals['coms_skipped']) + ' comments.')

    def _submission_record(self, post):
        """Build the Mongo document stored for a Reddit submission."""
        return {
            'id': post.id,
            'type': 'submission',
            'author': str(post.author).lower(),
            'created_utc': post.created_utc,
            'title': post.title,
            'subreddit': '%s' % post.subreddit,
            'domain': post.domain,
            'selftext': post.selftext,
            'ups': post.ups,
            'downs': post.downs,
            'gilded': post.gilded,
            'num_comments': post.num_comments
        }

    def _write_redditor_progress(self, totals, start_time, finished=False):
        """Overwrite the current stdout line with the add/skip tallies."""
        elapsed = time.time() - start_time
        minutes, seconds = divmod(elapsed, 60)
        hours, minutes = divmod(minutes, 60)
        clock = '%d:%02d:%02d' % (hours, minutes, seconds)
        tail = ' elapsed...Finished.\n' if finished else ' elapsed...'
        sys.stdout.flush()
        # Blank the previous status before writing the new one.
        sys.stdout.write('\r' + ' ' * 80)
        sys.stdout.write(
            '\r\t' + colored('Added', 'green') + '/' +
            colored('skipped', 'yellow') + ': ' +
            colored(str(totals['subs_added']), 'green') + '/' +
            colored(str(totals['subs_skipped']), 'yellow') + ' subs, ' +
            colored(str(totals['coms_added']), 'green') + '/' +
            colored(str(totals['coms_skipped']), 'yellow') + ' coms, ' +
            colored(str(totals['bonus']), 'green') + ' bonus; ' +
            clock + tail)
Пример #10
0
  def add_redditor(self, username, verbose = False):
    """Download a redditor's submissions and comments into the database.

    Each post is looked up by its Reddit id first and only inserted when
    absent.  For every newly added comment whose ``link_url`` matches the
    expected thread URL, the parent submission is fetched afterwards (a
    "bonus" submission) and the comment document gets a
    ``submission_mongo_id`` field pointing at it.

    username -- redditor whose posts are fetched.
    verbose  -- when true, log one line per post; when false, keep a single
        self-overwriting progress line on stdout instead.

    Returns ``None`` if the submission or comment listing could not be
    obtained (the error is logged); otherwise a summary string with the
    added/skipped counts.
    """
    start_time = time.time()
    totals = {'subs_added': 0, 'subs_skipped': 0, 'coms_added': 0, 'coms_skipped': 0, 'bonus': 0}
    posts = self.m.db.posts

    try:
      redditor = self.r.get_redditor(username)
      submissions = redditor.get_submitted(limit=None)
    except Exception as e:
      print_log('error', self.name + '.add_redditor.get_submitted', str(e))
      return None
    # Parenthesised so the line is valid as a statement or a call.
    print(colored(self.name + '.add_Redditor:', 'green'))

    # Submissions authored by the redditor --------------------------------
    for post in submissions:
      if not verbose:
        self._write_redditor_progress(totals, start_time)
      if posts.find_one({'id': post.id}) is None:
        posts.insert(self._submission_record(post))
        totals['subs_added'] += 1
        if verbose: print_log('ok', self.name+'.add_redditor:submission', 'Added post ID ' + post.id + ' to the database.')
      else:
        totals['subs_skipped'] += 1
        if verbose: print_log('warning', self.name+'.add_redditor:submission', 'Post ID ' + post.id + ' already exists in the database.')

    # Comments authored by the redditor -----------------------------------
    try:
      comments = redditor.get_comments(limit=None)
    except Exception as e:
      print_log('error', self.name + '.add_redditor.get_comments', str(e))
      return None
    # (comment Mongo _id, parent submission Reddit id), in fetch order.
    comment_links = []
    for post in comments:
      if not verbose:
        self._write_redditor_progress(totals, start_time)
      if posts.find_one({'id': post.id}) is not None:
        totals['coms_skipped'] += 1
        if verbose: print_log('warning', self.name+'.add_redditor:comment', 'Post ID ' + post.id + ' already exists in the database.')
        continue
      comment_mongo_id = posts.insert({
        'id': post.id,
        'type': 'comment',
        'author': str(post.author).lower(),
        'created_utc': post.created_utc,
        'subreddit': '%s'%post.subreddit,
        'body': post.body,
        'ups': post.ups,
        'downs': post.downs,
        'gilded': post.gilded,
      })
      totals['coms_added'] += 1
      # Log before the URL check: the comment is stored either way.
      if verbose: print_log('ok', self.name+'.add_redditor:comment', 'Added post ID ' + post.id + ' to the database.')
      match = re.search(r'http://www.reddit.com/r/(\w+)/comments/(\w+)/' , post.link_url)
      if match is not None:
        comment_links.append((comment_mongo_id, match.group(2)))

    # Parent submissions of the newly added comments ------------------------
    if comment_links:
      # Link by submission id instead of by position in the listing: a
      # positional index breaks as soon as the listing does not mirror the
      # request one-to-one (e.g. two comments in the same thread).
      comments_by_submission = {}
      for comment_mongo_id, submission_id in comment_links:
        comments_by_submission.setdefault(submission_id, []).append(comment_mongo_id)
      # One 't3_<id>,' entry per comment, in fetch order.
      api_url = 'http://www.reddit.com/by_id/' + ''.join('t3_'+submission_id+',' for _, submission_id in comment_links)
      for post in self.r.get_content(api_url, limit=None):
        if not verbose:
          self._write_redditor_progress(totals, start_time)
        existing = posts.find_one({'id': post.id})
        if existing is None:
          submission_mongo_id = posts.insert(self._submission_record(post))
          totals['bonus'] += 1
          if verbose: print_log('ok', self.name+'.add_redditor:bonus submission', 'Added post ID ' + post.id + ' to the database.')
        else:
          # Already stored: still link the comments to it.
          submission_mongo_id = existing['_id']
          if verbose: print_log('warning', self.name+'.add_redditor:bonus submission', 'Post ID ' + post.id + ' already exists in the database.')
        for comment_mongo_id in comments_by_submission.get(post.id, ()):
          posts.update({'_id': comment_mongo_id}, {'$set': {'submission_mongo_id': submission_mongo_id}})

    self._write_redditor_progress(totals, start_time, finished=True)

    # If anything was added, make sure the user is registered as well.
    # IDEA: also include the time, so we can keep track of updating account info
    if totals['subs_added'] > 0 or totals['coms_added'] > 0:
      if self.m.db.users.find_one({'username':username}) is None:
        self.m.db.users.insert({'username':username})

    return 'Added '+username+': '+str(totals['subs_added'])+'/'+str(totals['subs_skipped'])+' submissions and '+str(totals['coms_added'])+'/'+str(totals['coms_skipped'])+' comments.'

  def _submission_record(self, post):
    """Build the Mongo document stored for a Reddit submission."""
    return {
      'id': post.id,
      'type': 'submission',
      'author': str(post.author).lower(),
      'created_utc': post.created_utc,
      'title': post.title,
      'subreddit': '%s'%post.subreddit,
      'domain': post.domain,
      'selftext': post.selftext,
      'ups': post.ups,
      'downs': post.downs,
      'gilded': post.gilded,
      'num_comments': post.num_comments
    }

  def _write_redditor_progress(self, totals, start_time, finished=False):
    """Overwrite the current stdout line with the add/skip tallies."""
    elapsed = time.time()-start_time
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)
    clock = '%d:%02d:%02d' % (hours, minutes, seconds)
    tail = ' elapsed...Finished.\n' if finished else ' elapsed...'
    sys.stdout.flush()
    # Blank the previous status before writing the new one.
    sys.stdout.write('\r'+' '*80)
    sys.stdout.write(
      '\r\t' + colored('Added', 'green') + '/' + colored('skipped', 'yellow') + ': '
      + colored(str(totals['subs_added']), 'green') + '/'
      + colored(str(totals['subs_skipped']), 'yellow') + ' subs, '
      + colored(str(totals['coms_added']), 'green') + '/'
      + colored(str(totals['coms_skipped']), 'yellow') + ' coms, '
      + colored(str(totals['bonus']), 'green') + ' bonus; '
      + clock + tail)
Пример #11
0
 def remove_redditor(self, username):
   """Remove every post authored by ``username`` plus the user record.

   Submissions by other users that were stored only because they contain a
   comment of this user are NOT removed.

   username -- the redditor's name.  The add_* methods shown with this one
       store ``author`` lower-cased, so posts are matched on both the name
       as given and its lower-cased form; matching only the name as given
       would silently remove nothing for a mixed-case name.

   Returns nothing; the number of matched posts is written to the log.
   """
   author_query = {'author': {'$in': [username, username.lower()]}}
   posts = self.m.db.posts
   # Count first: the same query drives both the log figure and the delete.
   count = posts.find(author_query).count()
   posts.remove(author_query)
   # The users collection is keyed by the name exactly as it was added.
   self.m.db.users.remove({'username':username})
   print_log('ok', self.name+'.remove_redditor:', 'Removed '+str(count)+' entries authored by '+username+'.')
Пример #12
0
 def close(self):
   """Flush and close the Mongo client connection.

   Calls ``fsync()`` on the client, disconnects it, and resets
   ``self.client``, ``self.db`` and ``self.collection`` to ``None``.

   An exception from ``fsync()`` still propagates to the caller, but the
   client is disconnected and the attributes are cleared first, so a failed
   flush cannot leave the connection open behind stale handles.
   """
   try:
     self.client.fsync()
   finally:
     # Runs on success and on failure alike.
     self.client.disconnect()
     self.client = self.db = self.collection = None
   print_log('ok', self.name, 'Connection closed.')