def main():
    reddit = Reddit(config.data_location)
    # following code explores saving user posts per user
    for user in reddit.get_users():
        # os.mkdir('../tmp/{user.name}')
        with open(f'../tmp/{user.name}.csv', 'w') as fp:
            csv_file = csv.writer(fp)
            csv_file.writerow(['SeqId', 'InstNo', 'Author', 'Text'])
            for post in user.posts:
                # with open(f'../tmp/{user.name}.csv', 'w') as fp:
                if ('selftext' in post and post['selftext']
                        and post['selftext'] != '[removed]'
                        and post['subreddit'] != 'makeupexchange'):
                    content_post = post.get('selftext').replace('\n', ' ')
                    # strip markdown links and bare URLs
                    content_post = re.sub(r'\[.*?\]\(http\S+\)|http\S+', '',
                                          content_post, flags=re.MULTILINE)
                    content_post = nltk.tokenize.word_tokenize(content_post)
                    content_post = ' '.join(content_post)
                    content_post = nltk.tokenize.sent_tokenize(content_post)
                    for i in range(len(content_post) - 1):
                        content_post[i] = content_post[i] + ' <SENT>'
                    content_post = ' '.join(content_post)
                    csv_file.writerow([post.get('id'), 0, user.name, content_post])
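# The snippet above normalizes each post with an NLTK tokenize/join round-trip and
# then tags sentence boundaries. A minimal, self-contained sketch of that transform
# (function name and sample text are hypothetical; requires the NLTK 'punkt' data):
import nltk

def mark_sentence_boundaries(text):
    # word_tokenize + join normalizes spacing around punctuation
    text = ' '.join(nltk.tokenize.word_tokenize(text))
    sentences = nltk.tokenize.sent_tokenize(text)
    # every boundary except the last gets a literal '<SENT>' marker
    for i in range(len(sentences) - 1):
        sentences[i] += ' <SENT>'
    return ' '.join(sentences)

# mark_sentence_boundaries("First point. Second point.")
# -> 'First point . <SENT> Second point .'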
def main(subreddit_list):
    reddit = Reddit(config.data_location)
    subreddits = {subreddit.strip().split("/")[-1] for subreddit in subreddit_list}
    for subreddit in subreddits:
        sub = reddit.get_subreddit(subreddit)
        with open(f'../acl/{subreddit}_user_perline.csv', 'w') as fp:
            csv_file = csv.writer(fp)
            csv_file.writerow(['SeqId', 'InstNo', 'Author', 'Text'])
            for post in sub.posts:
                if ('selftext' in post and post['selftext']
                        and post['selftext'] != '[removed]'
                        and post['author'] != '[deleted]'
                        and post['author'] != 'AutoModerator'):
                    content_post = post.get('selftext').replace('\n', ' ').lower()
                    # clean_text = clean(content_post)
                    # csv_file.writerow([post.get('id'), 0, post['author'], clean_text])
                    content_post = nltk.tokenize.sent_tokenize(content_post)
                    if len(content_post) > 4:
                        for count, sent in enumerate(content_post):
                            sent = clean(sent)
                            sent = ' '.join(nltk.tokenize.word_tokenize(sent))
                            csv_file.writerow([post.get('id'), count,
                                               post['subreddit'], sent])
def main(user_list, subreddit_list, output_file):
    reddit = Reddit(config.data_location)
    subreddits = {forum.strip().split("/")[-1] for forum in subreddit_list}
    users = {useritem.strip().split(",")[-1] for useritem in user_list}
    subreddits.discard("")
    subreddits = sorted(subreddits)
    users.discard("")
    users = sorted(users)
    csvf = csv.writer(output_file)
    csvf.writerow(["username", "month", "subreddit", "count"])
    for s in subreddits:
        print(s)
        subcount = defaultdict(lambda: defaultdict(int))  # author -> month -> count
        for post in reddit.get_subreddit(s).posts:
            if post.get("author", "") in users:
                month = datetime.utcfromtimestamp(post["created_utc"]).strftime('%Y-%m')
                subcount[post.get("author", "")][month] += 1
        for u in sorted(subcount):
            for t in sorted(subcount[u]):
                csvf.writerow([u, t, s, subcount[u][t]])
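# The author -> month -> count table above relies on a nested defaultdict, which
# allows incrementing without checking whether either key exists yet. A small
# stand-alone sketch of the pattern (sample data is hypothetical):
from collections import defaultdict

subcount = defaultdict(lambda: defaultdict(int))  # author -> month -> count
for author, month in [('alice', '2019-01'), ('alice', '2019-01'), ('bob', '2019-02')]:
    subcount[author][month] += 1

assert subcount['alice']['2019-01'] == 2
assert subcount['bob']['2019-02'] == 1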
def test_mark_as_read(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    # pylint: disable-msg=E1101
    msg = six_next(oth.user.get_unread(limit=1))
    msg.mark_as_read()
    self.assertTrue(msg not in oth.user.get_unread(limit=5))
def main(): """TODO... ADD LOGGING FOR: 1. Process start 2. Collection/write start 3. Collection/write end 4. Process end """ start_time = time.time() print(f'Starting {PROJECT_NAME}') # No need to hit API to collect data if we can just use the sample data if not LOCAL: # Authenticate Reddit w/ credentials reddit = Reddit() # Retrieve top posts from subreddit post_data = reddit.collect_data() # Dump post data to file as JSON write_data(post_data) else: print('LOCAL: True; skipping PRAW to use sample-reddit.json') # Download images using data recently saved in JSON file Downloader.download_images() time_elapsed = round(time.time() - start_time, 3) print(f'Finished {PROJECT_NAME} in {time_elapsed} seconds.')
def build(self):
    '''Build the database.'''
    reddit = Reddit()
    cleaner = Cleaner()
    for subreddit in reddit.get_subreddits():
        for post in reddit.get_posts(subreddit):
            self.database.insert(cleaner.clean(post))
            for comment in reddit.get_comments(post):
                self.database.insert(cleaner.clean(comment))
def test_mark_multiple_as_read(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    messages = list(oth.user.get_unread(limit=2))
    self.assertEqual(2, len(messages))
    self.r.user.mark_as_read(messages)
    # materialize the generator so it can be checked against more than once
    unread = list(oth.user.get_unread(limit=5))
    for msg in messages:
        self.assertTrue(msg not in unread)
def __init__(self, config_dir, logger=Null()):
    self.logger = logger
    self.log = self.logger.getLogger('phoebe.Phoebe')
    self.log.debug('Phoebe Thread initialized')
    self.log.debug('config_dir: %s' % config_dir)
    Thread.__init__(self)
    self.idx = 0
    self.playlist = []
    self.playing = False
    self.buffering = False
    if not path.isdir(config_dir):
        mkdir(config_dir)
    self.config_dir = config_dir
    self.log.debug('Loading history file')
    self.history = LocalStorage(path.join(config_dir, 'history.json'),
                                logger=self.logger)
    self.log.debug('Loading settings file')
    self.settings = LocalStorage(path.join(config_dir, 'settings.json'),
                                 logger=self.logger)
    self.reddit = Reddit(logger=self.logger)
    if ('reddit_username' in self.settings.keys()
            and 'reddit_password' in self.settings.keys()):
        self.reddit.login(self.settings['reddit_username'],
                          self.settings['reddit_password'])
    if 'download_dir' not in self.settings.keys():
        self.settings['download_dir'] = path.join(path.expanduser('~'),
                                                  'Downloads', 'phoebe')
    if not path.isdir(self.settings['download_dir']):
        mkdir(self.settings['download_dir'])
    self.mpq = Queue()
    # TODO: vlc backend support. There should also be an auto-detected fallback
    if 'backend' not in self.settings.keys():
        self.settings['backend'] = 'mplayer'
    if self.settings['backend'] == 'mplayer':
        self.mp = MPlayerThread(queue=self.mpq, logger=self.logger)
        self.mp.daemon = True
        self.mp.start()
    self.playtime = 0
    self.dlq = Queue()
    self.dl = DLThread(self.dlq, logger=self.logger)
    self.dl.daemon = True
    self.dl.start()
def main():
    options = parse_args()
    r = Reddit(options.subreddit)
    info('Fetching threads from Reddit')
    top_titles = r.top(options.period, options.limit)
    entities = [Entity(title) for title in top_titles]
    info('Found {} threads'.format(len(entities)))
    for entity in entities:
        try:
            entity.search_term = search_term_from_title(entity.reddit_title)
        except Exception:
            error('Failed to convert Reddit title "{}" to a search term'.format(
                entity.reddit_title))
    refresh_token = read_refresh_token(options.refresh_token_file)
    try:
        s = Spotify(options.spotify_client_id, options.spotify_client_secret,
                    refresh_token)
    except Exception as e:
        error('Failed to create Spotify agent')
        error(e)
        return 1
    info('Searching Spotify for tracks')
    for entity in entities:
        try:
            entity.spotify_track = s.search_track(entity.search_term)
        except Exception as e:
            error(e)
            error('Skipping...')
    # set comprehension to dedupe tracks
    tracks_found = list({entity.spotify_track for entity in entities
                         if entity.spotify_track is not None})
    info('Found {} Spotify tracks'.format(len(tracks_found)))
    if not (float(len(tracks_found)) / len(entities)) > options.search_threshold:
        error('Search of Spotify tracks under threshold of {}'.format(
            options.search_threshold))
        return 1
    if not options.dry_run:
        try:
            info('Removing existing tracks from playlist')
            s.clear_playlist(options.playlist_id)
            info('Adding {} new tracks to playlist'.format(len(tracks_found)))
            s.add_tracks_to_playlist(options.playlist_id, tracks_found)
        except Exception as e:
            error(e)
            return 1
    info('Run completed successfully')
    return 0
def test_mark_as_unread(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    found = None
    for msg in oth.user.get_inbox():
        if not msg.new:
            found = msg
            msg.mark_as_unread()
            break
    else:
        self.fail('Could not find a read message.')
    self.assertTrue(found in oth.user.get_unread())
def parse(ignore_duty=True, ignore_resolutions=True):
    govfeed = feedparser.parse('http://www.govtrack.us/events/events.rss?'
                               'feeds=misc%3Aintroducedbills')
    r = Reddit(user_agent='WatchingCongress/1.0')
    r.login('FuturistBot', '<BOTPASS>')
    for entry in govfeed.entries:
        if not entry['guid'].find('guid'):
            logging.info("Couldn't find GUID")
            continue
        if not entry['title']:
            logging.info("No title for bill: {0}".format(entry['guid']))
            continue
        if house_collection.find_one({'guid': entry['guid']}):
            logging.info("Already created story: {0}".format(entry['title']))
            continue
        if ignore_duty and 'duty' in entry['title'] and 'temporar' in entry['title']:
            logging.info("Ignored boring bill: {0}".format(entry['title']))
            continue
        if ignore_resolutions and '.Res' in entry['title']:
            logging.info("Ignored resolution: {0}".format(entry['title']))
            continue
        record = {
            'title': entry['title'],
            'description': entry['description'],
            'link': entry['link'],
            'guid': entry['guid'],
        }
        bill_number = entry['title'].split(':')[0]
        try:
            news_stories = find_news_stories(bill_number)
        except Exception as e:
            news_stories = []
            logging.error("Couldn't parse Google News: {}".format(unicode(e)))
        try:
            text = template.render(description=entry['description'],
                                   link=entry['link'],
                                   news_stories=news_stories)
            r.submit('futuristparty', entry['title'], text=text)
            house_collection.insert(record)
            logging.info("Created story: {0}".format(entry['title']))
        except Exception as e:
            logging.error("Exception occurred: {0}".format(unicode(e)))
        time.sleep(2)
def main():
    '''Instantiates Reddit and the notification notifier.'''
    reddit = Reddit()
    notifier = Notifier()
    while True:
        reddit.fetch_latest_posts()
        for notification in reddit.notifications:
            if notification:
                notifier.send_notification(
                    message=notification.get('title', ''),
                    link=notification.get('link', ''),
                    title='iRNBA')
                time.sleep(5)
        time.sleep(FIVE_MINUTES)
def test_mark_multiple_as_read(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    messages = []
    for msg in oth.user.get_unread(limit=None):
        if msg.author != oth.user.name:
            messages.append(msg)
            if len(messages) >= 2:
                break  # a `return` here would skip the assertions below
    self.assertEqual(2, len(messages))
    self.r.user.mark_as_read(messages)
    unread = list(oth.user.get_unread(limit=5))
    for msg in messages:
        self.assertTrue(msg not in unread)
def main(subreddits, subreddit_list, top_n):
    reddit = Reddit(config.data_location)
    subreddits = list(subreddits)
    if subreddit_list is not None:
        subreddits.extend(forum.strip().split("/")[-1] for forum in subreddit_list)
    for subreddit in subreddits:
        subr = reddit.get_subreddit(subreddit)
        top_authors = subr.top_authors(top_n)
        for auth in top_authors:
            print(subreddit + "," + auth)
def from_dict(self, dic):
    self.topic_name = dic['topic_name']
    procon = Procon()
    procon.from_dict(dic['procon'])
    self.procon = procon
    reddit = Reddit()
    reddit.from_dict(dic['reddit'])
    self.reddit = reddit
    self.similarity_matrices = dic['similarity_matrices']
def main(subreddit, top_n):
    reddit = Reddit(config.data_location)
    subr = reddit.get_subreddit(subreddit)
    users = Counter(post['author'] for post in subr.posts)
    # remove deleted accounts
    del users['[deleted]']
    top_authors = next(zip(*users.most_common(top_n)))
    print('Pulling the following authors...')
    print('\n'.join(top_authors))
    print(top_authors)
    # pull 1000 posts for each user each time
    pull_posts(1000, authors=top_authors)
def run(self, host, port):
    self.reddit = Reddit(creds.key, creds.secret, creds.username,
                         creds.password, creds.redirect_uri)
    self.reddit.updateToken()
    self.reddit.testAccess()
    sleeptime = 0
    while True:
        if sleeptime > 10:
            time.sleep(10)
        elif sleeptime > 1:
            time.sleep(1)
        # Connect to host:port, get the fp
        fp = self.connect(host, port)
        # Send hostname of client over initially
        hostname = socket.getfqdn()
        fp.write(hostname + '\n')
        fp.flush()
        if debug:
            print 'Sent hostname'
        # Recv all the urls
        reqlist = []
        newline = False
        while True:
            line = fp.readline()
            line = line.strip()
            if line != '':
                reqlist.append(line.split(','))
            else:
                if newline:
                    break
                newline = True
        fp.flush()
        print host + ' >> ' + str(reqlist)
        # See if any urls were sent, close if zero
        if len(reqlist) == 0:
            if debug:
                print 'No requests'
            self.close()
            sleeptime += 1
            continue
        sleeptime = 0
        if debug:
            print 'Downloading requests'
        # Download all the urls otherwise
        self.download_data(reqlist)
        # targzip the data
        targz = self.targz()
        # Send the data
        targz_fp = open(targz, 'rb')
        targz_data = targz_fp.read()
        fp.write(targz_data)
        fp.flush()
        print host + ' << archive.tar.gz'
        self.close()
        self.cleanup()
class QReddit:
    def __init__(self):
        self.r = Reddit()
        (self.options, args) = self.parseArguments()
        if len(args) < 1:
            print "Please specify type of action (textpost, linkpost, viewsub, createuser)"
            return
        self.action = args[0]
        if self.options.username and self.options.password:
            self.user = {"username": self.options.username,
                         "password": self.options.password}
        else:
            try:
                self.user = self.getUser()
            except IOError:
                print ("No user was specified through --user and --password "
                       "but could not find 'user.json'. Please either use "
                       "createuser or use --user and --password.")
                sys.exit()

    def parseArguments(self):
        parser = optparse.OptionParser()
        parser.add_option("-s", "--subreddit", help="Specify subreddit", dest="subreddit")
        parser.add_option("-t", "--title", help="Specify title", dest="title")
        parser.add_option("-b", "--body", help="Specify post body (for text post)", dest="body")
        parser.add_option("-l", "--link", help="Specify post link (for link post)", dest="link")
        parser.add_option("-u", "--user", help="Specify username", dest="username")
        parser.add_option("-p", "--pass", help="Specify password", dest="password")
        parser.add_option("-L", "--limit", help="Limit results (for view)", type="int", dest="limit")
        parser.add_option("-i", "--postid", help="Specify post ID", dest="postid")
        return parser.parse_args()

    def performAction(self):
        if self.action == "textpost":
            self.r.doTextPost(self.options, self.user)
        if self.action == "linkpost":
            self.r.doLinkPost(self.options, self.user)
        if self.action == "viewsub":
            self.r.doViewsub(self.options)
        if self.action == "viewpost":
            self.r.doViewpost(self.options)
        if self.action == "createuser":
            self.createUser(self.options.username, self.options.password)

    def getUser(self):
        # let the IOError from open() propagate to the caller
        with open("user.json") as f:
            user = json.load(f)
        return user

    def createUser(self, username, password):
        with open("user.json", "w") as f:
            json.dump({"username": username, "password": password}, f)
def posts2csv(post_f, authors=None, subreddits=None, seen_posts=None,
              verbose=True, limit=1000):
    if seen_posts is None:  # avoid a shared mutable default argument
        seen_posts = set()
    reddit = Reddit(MongoClient('mongodb://127.0.0.1:27017')["reddit"])
    subreddits = [reddit.get_subreddit(s) for s in subreddits]
    authors = [reddit.get_user(a) for a in authors]
    subredditset = set()
    # subreddit info doesn't seem to have the "subreddit_id". To do: get that
    # with r/subreddit/<name>/about. For now, use subreddit name as forum identifier.
    csvp = csv.writer(post_f)
    csvp.writerow("id,replyto,username,user_annotation_flairtext,annotation_over18,"
                  "annotation_score,forum,discourse,title,when,dataset_file,post".split(","))
    for subreddit in subreddits:
        print(subreddit.name)
        postids = set(subreddit.post_ids) - seen_posts
        for i, idd in enumerate(postids):
            post = subreddit.post(idd)
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts:
                continue
            csvp.writerow([post["id"], None, post["author"], post["author_flair_text"],
                           str(post["over_18"]), str(post["score"]),
                           post["subreddit"], "Reddit", post["title"],
                           datetime.fromtimestamp(post["created"], tz).isoformat(),
                           "reddit", post.get("selftext", post["url"])])
            limit -= 1
            if limit == 0:
                return
    for author in authors:
        print(author.name)
        postids = set(author.post_ids) - seen_posts
        for i, post in enumerate(author.post(pid) for pid in postids):
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts:
                continue
            csvp.writerow([post["id"], None, post["author"], post["author_flair_text"],
                           str(post["over_18"]), str(post["score"]),
                           post["subreddit"], "Reddit", post["title"],
                           datetime.fromtimestamp(post["created"], tz).isoformat(),
                           "reddit", post.get("selftext", post["url"])])
            limit -= 1
            if limit == 0:
                return
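# Rationale for defaulting seen_posts to None above: a mutable default such as
# `seen_posts=set()` is evaluated once at definition time and shared across calls,
# so state leaks between invocations. A minimal demonstration (names hypothetical):
def buggy(item, seen=set()):
    seen.add(item)
    return len(seen)

def fixed(item, seen=None):
    if seen is None:
        seen = set()  # fresh set on every call
    seen.add(item)
    return len(seen)

assert buggy('a') == 1
assert buggy('b') == 2  # state leaked from the first call
assert fixed('a') == 1
assert fixed('b') == 1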
def main():
    level = 0
    # Read program arguments
    for arg in sys.argv[1:]:
        (param, value) = arg.split('=')
        if param == '--level':
            level = int(value)
    path = os.path.dirname(os.path.realpath(__file__))
    loggingConf = open('{0}/configs/logging.yml'.format(path), 'r')
    logging.config.dictConfig(yaml.load(loggingConf))
    loggingConf.close()
    logger = logging.getLogger(LOGGER)
    logger.info('Program started')
    config = configparser.ConfigParser()
    config.read('{0}/configs/bot.ini'.format(path))
    username = config['Reddit']['username']
    password = config['Reddit']['password']
    user_agent = config['Reddit']['user-agent']
    dry_run = config['Bot'].getboolean('dry-run')
    if dry_run:
        logger.info('Running in dry run mode. Nothing will be committed')
    reddit = Reddit(username, password, user_agent, dry_run)
    history = History('{0}/{1}'.format(path, DATABASE))
    news = News()
    if level == 0:
        level = int(config['Bot']['level'])
    news_items = news.get_news_items(level)
    for item in news_items:
        url = item[0]
        title = item[1]
        degree = item[2]
        if not history.has_link_been_posted(url):
            history.add_link_as_posted(url, dry_run)
            if not reddit.post_link(get_redirect_url(url), title):
                continue
            break
    logger.info('Program done')
def test_report(self):
    # login as new user to report submission
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    subreddit = oth.get_subreddit(self.sr)
    submission = None
    for submission in subreddit.get_new_by_date():
        if not submission.hidden:
            break
    if not submission or submission.hidden:
        self.fail('Could not find a non-reported submission.')
    submission.report()
    # check if submission was reported
    for report in self.r.get_subreddit(self.sr).get_reports():
        if report.id == submission.id:
            break
    else:
        self.fail('Could not find reported submission.')
def main():
    load_dotenv()
    discord_webhook_url = os.getenv("DISCORD_WEBHOOK_URL")
    subreddit = os.getenv('REDDIT_SUBREDDIT')
    reddit_client_id = os.getenv("REDDIT_CLIENT_ID")
    reddit_client_secret = os.getenv("REDDIT_CLIENT_SECRET")
    discord = Discord(discord_webhook_url)
    # Echo to the Discord webhook
    reddit = Reddit(subreddit, discord.sendMsg, reddit_client_id,
                    reddit_client_secret)
    # Echo locally to test:
    # reddit = Reddit(print_details, reddit_client_id, reddit_client_secret)
    reddit.run()
def parse():
    govfeed = feedparser.parse('http://www.govtrack.us/events/events.rss?'
                               'feeds=misc%3Aintroducedbills')
    r = Reddit(user_agent='WatchingCongress/1.0')
    r.login('congressbot', '<BOTPASS>')
    for entry in govfeed.entries:
        if not entry['guid'].find('guid'):
            logging.info("Couldn't find GUID")
            continue
        if not entry['title']:
            logging.info("No title for bill: {}".format(entry['guid']))
            continue
        if house_collection.find_one({'guid': entry['guid']}):
            logging.info("Already created story: {}".format(entry['title']))
            continue
        if 'duty' in entry['title'] and 'temporar' in entry['title']:
            logging.info("Ignored boring bill: {}".format(entry['title']))
            continue
        if '.Res' in entry['title']:
            logging.info("Ignored resolution: {}".format(entry['title']))
            continue
        record = {
            'title': entry['title'],
            'description': entry['description'],
            'link': entry['link'],
            'guid': entry['guid'],
        }
        try:
            text = template.render(description=entry['description'],
                                   link=entry['link'])
            r.submit('watchingcongress', entry['title'], text=text)
            house_collection.insert(record)
            logging.info("Created story: {}".format(entry['title']))
        except Exception as e:
            logging.error("Exception occurred: {}".format(unicode(e)))
        time.sleep(2)
def select_handler(self):
    handler = 'handlers.{}'.format(self.config['handler'])
    if self.config['handler'] in self._ALLOWED_HANDLERS:
        # convert the snake_case handler name to its CamelCase class name
        class_ = ''.join(x.title() for x in self.config['handler'].split('_'))
        module = importlib.import_module(handler)
        handler_class = getattr(module, class_)
        return handler_class(Reddit(config).build(), self.config)
    else:
        raise HandlerNotAllowed('Handler "{}" not allowed'.format(handler))
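# The handler lookup above maps a snake_case config value to a CamelCase class in a
# module of the same name. A stripped-down sketch of that convention (the
# 'handlers' package and 'submission_stream' name are hypothetical):
import importlib

def load_handler(name):
    class_name = ''.join(part.title() for part in name.split('_'))  # 'submission_stream' -> 'SubmissionStream'
    module = importlib.import_module('handlers.{}'.format(name))
    return getattr(module, class_name)

# load_handler('submission_stream') would import handlers.submission_stream and
# return its SubmissionStream class.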
def main(subreddit): print "Subreddit :", subreddit rsub = url_data("http://www.reddit.com/r/%s/new/.json?sort=new"%subreddit, json=True) children = rsub['data']['children'] r = Reddit(USERNAME, PASSWORD) session = r.login() f = open('history.txt', 'r') history = f.read() f.close() for child in children: is_self = child['data']['is_self'] thread_id = child['data']['name'] print thread_id if thread_id in history: print "Thread: %s already in history"%thread_id pass else: if not is_self: img_url = child['data']['url'] thread_id = child['data']['name'] repost = karmadecay(img_url) if repost: text = form_comment(repost) r_resp = r.post(session, thread_id, text) if r_resp != None: error = r_resp['json']['errors'] delay = find_digit(error[0][1]) print "waiting: %s seconds" %delay*60 time.sleep(delay*60) r.post(session, thread_id, text) f = open('history.txt', 'a') f.write("\n%s"%thread_id) print text f.close() time.sleep(1) print "Comment Posted:", thread_id else: pass else: pass print "Finished" return
def get_authors_timeline(author: str, topics: List[str]) -> AuthorTimeline:
    reddit = Reddit(config.data_location)
    posts: Dict[str, TimelinePost] = {}
    for topic in topics:
        df = TopicsDFCache.load(topic)
        filtered_df = df[df.Author == author]
        for _, row in filtered_df.iterrows():
            post_id = row['SeqId']
            sentence_number = row['InstNo']
            text = row['Text']
            sent = TimelineSentence(sentence_number, text, topic)
            if post_id not in posts:
                create_time = reddit.get_post(post_id)['created_utc']
                posts[post_id] = TimelinePost(post_id, create_time)
            posts[post_id].sentences = list(
                sorted(posts[post_id].sentences + [sent], key=lambda x: x.number))
    sorted_posts = list(sorted(posts.values(), key=lambda x: x.timestamp))
    # selected_posts = list()
    #
    # year_dict = {2012: 0, 2013: 0, 2014: 0, 2015: 0, 2016: 0, 2017: 0, 2018: 0}
    # month_dict = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0}
    #
    # for item in sorted_posts:
    #     if date.fromtimestamp(item.timestamp).year in year_dict.keys():
    #         year_dict[date.fromtimestamp(item.timestamp).year] += 1
    # year = max(year_dict.items(), key=operator.itemgetter(1))[0]
    # for item in sorted_posts:
    #     if date.fromtimestamp(item.timestamp).year == year:
    #         if date.fromtimestamp(item.timestamp).month in month_dict.keys():
    #             month_dict[date.fromtimestamp(item.timestamp).month] += 1
    # month = max(month_dict.items(), key=operator.itemgetter(1))[0]
    #
    # for item in sorted_posts:
    #     if date.fromtimestamp(item.timestamp).year == year and date.fromtimestamp(item.timestamp).month == month:
    #         selected_posts.append(item.sentences)
    # print(f'{author} most frequent year {year} and month {month}')
    return AuthorTimeline(author, sorted_posts)
class PostCollectorTestCase(unittest.TestCase):
    def setUp(self):
        self.reddit = Reddit()
        self.somePost = self.reddit.client.submission(id='8reg0o')

    def test_downloadImageFromSubmission(self):
        self.filename = self.reddit.downloadImageFromSubmission(self.somePost)
        self.assertEqual("temp\\vfvxr2xvd8411.jpg", self.filename)

    def tearDown(self):
        os.remove(self.filename)
def setUp(self):
    super(RedditTest, self).setUp()
    oauth_dropins.reddit.REDDIT_APP_KEY = 'my_app_key'
    oauth_dropins.reddit.REDDIT_APP_SECRET = 'my_app_secret'
    self.handler.messages = []
    user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor())
    self.auth_entity = oauth_dropins.reddit.RedditAuth(
        id='my_string_id', refresh_token='silly_token',
        user_json=json_dumps(user))
    self.auth_entity.put()
    self.r = Reddit.new(self.handler, auth_entity=self.auth_entity)
def setUp(self):
    super().setUp()
    oauth_dropins.reddit.REDDIT_APP_KEY = 'my_app_key'
    oauth_dropins.reddit.REDDIT_APP_SECRET = 'my_app_secret'
    user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor())
    user['name'] = 'bONkerFIeld'
    self.auth_entity = oauth_dropins.reddit.RedditAuth(
        id='my_string_id', refresh_token='silly_token',
        user_json=json_dumps(user))
    self.auth_entity.put()
    self.r = Reddit.new(auth_entity=self.auth_entity)
def main():
    reddit = Reddit(config.data_location)

    # following code explores saving user posts per user
    # for user in reddit.get_users():
    #     os.mkdir('../tmp/{user.name}')
    #     #with open(f'../tmp/{user.name}.csv', 'w') as fp:
    #     #csv_file = csv.writer(fp)
    #     #count = 0
    #     for post in user.posts:
    #         with open(f'../tmp/{user.name}.csv', 'w') as fp:
    #             if 'selftext' in post and post['selftext'] and post['selftext'] != '[removed]':
    #                 #csv_file.writerow([post.get('id'), time.ctime(post['created_utc']), post.get('subreddit'), post.get('selftext').replace('\n', ' ')])
    #                 fp.write(post.get('selftext').replace('\n', ' '))
    #                 fp.write('\n')

    # following code explores saving user posts per user per post
    # for user in reddit.get_users():
    #     dirpath = '../user_posts/' + user.name
    #     os.mkdir(dirpath)
    #     for post in user.posts:
    #         if 'selftext' in post and post['selftext'] and post['selftext'] != '[removed]':
    #             filepath = os.path.join(dirpath, post.get('id') + '.txt')
    #             with open(filepath, 'w') as fp:
    #                 fp.write(post.get('selftext').replace('\n', ' '))

    # following code saves all user posts into one file
    # with open(f'../all_posts/all.txt', 'w') as fp:
    #     for user in reddit.get_users():
    #         print('Processing ' + str(user.name) + ' \'s history')
    #         for post in user.posts:
    #             if 'selftext' in post and post['selftext'] and post['selftext'] != '[removed]':
    #                 fp.write(post.get('selftext').replace('\n', ' '))
    #                 fp.write('\n')

    # the following code saves a text file per user
    for user in reddit.get_users():
        with open(f'../user_history/{user.name}.txt', 'w') as fp:
            for post in user.posts:
                if ('selftext' in post and post['selftext']
                        and post['selftext'] != '[removed]'):
                    fp.write(post.get('selftext').replace('\n', ' '))
                    fp.write('\n')
def main(subreddit_file, database):
    cur.execute("""select entity_source_id from data_source_instance
                   where entity_source_descriptor = 'reddit#id#POST';""")
    keys = {row["entity_source_id"] for row in cur.fetchall()}
    subreddits = {s.strip() for s in subreddit_file}
    keys2 = set()
    for subreddit in subreddits:
        s = Reddit.get_subreddit(subreddit)
        keys2.update({p["id"] for p in s.posts})  # set.update(); sets have no extend()
    print "Found", len(keys), "keys in", database, "versus", len(keys2), "keys in directory"
    print "\n".join(list(keys2.difference(keys))[:1000])
def main():
    startTime = time()
    parser = argparse.ArgumentParser(
        description='Scrapes /r/GameDeals for yuge deals.')
    parser.add_argument('--sleep', type=int, default=5,
                        help='Sleep duration before autorun cmd window closes')
    args = parser.parse_args()
    reddit = Reddit(TARGET_SUBREDDIT)
    reddit.start()
    execTime = time() - startTime
    sleep(0.5)
    print(f"Execution took {int(execTime)} seconds")
    helpers.print_animated_text("Exiting in", args.sleep, helpers.countdown)
    sleep(args.sleep)
    helpers.done()
def main(subreddit_list, keyword_list, topic_list):
    reddit = Reddit(config.data_location)
    subreddits = {subreddit.strip().split("/")[-1] for subreddit in subreddit_list}
    keywords = {keyword.strip().lower() for keyword in keyword_list}
    print(keywords)
    topics = {topic.strip().lower() for topic in topic_list}
    print(topics)
    for subreddit in subreddits:
        sub = reddit.get_subreddit(subreddit)
        with open(f'../lgbtq/data/{subreddit}.csv', 'w') as fp:
            csv_file = csv.writer(fp)
            csv_file.writerow(['PostId', 'PostTime', 'author', 'PostContent',
                               'MatchingWord', 'MatchTopic'])
            for post in sub.posts:
                if ('selftext' in post and post['selftext']
                        and post['selftext'] != '[removed]'
                        and post['author'] != '[deleted]'
                        and post['author'] != 'AutoModerator'):
                    content_post = post.get('selftext').replace('\n', ' ').lower()
                    clean_text = clean(content_post)
                    match_1 = match(keywords, clean_text)
                    match_2 = match(topics, clean_text)
                    if len(set(match_1)) != 0 or len(set(match_2)) != 0:
                        csv_file.writerow([
                            post.get('id'),
                            time.ctime(post.get('created_utc')),
                            post['author'],
                            clean_text,
                            set(match_1) if len(match_1) > 0 else None,
                            set(match_2) if len(match_2) > 0 else None
                        ])
def __init__(self, file):
    # Initialize the Bot
    super().__init__(file)
    self.update_time = datetime.now()
    # Load the configurations.
    with open(file, 'r') as y:
        config = yaml.load(y)
    # Grab the database filename from the configs.
    self.dbfile = config['database']
    # Create a Reddit object to handle the Reddit-specific tasks.
    self.reddit = Reddit(self.dbfile)
def __init__(self, subreddit, site, verbosity):
    self.reddit = Reddit(str(self), site)
    self.subreddit = self.reddit.get_subreddit(subreddit)
    self.verbosity = verbosity
    self.submissions = []
    self.comments = []
    self.submitters = defaultdict(list)
    self.commenters = defaultdict(list)
    self.min_date = 0
    self.max_date = time.time() - DAYS_IN_SECONDS * 3
    self.prev_srs = None
    # Config
    self.reddit.config.comment_limit = -1  # Fetch max comments possible
    self.reddit.config.comment_sort = 'top'
def authenticate(self, username, password, request=None):
    try:
        reddit = Reddit(user_agent=USER_AGENT)
        reddit.login(username, password)
        r_user = reddit.user
    except urllib2.URLError:
        log.warning("Could not reach reddit. Is it down?")
        r_user = None
    except InvalidUserPass:
        log.info(_('User "%s" tried to login without valid credentials') % username)
        return None
    except urllib2.HTTPError:
        log.info(_('User "%s" tried to login without valid credentials') % username)
        return None
    try:
        db_user = User.objects.get(username__iexact=username)
        if not r_user and not db_user.check_password(password):
            return None
        if not db_user.is_active:
            # instead of deleting users, disable them.
            return None
    except User.DoesNotExist:
        # Rules for joining
        if (r_user and r_user.comment_karma >= JOIN_MIN_COMMENT_KARMA
                and r_user.link_karma >= JOIN_MIN_LINK_KARMA
                and (datetime.now() - datetime.utcfromtimestamp(r_user.created_utc))
                >= JOIN_MIN_MEMBER_TIME):
            db_user = User(username=username, is_active=True)
        else:
            return None
    db_user.set_password(password)  # Hash and store password for offline logins
    db_user.backend = self.__class__.__name__
    db_user.save()
    return db_user
def __init__(self, topic_settings):
    if 'topic-name' not in topic_settings:
        print("Topic: Provide a topic name")
        exit(-12312)
    self.topic_name = topic_settings['topic-name']
    if 'procon' not in topic_settings:
        print('Topic: Provide procon settings')
        exit(-124)
    procon_settings = topic_settings['procon']
    if 'reddit' not in topic_settings:
        print('Topic: Provide reddit settings')
        exit(-3324)
    reddit_settings = topic_settings['reddit']
    procon_settings['topic'] = self.topic_name
    reddit_settings['topic'] = self.topic_name
    self.procon = Procon(procon_settings)
    self.reddit = Reddit(reddit_settings)
def __init__(self, topic_settings={}):
    if topic_settings == {}:
        print("Topic: __init__: No settings given. Creating empty object.")
        return
    if 'topic-name' not in topic_settings:
        print("Topic: Provide a topic name")
        exit(-12312)
    self.topic_name = topic_settings['topic-name']
    if 'procon' not in topic_settings:
        print('Topic: Provide procon settings')
        exit(-124)
    procon_settings = topic_settings['procon']
    if 'reddit' not in topic_settings:
        print('Topic: Provide reddit settings')
        exit(-3324)
    reddit_settings = topic_settings['reddit']
    procon_settings['topic'] = self.topic_name
    reddit_settings['topic'] = self.topic_name
    self.procon = Procon(procon_settings)
    self.reddit = Reddit(reddit_settings)
    self.similarity_matrices = {}
    for name, similarity_matrix_algorithm in similarity_matrix_algorithms.items():
        self.similarity_matrices[name] = similarity_matrix_algorithm.match(
            [comment.text for comment in self.get_all_comments()],
            self.get_pros(), self.get_cons())
def __init__(self, updater):
    self.sd = SpoilerDetector()
    self.ms = MythicSpoiler()
    self.yolo = Yolo(config.model, config.classes, config.conf)
    self.reddit = Reddit(subreddit="magicTCG")
    self.scryfall_futur_cards_id = []
    self.reddit_futur_cards_subm_id = []
    self.mythicspoiler_futur_cards_url = []
    self.limit_days = 45
    # List of Spoiler objects found within the last limit_days
    limit_date = datetime.today() - timedelta(days=self.limit_days)
    self.spoiled = Session.query(Spoiler).filter(
        Spoiler.found_at > limit_date).all()
    # Job queues:
    updater.job_queue.run_repeating(self.general_crawl, interval=60, first=10)
def __init__(self):
    self.r = Reddit()
    (self.options, args) = self.parseArguments()
    if len(args) < 1:
        print "Please specify type of action (textpost, linkpost, viewsub, createuser)"
        return
    self.action = args[0]
    if self.options.username and self.options.password:
        self.user = {"username": self.options.username,
                     "password": self.options.password}
    else:
        try:
            self.user = self.getUser()
        except IOError:
            print ("No user was specified through --user and --password "
                   "but could not find 'user.json'. Please either use "
                   "createuser or use --user and --password.")
            sys.exit()
def process_topic(topic):
    reddit = Reddit(config.data_location)
    df = TopicsDFCache.load(topic)
    # Add Year-Month column
    # df['Year'] = df.progress_apply(partial(get_year_from_row, reddit), axis=1)
    df['Year-Month'] = df.progress_apply(partial(get_year_month_from_row, reddit),
                                         axis=1)
    # min = df['Year'].min()
    # max = df['Year'].max()
    # print(f'Year range from {min} to {max}')
    # Filter out unneeded authors
    df = df[df['Author'].isin(full_authors)]
    df.to_csv(os.path.join(config.topic_dir,
                           f'{topic}-filtered-with_year_and_month.csv'))
def get_user(user):
    '''
    Gets the information from the specified public user object.

    @param: the PublicUser object that points to the target user
    @return: a 2D parallel list: [img_urls, titles, scores, authors]
    '''
    # initializes the reddit praw wrapper
    reddit = Reddit(user.returnSubreddit()[2], NUM_POSTS)
    # gets the data from reddit
    img_urls = reddit.getImageUrl()
    titles = reddit.getTitle()
    scores = reddit.getScore()
    authors = reddit.getAuthor()
    return [img_urls, titles, scores, authors]
class Topic:
    def __init__(self, topic_settings):
        if 'topic-name' not in topic_settings:
            print("Topic: Provide a topic name")
            exit(-12312)
        self.topic_name = topic_settings['topic-name']
        if 'procon' not in topic_settings:
            print('Topic: Provide procon settings')
            exit(-124)
        procon_settings = topic_settings['procon']
        if 'reddit' not in topic_settings:
            print('Topic: Provide reddit settings')
            exit(-3324)
        reddit_settings = topic_settings['reddit']
        procon_settings['topic'] = self.topic_name
        reddit_settings['topic'] = self.topic_name
        self.procon = Procon(procon_settings)
        self.reddit = Reddit(reddit_settings)

    def getAllComments(self):
        return self.reddit.getAllComments()

    def getPros(self):
        return self.procon.pros

    def getCons(self):
        return self.procon.cons

# topic = Topic({'topic-name': 'medical marijuana', 'procon': {'mode': 'find'}, 'reddit': {'mode': 'find'}})
# print(topic.procon.background)
def get_category(category):
    '''
    Gets the information from the specified public category object.

    @param: Category object representing the target category
    @return: a 2D parallel list: [img_urls, titles, scores, authors]
    '''
    # initializes the reddit praw wrapper
    reddit = Reddit(category.subreddit, NUM_POSTS)
    # gets the data from reddit
    img_urls = reddit.getImageUrl()
    titles = reddit.getTitle()
    scores = reddit.getScore()
    authors = reddit.getAuthor()
    return [img_urls, titles, scores, authors]
def __init__(self, subreddit, site=None, verbose=None):
    self.reddit = Reddit(str(self), site)
    self.sub = self.reddit.get_subreddit(subreddit)
    self.verbose = verbose
    self._current_flair = None
def post_to_reddit(self):
    reddit_connection = Reddit(user_agent='wootbot/1.0')
    reddit_connection.login(bot_username, bot_password)
    reddit_connection.submit('woot', str(self), url=self.url)
class SubRedditStats(object):
    VERSION = '0.2.0'
    post_prefix = 'Subreddit Stats:'
    post_header = '---\n###%s\n'
    post_footer = ('>Generated with [BBoe](/user/bboe)\'s [Subreddit Stats]'
                   '(https://github.com/bboe/subreddit_stats) \n%s'
                   'SRS Marker: %d')
    re_marker = re.compile(r'SRS Marker: (\d+)')

    @staticmethod
    def _previous_max(submission):
        try:
            val = SubRedditStats.re_marker.findall(submission.selftext)[-1]
            return float(val)
        except (IndexError, TypeError):
            print 'End marker not found in previous submission. Aborting'
            sys.exit(1)

    @staticmethod
    def _permalink(permalink):
        tokens = permalink.split('/')
        if tokens[8] == '':  # submission
            return '/comments/%s/_/' % (tokens[6])
        else:  # comment
            return '/comments/%s/_/%s?context=1' % (tokens[6], tokens[8])

    @staticmethod
    def _user(user):
        return '[%s](/user/%s)' % (user.replace('_', '\_'), user)

    def __init__(self, subreddit, site, verbosity):
        self.reddit = Reddit(str(self), site)
        self.subreddit = self.reddit.get_subreddit(subreddit)
        self.verbosity = verbosity
        self.submissions = []
        self.comments = []
        self.submitters = defaultdict(list)
        self.commenters = defaultdict(list)
        self.min_date = 0
        self.max_date = time.time() - DAYS_IN_SECONDS * 3
        self.prev_srs = None
        # Config
        self.reddit.config.comment_limit = -1  # Fetch max comments possible
        self.reddit.config.comment_sort = 'top'

    def __str__(self):
        return 'BBoe\'s SubRedditStats %s' % self.VERSION

    def login(self, user, pswd):
        if self.verbosity > 0:
            print 'Logging in'
        self.reddit.login(user, pswd)

    def msg(self, msg, level, overwrite=False):
        if self.verbosity >= level:
            sys.stdout.write(msg)
            if overwrite:
                sys.stdout.write('\r')
                sys.stdout.flush()
            else:
                sys.stdout.write('\n')

    def prev_stat(self, prev_url):
        submission = self.reddit.get_submission(prev_url)
        self.min_date = self._previous_max(submission)
        self.prev_srs = prev_url

    def fetch_recent_submissions(self, max_duration, after, exclude_self,
                                 since_last=True):
        '''Fetches recent submissions in subreddit with boundaries.

        Does not include posts within the last three days as their scores may
        not be representative.

        Keyword arguments:
        max_duration -- When set, specifies the number of days to include
        after -- When set, fetch all submissions after this submission id.
        exclude_self -- When true, don't include self posts.
        since_last -- When true, use info from the last submission to
                      determine the stop point
        '''
        if max_duration:
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        url_data = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.get_new_by_date(limit=None,
                                                         url_data=url_data):
            if submission.created_utc > self.max_date:
                continue
            if submission.created_utc <= self.min_date:
                break
            if (since_last and str(submission.author) == str(self.reddit.user)
                    and submission.title.startswith(self.post_prefix)):
                # Use info in this post to update the min_date
                # and don't include this post
                self.msg('Found previous: %s' % submission.title, 2)
                if self.prev_srs is None:  # Only use the most recent
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission.permalink
                continue
            if exclude_self and submission.is_self:
                continue
            self.submissions.append(submission)
        self.msg('DEBUG: Found %d submissions' % len(self.submissions), 1)
        if len(self.submissions) == 0:
            return False
        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def fetch_top_submissions(self, top, exclude_self):
        '''Fetches top 1000 submissions by some top value.

        Keyword arguments:
        top -- One of week, month, year, all
        exclude_self -- When true, don't include self posts.
        '''
        if top not in ('day', 'week', 'month', 'year', 'all'):
            raise TypeError('%r is not a valid top value' % top)
        self.msg('DEBUG: Fetching submissions', 1)
        url_data = {'t': top}
        for submission in self.subreddit.get_top(limit=None,
                                                 url_data=url_data):
            if exclude_self and submission.is_self:
                continue
            self.submissions.append(submission)
        self.msg('DEBUG: Found %d submissions' % len(self.submissions), 1)
        if len(self.submissions) == 0:
            return False
        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def process_submitters(self):
        self.msg('DEBUG: Processing Submitters', 1)
        for submission in self.submissions:
            if submission.author:
                self.submitters[str(submission.author)].append(submission)

    def process_commenters(self):
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on %d submissions' % num, 1)
        for i, submission in enumerate(self.submissions):
            self.msg('%d/%d submissions' % (i + 1, num), 2, overwrite=True)
            if submission.num_comments == 0:
                continue
            try:
                self.comments.extend(submission.all_comments_flat)
            except Exception as exception:
                print 'Exception fetching comments on %r: %s' % (
                    submission.content_id, str(exception))
            for orphans in submission._orphaned.values():
                self.comments.extend(orphans)
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)

    def basic_stats(self):
        sub_ups = sum(x.ups for x in self.submissions)
        sub_downs = sum(x.downs for x in self.submissions)
        comm_ups = sum(x.ups for x in self.comments)
        comm_downs = sum(x.downs for x in self.comments)
        sub_up_perc = sub_ups * 100 / (sub_ups + sub_downs)
        comm_up_perc = comm_ups * 100 / (comm_ups + comm_downs)
        values = [('Total', len(self.submissions), '',
                   len(self.comments), ''),
                  ('Unique Redditors', len(self.submitters), '',
                   len(self.commenters), ''),
                  ('Upvotes', sub_ups, '%d%%' % sub_up_perc,
                   comm_ups, '%d%%' % comm_up_perc),
                  ('Downvotes', sub_downs, '%d%%' % (100 - sub_up_perc),
                   comm_downs, '%d%%' % (100 - comm_up_perc))]
        retval = '||Submissions|%|Comments|%|\n:-:|--:|--:|--:|--:\n'
        for quad in values:
            retval += '__%s__|%d|%s|%d|%s\n' % quad
        return '%s\n' % retval

    def top_submitters(self, num, num_submissions):
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''
        top_submitters = sorted(self.submitters.items(), reverse=True,
                                key=lambda x: (sum(y.score for y in x[1]),
                                               len(x[1])))[:num]
        retval = self.post_header % 'Top Submitters\' Top Submissions'
        for (author, submissions) in top_submitters:
            retval += '0. %d pts, %d submissions: %s\n' % (
                sum(x.score for x in submissions), len(submissions),
                self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = sub.title.replace('\n', ' ').strip()
                if sub.permalink != sub.url:
                    retval += '  0. [%s](%s)' % (title, sub.url)
                else:
                    retval += '  0. %s' % title
                retval += ' (%d pts, [%d comments](%s))\n' % (
                    sub.score, sub.num_comments,
                    self._permalink(sub.permalink))
            retval += '\n'
        return retval

    def top_commenters(self, num):
        score = lambda x: x.ups - x.downs
        num = min(num, len(self.commenters))
        if num <= 0:
            return ''
        top_commenters = sorted(self.commenters.items(), reverse=True,
                                key=lambda x: (sum(score(y) for y in x[1]),
                                               len(x[1])))[:num]
        retval = self.post_header % 'Top Commenters'
        for author, comments in top_commenters:
            retval += '0. %s (%d pts, %d comments)\n' % (
                self._user(author), sum(score(x) for x in comments),
                len(comments))
        return '%s\n' % retval

    def top_submissions(self, num):
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''
        top_submissions = sorted(self.submissions, reverse=True,
                                 key=lambda x: x.score)[:num]
        retval = self.post_header % 'Top Submissions'
        for sub in top_submissions:
            author = str(sub.author)
            title = sub.title.replace('\n', ' ').strip()
            if sub.permalink != sub.url:
                retval += '0. [%s](%s)' % (title, sub.url)
            else:
                retval += '0. %s' % title
            retval += ' by %s (%d pts, [%d comments](%s))\n' % (
                self._user(author), sub.score, sub.num_comments,
                self._permalink(sub.permalink))
        return '%s\n' % retval

    def top_comments(self, num):
        score = lambda x: x.ups - x.downs
        num = min(num, len(self.comments))
        if num <= 0:
            return ''
        top_comments = sorted(self.comments, reverse=True, key=score)[:num]
        retval = self.post_header % 'Top Comments'
        for comment in top_comments:
            author = str(comment.author)
            title = comment.submission.title.replace('\n', ' ').strip()
            retval += '0. %d pts: %s\'s [comment](%s) in %s\n' % (
                score(comment), self._user(author),
                self._permalink(comment.permalink), title)
        return '%s\n' % retval

    def publish_results(self, subreddit, submitters, commenters, submissions,
                        comments, top, debug=False):
        def timef(timestamp):
            dtime = datetime.fromtimestamp(timestamp)
            return dtime.strftime('%Y-%m-%d %H:%M PDT')

        title = '%s %s %ssubmissions from %s to %s' % (
            self.post_prefix, str(self.subreddit), 'top ' if top else '',
            timef(self.min_date), timef(self.max_date))
        if self.prev_srs:
            prev = '[Previous Stat](%s) \n' % self._permalink(self.prev_srs)
        else:
            prev = ''
        basic = self.basic_stats()
        t_commenters = self.top_commenters(commenters)
        t_submissions = self.top_submissions(submissions)
        t_comments = self.top_comments(comments)
        footer = self.post_footer % (prev, self.max_date)
        body = ''
        num_submissions = 10
        while body == '' or len(body) > MAX_BODY_SIZE and num_submissions > 2:
            t_submitters = self.top_submitters(submitters, num_submissions)
            body = (basic + t_submitters + t_commenters + t_submissions +
                    t_comments + footer)
            num_submissions -= 1
        if len(body) > MAX_BODY_SIZE:
            print 'The resulting message is too big. Not submitting.'
            debug = True
        if not debug:
            msg = ('You are about to submit to subreddit %s as %s.\n'
                   'Are you sure? yes/[no]: ' % (subreddit,
                                                 str(self.reddit.user)))
            if raw_input(msg).lower() not in ['y', 'yes']:
                print 'Submission aborted'
            else:
                try:
                    self.reddit.submit(subreddit, title, text=body)
                    return
                except Exception as error:
                    print 'The submission failed:', error
        # We made it here due to either debug=True or an error.
        print title
        print body
class Client:
    sock = None
    reddit = None

    def download_a(self, after=None):
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        posts, nav = self.reddit.getListing('all', after)
        blob = {'posts': posts, 'nav': nav}
        fp = open(os.path.join(download_dir, 'a_' + after), 'w')
        fp.write(json.dumps(blob))
        fp.close()
        return True

    def download_p(self, pid):
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        self.reddit.updateToken()
        post, comments = self.reddit.getPost(pid)
        blob = {'post': post, 'comments': comments}
        fp = open(os.path.join(download_dir, 'p_' + pid), 'w')
        fp.write(json.dumps(blob))
        fp.close()
        return True

    def download_u(self, user):
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        url = 'http://www.reddit.com/user/' + user + '.json'
        filename = 'u_' + user
        data = self.download_get(url, os.path.join(download_dir, filename), True)
        if data == '':
            return False
        blob = json.loads(data)
        nav = parser.extract_listing_nav(blob)
        while nav['after'] is not None:
            newurl = url + '?after=' + nav['after']
            filename = 'u_' + user + '_' + nav['after']
            data = self.download_get(newurl, os.path.join(download_dir, filename), True)
            if data == '':
                return False
            blob = json.loads(data)
            nav = parser.extract_listing_nav(blob)
        return True

    def download_req(self, req):
        # format of request:
        #   | a | <pid>
        #   | p | <pid>
        #   | u | <username> | <after>
        res = True
        if req[0] == 'a':
            res = self.download_a(req[1])
        elif req[0] == 'p':
            res = self.download_p(req[1])
        elif req[0] == 'u':
            res = self.download_u(req[1])
        return res

    def download_data(self, reqlist):
        for req in reqlist:
            self.download_req(req)
            print ' -- ' + str(req)

    def connect(self, host, port):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((host, port))
        self.sock = sock
        fp = sock.makefile('rb+')
        print 'Connected to ' + host + ':' + str(port)
        return fp

    def close(self):
        self.sock.close()
        self.sock = None

    def cleanup(self):
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        for entry in os.listdir(download_dir):
            os.unlink(os.path.join(download_dir, entry))
        os.unlink(os.path.join(tmpdir, 'client', 'archive.tar.gz'))

    def targz(self):
        return shutil.make_archive(os.path.join(tmpdir, 'client', 'archive'),
                                   'gztar',
                                   os.path.join(tmpdir, 'client', 'staging'))

    def run(self, host, port):
        self.reddit = Reddit(creds.key, creds.secret, creds.username,
                             creds.password, creds.redirect_uri)
        self.reddit.updateToken()
        self.reddit.testAccess()
        sleeptime = 0
        while True:
            if sleeptime > 10:
                time.sleep(10)
            elif sleeptime > 1:
                time.sleep(1)
            # Connect to host:port, get the fp
            fp = self.connect(host, port)
            # Send hostname of client over initially
            hostname = socket.getfqdn()
            fp.write(hostname + '\n')
            fp.flush()
            if debug:
                print 'Sent hostname'
            # Recv all the urls
            reqlist = []
            newline = False
            while True:
                line = fp.readline()
                line = line.strip()
                if line != '':
                    reqlist.append(line.split(','))
                else:
                    if newline:
                        break
                    newline = True
            fp.flush()
            print host + ' >> ' + str(reqlist)
            # See if any urls were sent, close if zero
            if len(reqlist) == 0:
                if debug:
                    print 'No requests'
                self.close()
                sleeptime += 1
                continue
            sleeptime = 0
            if debug:
                print 'Downloading requests'
            # Download all the urls otherwise
            self.download_data(reqlist)
            # targzip the data
            targz = self.targz()
            # Send the data
            targz_fp = open(targz, 'rb')
            targz_data = targz_fp.read()
            fp.write(targz_data)
            fp.flush()
            print host + ' << archive.tar.gz'
            self.close()
            self.cleanup()
def get_messages():
    r = Reddit(user_agent='calpoly-flair')
    r.login(user=REDDIT_USERNAME, password=REDDIT_PASSWORD)
    inbox = r.get_inbox()
    return inbox.get_messages()
class ShowerThoughtBot(Bot):
    def __init__(self, file):
        # Initialize the Bot
        super().__init__(file)
        self.update_time = datetime.now()
        # Load the configurations.
        with open(file, 'r') as y:
            config = yaml.load(y)
        # Grab the database filename from the configs.
        self.dbfile = config['database']
        # Create a Reddit object to handle the Reddit-specific tasks.
        self.reddit = Reddit(self.dbfile)

    def parse_message(self, msg, chan, fromNick):
        # logger.debug("parse_message starting with msg " + msg)
        if msg.find("PING :") != -1:
            self.ping()
        elif (msg.find(":hello {}".format(self.nick)) != -1
                or msg.find(":hello, {}".format(self.nick)) != -1
                or msg.find(":hi {}".format(self.nick)) != -1):
            logger.info(msg)
            self.hello(chan, fromNick)
        elif (msg.find(":!showerthought") != -1
                or msg.find(":{}: thought".format(self.nick)) != -1
                or msg.find(":!stb thought") != -1):
            logger.info(msg)
            self.print_shower_thought(chan, fromNick)
        elif (msg.find(":{}: help".format(self.nick)) != -1
                or msg.find(":!stb help") != -1):
            logger.info(msg)
            self.print_help(chan)
        elif (msg.find(":!stb source") != -1
                or msg.find(":{}: source".format(self.nick)) != -1):
            logger.info(msg)
            self.print_source_link(chan)
        elif msg.find(":{}: updatedb".format(self.nick)) != -1:
            if not fromNick == 'mlane':
                self.send_message(chan, "Don't tell me what to do!")
            else:
                self.send_message(chan, "Pulling in some thoughts.")
                self.update_database(False)
        elif msg.find(":{}: shruggie".format(self.nick)) != -1:
            logger.debug("trying to print shruggie")
            self.print_shruggie(chan)
        else:
            logger.info(msg)
        return

    def print_source_link(self, chan):
        self.send_message(chan, "ShowerThoughtBot is by Mike Lane, "
                                "https://github.com/mikelane/ShowerThoughtBot")
        self.send_message(chan, "Feel free to fork or report issues.")

    def print_help(self, chan):
        lines = []
        lines.append("I respond to {}: $command or !stb command".format(self.nick))
        lines.append("$command = [help|thought|source]")
        lines.append("Get a shower thought with !showerthought.")
        lines.append("More to come...")
        lines.append("[email protected] for bugs.")
        for line in lines:
            self.send_message(chan, line)

    def print_shower_thought(self, chan, nick):
        # self.db_lock.acquire()
        db = DBAdapter(self.dbfile)
        thought = db.get_random_thought()
        self.send_message(chan, "okay {}: \"{}\" -{}\r\n".format(
            nick, thought[1], thought[2]))

    def print_shruggie(self, chan):
        self.send_message(chan, "\udcc2\udcaf\\_("
                                "\udce3\udc83\udc84)_/\udcc2\udcaf")

    def update_database(self, scheduled=True):
        if scheduled:
            now = datetime.now()
            duration = now - self.update_time
            duration = int(duration.total_seconds())
            if duration >= 86400:
                logger.info('Updating database on schedule.')
                self.update_time = now
                # self.db_lock.acquire()
                self.reddit.get_daily_top()
                # self.db_lock.release()
        else:
            self.reddit.get_daily_top()

    def message_handler(self, message):
        """The message handler breaks out the channel and nick of the sender
        and passes these on to the parser.
        """
        logger.debug("message_handler started with message " + message)
        chan = re.search(r'(#\w+ )', message)
        if chan:
            chan = chan.group(1)
        fromNick = re.search(r'(:\w+!)', message)
        if fromNick:
            fromNick = fromNick.group(1)
            fromNick = fromNick.strip(':!')
        self.parse_message(message, chan, fromNick)
        return

    # Run the bot!
    def run(self):
        messages = []
        while True:
            buffer = self.read()
            if len(buffer) > 0:
                messages = buffer.splitlines()
                buffer = ""
            while len(messages) > 0:
                self.message_handler(messages.pop(0))
            self.update_database()
            time.sleep(1)
def test_mark_as_read(self):
    oth = Reddit('reddit_api test suite')
    oth.login('PyApiTestUser3', '1111')
    # pylint: disable-msg=E1101
    msg = oth.user.get_unread(limit=1).next()
    msg.mark_as_read()
    self.assertTrue(msg not in list(oth.user.get_unread(limit=5)))
def __init__(self):
    self._reddit = Reddit(user_agent='anagram_bot')
    self._anagram = Wordplay()
    self._maintainer = None
    self._output = AnagramBot.OUT_STDOUT
class AnagramBot:
    OUT_STDOUT = 1
    OUT_MAINTAINER = 2
    OUT_REPLY = 4
    OUT_DEBUG_REPLY = 8

    def __init__(self):
        self._reddit = Reddit(user_agent='anagram_bot')
        self._anagram = Wordplay()
        self._maintainer = None
        self._output = AnagramBot.OUT_STDOUT

    def setMaintainer(self, username):
        self._maintainer = username

    def setOutput(self, outputMode):
        self._output = outputMode

    def login(self, username, password):
        self._reddit.login(username, password)

    def postPalindrome(self):
        comments = list(self._fetchComments())
        for comment in comments:
            palindrome = self._anagram.pickRandomPalindrome(comment.body)
            if palindrome is not None:
                print palindrome
            else:
                print "Nope:", comment.body[:70].replace("\n", "")

    def makeFunny(self):
        comments = list(self._fetchComments())
        attempts = []
        anagrams = []
        maxAttempts = 20
        i = 0
        while len(attempts) < 10 and i < maxAttempts:
            i += 1
            comment = random.choice(comments)
            anagrams = self._attempt(comment.body)
            anagrams = sorted(anagrams, key=lambda x: -len(x[1]))
            if len(anagrams) > 0:
                attempts.append((comment, anagrams))
        if len(attempts) == 0:
            return
        attempts = sorted(attempts, key=lambda x: -len(x[1][0][1]))
        (comment, anagrams) = attempts[0]
        anagrams = filter(lambda x: len(x[1]) > 3, anagrams)
        reply = self._replace(comment.body, anagrams)
        self._sendFunny(comment, reply)

    def _sendFunny(self, comment, reply):
        if self._output & AnagramBot.OUT_STDOUT:
            self._printReply(comment, reply)
        if self._output & AnagramBot.OUT_MAINTAINER:
            self._debugPM(comment.permalink + "\n\n" + reply)
        if self._output & AnagramBot.OUT_DEBUG_REPLY:
            self._moderatedReply(comment, reply)
        if self._output & AnagramBot.OUT_REPLY:
            comment.reply(reply)

    def _debugPM(self, message):
        if self._maintainer is None:
            raise ValueError("No maintainer is set! Use setMaintainer(str).")
        self._reddit.compose_message(self._maintainer, "AnagramBot debug",
                                     message)

    def _printReply(self, comment, reply):
        print comment.body
        print "==================="
        print reply

    def _moderatedReply(self, comment, reply):
        self._printReply(comment, reply)
        print comment.permalink
        response = raw_input("Send this [YES/NO]? ")
        if response.strip() == "YES":
            print "Sending reply..."
            comment.reply(reply)
        else:
            print "Aborted."

    def _replace(self, text, anagrams):
        for anagram in anagrams:
            pattern = "([^A-Za-z'0-9])" + anagram[0] + "([^A-Za-z'0-9])"
            replace = "\\1" + anagram[1] + "\\2"
            text = re.sub(pattern, replace, text)
        return text

    def _attempt(self, text):
        result = []
        for match in re.findall("[A-Za-z'0-9]+", text):
            for anagram in self._anagram.solveRandomAnagram(match, 5):
                if anagram is not None and anagram != match.upper():
                    anagram = _matchCase(match, anagram)
                    result.append((match, anagram))
        return result

    def _fetchComments(self):
        return self._reddit.get_all_comments()
def test_moderator_required(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    self.assertRaises(errors.ModeratorRequired, oth.get_settings, self.sr)
class ModUtils(object):
    VERSION = '0.1.dev'

    def __init__(self, subreddit, site=None, verbose=None):
        self.reddit = Reddit(str(self), site)
        self.sub = self.reddit.get_subreddit(subreddit)
        self.verbose = verbose
        self._current_flair = None

    def __str__(self):
        return 'BBoe\'s ModUtils %s' % self.VERSION

    def add_users(self, category):
        mapping = {'banned': 'ban',
                   'contributors': 'make_contributor',
                   'moderators': 'make_moderator'}
        if category not in mapping:
            print '%r is not a valid option for --add' % category
            return
        func = getattr(self.sub, mapping[category])
        print 'Enter user names (any separation should suffice):'
        data = sys.stdin.read().strip()
        for name in re.split('[^A-Za-z_]+', data):
            func(name)
            print 'Added %r to %s' % (name, category)

    def current_flair(self):
        if self._current_flair is None:
            self._current_flair = []
            if self.verbose:
                print 'Fetching flair list for %s' % self.sub
            for flair in self.sub.flair_list():
                self._current_flair.append(flair)
                yield flair
        else:
            for item in self._current_flair:
                yield item

    def flair_template_sync(self, editable, limit,  # pylint: disable-msg=R0912
                            static, sort, use_css, use_text):
        # Parameter verification
        if not use_text and not use_css:
            raise Exception('At least one of use_text or use_css must be True')
        sorts = ('alpha', 'size')
        if sort not in sorts:
            raise Exception('Sort must be one of: %s' % ', '.join(sorts))
        # Build current flair list along with static values
        if static:
            counter = dict((x, limit) for x in static)
        else:
            counter = {}
        if self.verbose:
            sys.stdout.write('Retrieving current flair')
            sys.stdout.flush()
        for flair in self.current_flair():
            if self.verbose:
                sys.stdout.write('.')
                sys.stdout.flush()
            if use_text and use_css:
                key = (flair['flair_text'], flair['flair_css_class'])
            elif use_text:
                key = flair['flair_text']
            else:
                key = flair['flair_css_class']
            if key in counter:
                counter[key] += 1
            else:
                counter[key] = 1
        if self.verbose:
            print
        # Sort flair list items according to the specified sort
        if sort == 'alpha':
            items = sorted(counter.items())
        else:
            items = sorted(counter.items(), key=lambda x: x[1], reverse=True)
        # Clear current templates and store flair according to the sort
        if self.verbose:
            print 'Clearing current flair templates'
        self.sub.clear_flair_templates()
        for key, count in items:
            if not key or count < limit:
                continue
            if use_text and use_css:
                text, css = key
            elif use_text:
                text, css = key, ''
            else:
                text, css = '', key
            if self.verbose:
                print 'Adding template: text: "%s" css: "%s"' % (text, css)
            self.sub.add_flair_template(text, css, editable)

    def login(self, user, pswd):
        if self.verbose:
            print 'Logging in'
        self.reddit.login(user, pswd)
        if self.verbose:
            print 'Fetching moderator list for %s' % self.sub
        if str(self.sub).lower() not in [str(x).lower() for x in
                                         self.reddit.user.my_moderation()]:
            raise Exception('You do not moderate %s' % self.sub)

    def message(self, category, subject, msg_file):
        users = getattr(self.sub, 'get_%s' % category)()
        if not users:
            print 'There are no %s on %s.' % (category, str(self.sub))
            return
        if msg_file:
            try:
                msg = open(msg_file).read()
            except IOError as error:
                print str(error)
                return
        else: