Exemplo n.º 1
0
def main():
    """Write one CSV per user under ../tmp/ containing that user's posts.

    Each row holds the post id, a constant instance number (0), the
    author name, and the post body with URLs stripped, word-tokenized,
    and sentences separated by a literal ' <SENT>' marker.
    """
    reddit = Reddit(config.data_location)

    for user in reddit.get_users():
        with open(f'../tmp/{user.name}.csv', 'w') as out:
            writer = csv.writer(out)
            writer.writerow(['SeqId', 'InstNo', 'Author', 'Text'])
            for post in user.posts:
                # Skip link-only posts, removed posts, and the
                # makeupexchange subreddit.
                if ('selftext' not in post or not post['selftext']
                        or post['selftext'] == '[removed]'
                        or post['subreddit'] == 'makeupexchange'):
                    continue
                text = post.get('selftext').replace('\n', ' ')
                # Drop markdown links and bare URLs.
                text = re.sub(r'\[.*?\]\(http\S+\)|http\S+', '', text,
                              flags=re.MULTILINE)
                text = ' '.join(nltk.tokenize.word_tokenize(text))
                sentences = nltk.tokenize.sent_tokenize(text)
                # Append the sentence marker to every sentence but the last.
                sentences = ([s + ' <SENT>' for s in sentences[:-1]]
                             + sentences[-1:])
                writer.writerow([post.get('id'), 0, user.name,
                                 ' '.join(sentences)])
Exemplo n.º 2
0
def main(subreddit_list):
    """Dump each listed subreddit's posts to ../acl/<name>_user_perline.csv,
    one cleaned, word-tokenized sentence per CSV row.
    """
    reddit = Reddit(config.data_location)
    # Entries may be full paths like "r/foo"; keep the final component.
    names = {entry.strip().split("/")[-1] for entry in subreddit_list}

    for name in names:
        sub = reddit.get_subreddit(name)
        with open(f'../acl/{name}_user_perline.csv', 'w') as out:
            writer = csv.writer(out)
            writer.writerow(['SeqId', 'InstNo', 'Author', 'Text'])
            for post in sub.posts:
                # Skip link-only, removed, deleted-author, and bot posts.
                if ('selftext' not in post or not post['selftext']
                        or post['selftext'] == '[removed]'
                        or post['author'] == '[deleted]'
                        or post['author'] == 'AutoModerator'):
                    continue
                body = post.get('selftext').replace('\n', ' ').lower()
                sentences = nltk.tokenize.sent_tokenize(body)
                # Only keep posts with at least five sentences.
                if len(sentences) > 4:
                    for counter, sentence in enumerate(sentences):
                        tokens = nltk.tokenize.word_tokenize(clean(sentence))
                        # NOTE(review): the 'Author' column receives the
                        # subreddit name here, not the author -- preserved
                        # from the original; confirm which was intended.
                        writer.writerow([post.get('id'), counter,
                                         post['subreddit'],
                                         ' '.join(tokens)])
Exemplo n.º 3
0
def main(user_list, subreddit_list, output_file):
    """Count posts per (user, month, subreddit) and write the counts as CSV.

    user_list / subreddit_list are iterables of strings; entries may be
    paths ("r/foo") or comma-separated rows -- only the last component of
    each entry is kept.  output_file is an open, writable file object.
    """
    reddit = Reddit(config.data_location)
    subreddits = {forum.strip().split("/")[-1] for forum in subreddit_list}
    users = {useritem.strip().split(",")[-1] for useritem in user_list}
    # set.discard() is a no-op when "" is absent -- replaces the
    # bare try/except around set.remove(), which swallowed every error.
    subreddits.discard("")
    users.discard("")
    subreddits = sorted(subreddits)
    users = sorted(users)
    csvf = csv.writer(output_file)
    csvf.writerow(["username", "month", "subreddit", "count"])

    for s in subreddits:
        print(s)
        # author -> month ('YYYY-MM') -> number of posts
        subcount = defaultdict(lambda: defaultdict(int))
        for post in reddit.get_subreddit(s).posts:
            if post.get("author", "") in users:
                utc = datetime.utcfromtimestamp(
                    post["created_utc"]).strftime('%Y-%m')
                subcount[post.get("author", "")][utc] += 1
        # Deterministic output order: by user, then by month.
        for u in sorted(subcount):
            for t in sorted(subcount[u]):
                csvf.writerow([u, t, s, subcount[u][t]])
Exemplo n.º 4
0
 def test_mark_as_read(self):
     """Marking a message read removes it from the unread listing."""
     # Second authenticated session so self.r's own state is untouched.
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     # pylint: disable-msg=E1101
     msg = six_next(oth.user.get_unread(limit=1))
     msg.mark_as_read()
     # The just-read message must no longer appear among the unread.
     self.assertTrue(msg not in oth.user.get_unread(limit=5))
Exemplo n.º 5
0
def main():
    """Collect top subreddit posts (unless LOCAL) and download their images.

    TODO: add logging for process start/end and collection/write
    start/end.
    """
    start_time = time.time()
    print(f'Starting {PROJECT_NAME}')

    # No need to hit the API when the sample data on disk is enough.
    if LOCAL:
        print('LOCAL: True; skipping PRAW to use sample-reddit.json')
    else:
        reddit = Reddit()                  # authenticate with credentials
        post_data = reddit.collect_data()  # top posts from the subreddit
        write_data(post_data)              # persist post data as JSON

    # Images are read back from the JSON data saved above.
    Downloader.download_images()

    time_elapsed = round(time.time() - start_time, 3)
    print(f'Finished {PROJECT_NAME} in {time_elapsed} seconds.')
Exemplo n.º 6
0
 def build(self):
     '''Build the database: insert every cleaned post and comment.'''
     source = Reddit()
     scrubber = Cleaner()
     store = self.database.insert
     for sub in source.get_subreddits():
         for post in source.get_posts(sub):
             store(scrubber.clean(post))
             # Comments are cleaned and stored exactly like posts.
             for reply in source.get_comments(post):
                 store(scrubber.clean(reply))
Exemplo n.º 7
0
 def test_mark_multiple_as_read(self):
     """mark_as_read() accepts a batch and clears every message in it."""
     # The second session owns the unread messages; self.r marks them.
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     messages = list(oth.user.get_unread(limit=2))
     self.assertEqual(2, len(messages))
     self.r.user.mark_as_read(messages)
     # None of the batch may still show up as unread.
     unread = oth.user.get_unread(limit=5)
     for msg in messages:
         self.assertTrue(msg not in unread)
Exemplo n.º 8
0
    def __init__(self, config_dir, logger=Null()):
        """Set up the Phoebe player thread.

        Loads history/settings from `config_dir` (created if missing),
        logs into reddit when credentials are stored, and starts the
        player (mplayer) and downloader worker threads.

        NOTE(review): `logger=Null()` is evaluated once at definition
        time, so all default-constructed instances share one Null
        logger object -- confirm that is acceptable.
        """
        self.logger = logger
        self.log = self.logger.getLogger('phoebe.Phoebe')
        self.log.debug('Phoebe Thread initialized')
        self.log.debug('config_dir: %s' % config_dir)

        Thread.__init__(self)

        # Playback state: current playlist index, queue, and flags.
        self.idx = 0
        self.playlist = []
        self.playing = False
        self.buffering = False

        if not path.isdir(config_dir):
            mkdir(config_dir)

        self.config_dir = config_dir

        self.log.debug('Loading history file')
        self.history = LocalStorage(path.join(config_dir, 'history.json'),
                                    logger=self.logger)

        self.log.debug('Loading settings file')
        self.settings = LocalStorage(path.join(config_dir, 'settings.json'),
                                     logger=self.logger)

        # Only log in when both credentials are present in settings.
        self.reddit = Reddit(logger=self.logger)
        if ('reddit_username' in self.settings.keys()) \
          and ('reddit_password' in self.settings.keys()):
            self.reddit.login(self.settings['reddit_username'],
                              self.settings['reddit_password'])

        # Default download location: ~/Downloads/phoebe
        if 'download_dir' not in self.settings.keys():
            self.settings['download_dir'] = path.join(path.expanduser('~'),
                                                      'Downloads', 'phoebe')

        if not path.isdir(self.settings['download_dir']):
            mkdir(self.settings['download_dir'])

        # Player command queue plus the backend thread that consumes it.
        self.mpq = Queue()
        # TODO: vlc backend support. There should also be an auto-detected fallback
        if 'backend' not in self.settings.keys():
            self.settings['backend'] = 'mplayer'
        if self.settings['backend'] == 'mplayer':
            self.mp = MPlayerThread(queue=self.mpq, logger=self.logger)
        # NOTE(review): if settings['backend'] is anything other than
        # 'mplayer', self.mp is never assigned and the next line raises
        # AttributeError -- confirm intended backends.
        self.mp.daemon = True
        self.mp.start()

        self.playtime = 0

        # Download queue and its worker thread.
        self.dlq = Queue()
        self.dl = DLThread(self.dlq, logger=self.logger)
        self.dl.daemon = True
        self.dl.start()
def main():
    """Build a Spotify playlist from a subreddit's top thread titles.

    Fetches top threads, converts titles to search terms, finds the
    tracks on Spotify, and (unless --dry-run) replaces the playlist
    contents.  Returns 0 on success, 1 on a fatal error (Spotify auth
    failure, too few tracks found, or playlist update failure).
    """
    options = parse_args()

    r = Reddit(options.subreddit)

    info('Fecthing threads from Reddit')
    top_titles = r.top(options.period, options.limit)
    entities = [Entity(title) for title in top_titles]
    info('Found {} threads'.format(len(entities)))

    for entity in entities:
        try:
            entity.search_term = search_term_from_title(entity.reddit_title)
        except Exception:
            # BUG FIX: the original message referenced an undefined name
            # `title`, raising a NameError inside the handler; use the
            # entity's own title. The bare `except:` is narrowed too.
            error('Failed to convert Reddit title "{}" to a search term'.format(entity.reddit_title))

    refresh_token = read_refresh_token(options.refresh_token_file)

    try:
        s = Spotify(options.spotify_client_id, options.spotify_client_secret, refresh_token)
    except Exception as e:
        error('Failed to create Spotify agent')
        error(e)
        return 1

    info('Searching Spotify for tracks')
    for entity in entities:
        try:
            entity.spotify_track = s.search_track(entity.search_term)
        except Exception as e:
            error(e)
            error('Skipping...')

    # Dedupe through a set; the builtin `set` replaces the legacy `Set`.
    tracks_found = list(set(entity.spotify_track for entity in entities
                            if entity.spotify_track is not None))
    info('Found {} Spotify tracks'.format(len(tracks_found)))

    # Abort rather than wipe the playlist when too few tracks matched.
    if not (float(len(tracks_found)) / len(entities)) > options.search_threshold:
        error('Search of Spotify tracks under threshold of {}'.format(options.search_threshold))
        return 1

    if not options.dry_run:
        try:
            info('Removing existing tracks from playlist')
            s.clear_playlist(options.playlist_id)
            info('Adding {} new tracks to playlist'.format(len(tracks_found)))
            s.add_tracks_to_playlist(options.playlist_id, tracks_found)
        except Exception as e:
            error(e)
            return 1

    info('Run completed successfully')
    return 0
Exemplo n.º 10
0
 def test_mark_as_unread(self):
     """Marking a read message unread puts it back in the unread listing."""
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     found = None
     # Find the first already-read message and flip it back to unread.
     for msg in oth.user.get_inbox():
         if not msg.new:
             found = msg
             msg.mark_as_unread()
             break
     else:
         # for/else: runs only when no read message was found at all.
         self.fail('Could not find a read message.')
     self.assertTrue(found in oth.user.get_unread())
Exemplo n.º 11
0
def parse(ignore_duty=True, ignore_resolutions=True):
  """Post newly introduced bills from the GovTrack RSS feed to reddit.

  Skips entries already posted (tracked in house_collection), and
  optionally skips temporary-duty bills and resolutions.  Each posted
  story is rendered from `template` with related Google News links.
  """
  govfeed = feedparser.parse('http://www.govtrack.us/events/events.rss?'
    'feeds=misc%3Aintroducedbills')

  r = Reddit(user_agent='WatchingCongress/1.0')
  r.login('FuturistBot', '<BOTPASS>')

  for entry in govfeed.entries:
    # NOTE(review): str.find returns -1 (truthy) when absent and 0 when
    # the match is at the start, so `not ...find('guid')` is True only
    # when the guid string *begins* with 'guid' -- verify this is the
    # intended check.
    if not entry['guid'].find('guid'):
      logging.info("Couldn't find GUID")
      continue

    if not entry['title']:
      logging.info("No title for bill: {0}".format(entry['guid']))
      continue

    # Already posted this bill earlier -- don't duplicate the story.
    if house_collection.find_one({'guid': entry['guid']}):
      logging.info("Already created story: {0}".format(entry['title']))
      continue

    if ignore_duty and 'duty' in entry['title'] and 'temporar' in entry['title']:
      logging.info("Ignored boring bill: {0}".format(entry['title']))
      continue

    if ignore_resolutions and '.Res' in entry['title']:
      logging.info("Ignored resolution: {0}".format(entry['title']))
      continue

    record = {
      'title': entry['title'],
      'description': entry['description'],
      'link': entry['link'],
      'guid': entry['guid'],
    }

    # Bill number is the prefix of the title, e.g. "H.R. 1234: ...".
    bill_number = entry['title'].split(':')[0]
    try:
      news_stories = find_news_stories(bill_number)
    except Exception as e:
      news_stories = []
      logging.error("Couldn't parse Google News: {}".format(unicode(e)))

    try:
      text = template.render(description=entry['description'],
                   link=entry['link'],
                   news_stories=news_stories)
      r.submit('futuristparty', entry['title'], text=text)
      house_collection.insert(record)
      logging.info("Created story: {0}".format(entry['title']))
    except Exception as e:
      # Back off briefly on failure (e.g. reddit rate limiting).
      logging.error("Exception occured: {0}".format(unicode(e)))
      time.sleep(2)
Exemplo n.º 12
0
def main():
    ''' Instantiates reddit and notification notifier '''
    reddit = Reddit()
    notifier = Notifier()
    # Poll forever: fetch, forward each notification, then sleep.
    while True:
        reddit.fetch_latest_posts()
        for notification in reddit.notifications:
            if not notification:
                continue
            notifier.send_notification(
                message=notification.get('title', ''),
                link=notification.get('link', ''),
                title='iRNBA')
            # Short pause between individual pushes.
            time.sleep(5)
        time.sleep(FIVE_MINUTES)
Exemplo n.º 13
0
 def test_mark_multiple_as_read(self):
     """mark_as_read() accepts a batch and clears every message in it."""
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     # Collect two unread messages not authored by this user.
     messages = []
     for msg in oth.user.get_unread(limit=None):
         if msg.author != oth.user.name:
             messages.append(msg)
             if len(messages) >= 2:
                 # BUG FIX: this was `return`, which silently ended the
                 # test before any assertion ran; `break` falls through
                 # to the checks below.
                 break
     self.assertEqual(2, len(messages))
     self.r.user.mark_as_read(messages)
     unread = oth.user.get_unread(limit=5)
     for msg in messages:
         self.assertTrue(msg not in unread)
Exemplo n.º 14
0
def main(subreddits, subreddit_list, top_n):
    """Print "subreddit,author" lines for the top_n authors of each forum."""
    reddit = Reddit(config.data_location)
    forums = list(subreddits)

    # subreddit_list entries may be paths; keep only the last component.
    if subreddit_list is not None:
        for entry in subreddit_list:
            forums.append(entry.strip().split("/")[-1])

    for forum in forums:
        leaders = reddit.get_subreddit(forum).top_authors(top_n)
        for author in leaders:
            print(forum + "," + author)
Exemplo n.º 15
0
    def from_dict(self, dic):
        """Restore this object's state from its plain-dict representation."""
        self.topic_name = dic['topic_name']

        # Nested objects rebuild themselves from their own sub-dicts.
        procon = Procon()
        procon.from_dict(dic['procon'])
        self.procon = procon

        reddit = Reddit()
        reddit.from_dict(dic['reddit'])
        self.reddit = reddit

        self.similarity_matrices = dic['similarity_matrices']
Exemplo n.º 16
0
def main(subreddit, top_n):
    """Identify a subreddit's top_n most prolific authors and pull their posts."""
    reddit = Reddit(config.data_location)
    subr = reddit.get_subreddit(subreddit)

    author_counts = Counter(post['author'] for post in subr.posts)
    # Deleted accounts all collapse into '[deleted]'; drop that bucket.
    del author_counts['[deleted]']
    # First column of (author, count) pairs = the author names.
    top_authors = next(zip(*author_counts.most_common(top_n)))

    print('Pulling the following authors...')
    print('\n'.join(top_authors))
    print(top_authors)

    # Pull 1000 posts per author per request.
    pull_posts(1000, authors=top_authors)
Exemplo n.º 17
0
 def run(self, host, port):
     """Worker loop: poll host:port for download requests and ship results.

     Protocol (per iteration): connect, send our FQDN, read
     comma-separated request lines until two consecutive blank lines,
     download everything, send back a tar.gz of the data, then clean
     up.  Backs off (sleeps) when the server repeatedly has no work.
     """
     self.reddit = Reddit(creds.key, creds.secret, creds.username, creds.password, creds.redirect_uri)
     self.reddit.updateToken()
     self.reddit.testAccess()
     sleeptime = 0
     while True:
         # Back off progressively when recent polls returned no work.
         if sleeptime > 10:
             time.sleep(10)
         elif sleeptime > 1:
             time.sleep(1)
         # Connect to host:port, get the fp
         fp = self.connect(host, port)

         # Send hostname of client over initially
         hostname = socket.getfqdn()
         fp.write(hostname+'\n')
         fp.flush()
         if debug:
             print 'Sent hostname'

         # Recv all the urls
         reqlist = []
         newline = False
         while True:
             line = fp.readline()
             line = line.strip()
             if line != '':
                 reqlist.append(line.split(','))
             else:
                 # Two consecutive empty lines terminate the request list.
                 if newline == True:
                     break
                 newline = True
             fp.flush()

         print host+' >> '+str(reqlist)
         # See if any urls were sent, close if zero
         if len(reqlist) == 0:
             if debug:
                 print 'No requests'
             self.close()
             sleeptime += 1
             continue
         sleeptime = 0

         if debug:
             print 'Downloading requests'
         # Download all the urls otherwise
         self.download_data(reqlist)

         # targzip the data
         targz = self.targz()

         # Send the data
         targz_fp = open(targz, 'rb')
         targz_data = targz_fp.read()
         fp.write(targz_data)
         fp.flush()
         print host+' << archive.tar.gz'
         self.close()
         self.cleanup()
Exemplo n.º 18
0
class QReddit:
    def __init__(self):
        self.r = Reddit()
        (self.options, args) = self.parseArguments()

        if(len(args) < 1):
            print "Please specify type of action (textpost, linkpost, viewsub, createuser)"
            return

        self.action = args[0]

        if(self.options.username and self.options.password):
            self.user = {"username":self.options.username, "password":self.options.password}
        else:
            try:
                self.user = self.getUser()
            except IOError:
                print "No user was specified through --user and --password but could not find 'user.json'. Please either use createuser or use --user and --password."
                sys.exit()

    def parseArguments(self):
        parser = optparse.OptionParser()
        parser.add_option("-s", "--subreddit", help="Specify subreddit", dest="subreddit")
        parser.add_option("-t", "--title", help="Specify title", dest="title")
        parser.add_option("-b", "--body", help="Specify post body (for text post)", dest="body")
        parser.add_option("-l", "--link", help="Specify post link (for link post)", dest="link")
        parser.add_option("-u", "--user", help="Specify username", dest="username")
        parser.add_option("-p", "--pass", help="Specify password", dest="password")
        parser.add_option("-L", "--limit", help="Limit results (for view)", type="int", dest="limit")
        parser.add_option("-i", "--postid", help="Specify post ID", dest="postid")

        return parser.parse_args()

    def performAction(self):
        if(self.action == "textpost"):
            self.r.doTextPost(self.options, self.user)
        if(self.action == "linkpost"):
            self.r.doLinkPost(self.options, self.user)
        if(self.action == "viewsub"):
            self.r.doViewsub(self.options)
        if(self.action == "viewpost"):
            self.r.doViewpost(self.options)
        if(self.action == "createuser"):
            self.createUser(self.options.username, self.options.password)

    def getUser(self):
        try:
            with open("user.json") as f:
                user = json.load(f)
        except IOError:
            raise e

        return user

    def createUser(self, username, password):
        with open("user.json", "w") as f:
            json.dump({"username":username, "password":password}, f)
Exemplo n.º 19
0
def posts2csv(post_f, authors=None, subreddits=None, seen_posts=None, verbose=True, limit=1000):
    """Write up to `limit` reddit posts for the given subreddits and
    authors as CSV rows into the open file object `post_f`.

    Posts whose ids appear in `seen_posts`, URL-only posts, and posts
    lacking a subreddit are skipped.

    BUG FIX: `seen_posts` used a mutable default (`set()`) that was
    shared across calls; a None sentinel restores fresh-set-per-call
    behavior while keeping the interface backward-compatible.
    """
    if seen_posts is None:
        seen_posts = set()

    reddit = Reddit(MongoClient('mongodb://127.0.0.1:27017')["reddit"])

    subreddits = [reddit.get_subreddit(s) for s in subreddits]
    authors = [reddit.get_user(a) for a in authors]

    # subreddit info doesn't seem to have the "subreddit_id".   To do : get that with r/subreddit/<name>/about
    # for now, use subreddit name as forum identifier
    csvp = csv.writer(post_f)
    csvp.writerow("id,replyto,username,user_annotation_flairtext,annotation_over18,annotation_score,forum,discourse,title,when,dataset_file,post".split(","))

    def write_row(post):
        """Emit one CSV row for `post`; return False when it is skipped."""
        if "selftext" not in post or post["selftext"] == "":
            return False   # Skip URL-only posts
        if "subreddit" not in post:
            print("No subreddit in post " + post["id"])
            return False
        if post["id"] in seen_posts:
            return False
        csvp.writerow([post["id"], None, post["author"], post["author_flair_text"],
                       str(post["over_18"]), str(post["score"]),
                       post["subreddit"], "Reddit", post["title"],
                       datetime.fromtimestamp(post["created"], tz).isoformat(),
                       "reddit", post.get("selftext", post["url"])])
        return True

    for subreddit in subreddits:
        print(subreddit.name)
        postids = set(subreddit.post_ids) - seen_posts
        for i, idd in enumerate(postids):
            post = subreddit.post(idd)
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if write_row(post):
                limit -= 1
                if limit == 0:
                    return

    for author in authors:
        print(author.name)
        postids = set(author.post_ids) - seen_posts
        for i, post in enumerate([author.post(id) for id in postids]):
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if write_row(post):
                limit -= 1
                if limit == 0:
                    return
Exemplo n.º 20
0
def main():
    """Entry point: post the first not-yet-posted news link to reddit.

    Reads the news degree level from --level=N or from bot.ini,
    configures logging from configs/logging.yml, and honors the
    dry-run flag from the config.
    """
    level = 0

    # Read program arguments of the form --level=N
    for arg in sys.argv[1:]:
        (param, value) = arg.split('=')
        if param == '--level':
            level = int(value)

    path = os.path.dirname(os.path.realpath(__file__))

    # `with` guarantees the config file is closed even if parsing fails
    # (the original leaked the handle on error).
    with open('{0}/configs/logging.yml'.format(path), 'r') as loggingConf:
        # safe_load avoids arbitrary object construction from YAML and
        # the deprecation warning for loader-less yaml.load().
        logging.config.dictConfig(yaml.safe_load(loggingConf))
    logger = logging.getLogger(LOGGER)

    logger.info('Program started')

    config = configparser.ConfigParser()
    config.read('{0}/configs/bot.ini'.format(path))

    username = config['Reddit']['username']
    password = config['Reddit']['password']
    user_agent = config['Reddit']['user-agent']
    dry_run = config['Bot'].getboolean('dry-run')

    if dry_run:
        logger.info('Running in dry run mode. Nothing will be commited')

    reddit = Reddit(username, password, user_agent, dry_run)
    history = History('{0}/{1}'.format(path, DATABASE))
    news = News()
    # CLI --level wins; otherwise fall back to the configured level.
    if level == 0:
        level = int(config['Bot']['level'])
    news_items = news.get_news_items(level)
    # Post the first unposted link that succeeds, then stop.
    for item in news_items:
        url = item[0]
        title = item[1]
        degree = item[2]  # unused; documents the item tuple shape
        if not history.has_link_been_posted(url):
            history.add_link_as_posted(url, dry_run)
            if not reddit.post_link(get_redirect_url(url), title):
                continue
            break

    logger.info('Program done')
Exemplo n.º 21
0
 def test_report(self):
     """Reporting a submission makes it appear in the mod reports list."""
     # login as new user to report submission
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     subreddit = oth.get_subreddit(self.sr)
     submission = None
     # Pick the newest submission that is not already hidden/reported.
     for submission in subreddit.get_new_by_date():
         if not submission.hidden:
             break
     if not submission or submission.hidden:
         self.fail('Could not find a non-reported submission.')
     submission.report()
     # check if submission was reported
     for report in self.r.get_subreddit(self.sr).get_reports():
         if report.id == submission.id:
             break
     else:
         # for/else: only reached when no matching report was found.
         self.fail('Could not find reported submission.')
Exemplo n.º 22
0
def main():
    """Forward a subreddit's posts into a Discord channel via webhook."""
    load_dotenv()

    # All credentials and configuration come from the environment.
    webhook_url = os.getenv("DISCORD_WEBHOOK_URL")
    subreddit = os.getenv('REDDIT_SUBREDDIT')
    client_id = os.getenv("REDDIT_CLIENT_ID")
    client_secret = os.getenv("REDDIT_CLIENT_SECRET")

    discord = Discord(webhook_url)

    # Each new reddit item is echoed through discord.sendMsg.
    reddit = Reddit(subreddit, discord.sendMsg, client_id, client_secret)

    # Echo locally to test.
    # reddit = Reddit(print_details, reddit_client_id, reddit_client_secret)

    reddit.run()
Exemplo n.º 23
0
 def test_report(self):
     """Reporting a submission makes it appear in the mod reports list."""
     # login as new user to report submission
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     subreddit = oth.get_subreddit(self.sr)
     submission = None
     # Pick the newest submission that is not already hidden/reported.
     for submission in subreddit.get_new_by_date():
         if not submission.hidden:
             break
     if not submission or submission.hidden:
         self.fail('Could not find a non-reported submission.')
     submission.report()
     # check if submission was reported
     for report in self.r.get_subreddit(self.sr).get_reports():
         if report.id == submission.id:
             break
     else:
         # for/else: only reached when no matching report was found.
         self.fail('Could not find reported submission.')
Exemplo n.º 24
0
def parse():
    """Post newly introduced bills from the GovTrack RSS feed to reddit.

    Skips entries already posted (tracked in house_collection),
    temporary-duty bills, and resolutions.  Each story is rendered from
    `template` and submitted to r/watchingcongress.
    """
    govfeed = feedparser.parse('http://www.govtrack.us/events/events.rss?'
                               'feeds=misc%3Aintroducedbills')

    r = Reddit(user_agent='WatchingCongress/1.0')
    r.login('congressbot', '<BOTPASS>')

    for entry in govfeed.entries:
        # NOTE(review): str.find returns -1 (truthy) when absent and 0
        # when the match is at the start, so this skips entries whose
        # guid *starts* with 'guid' -- verify this is the intended check.
        if not entry['guid'].find('guid'):
            logging.info("Couldn't find GUID")
            continue

        if not entry['title']:
            logging.info("No title for bill: {}".format(entry['guid']))
            continue

        # Already posted this bill earlier -- don't duplicate the story.
        if house_collection.find_one({'guid': entry['guid']}):
            logging.info("Already created story: {}".format(entry['title']))
            continue

        if 'duty' in entry['title'] and 'temporar' in entry['title']:
            logging.info("Ignored boring bill: {}".format(entry['title']))
            continue

        if '.Res' in entry['title']:
            logging.info("Ignored resolution: {}".format(entry['title']))
            continue

        record = {
            'title': entry['title'],
            'description': entry['description'],
            'link': entry['link'],
            'guid': entry['guid'],
        }

        try:
            text = template.render(description=entry['description'],
                                   link=entry['link'])
            r.submit('watchingcongress', entry['title'], text=text)
            house_collection.insert(record)
            logging.info("Created story: {}".format(entry['title']))
        except Exception as e:
            # Back off briefly on failure (e.g. reddit rate limiting).
            logging.error("Exception occured: {}".format(unicode(e)))
            time.sleep(2)
Exemplo n.º 25
0
 def select_handler(self):
     """Instantiate and return the configured handler.

     The handler name from config is converted from snake_case to
     PascalCase, imported from the `handlers` package, and constructed
     with a freshly built Reddit client and this config.

     Raises HandlerNotAllowed when the configured handler is not in
     the whitelist.
     """
     handler = 'handlers.{}'.format(self.config['handler'])
     if self.config['handler'] in self._ALLOWED_HANDLERS:
         class_ = ''.join(x.title()
                          for x in self.config['handler'].split('_'))
         module = importlib.import_module(handler)
         handler_class = getattr(module, class_)
         # NOTE(review): this passes the module-level `config`, not
         # `self.config`, into Reddit -- confirm that is intended.
         return handler_class(Reddit(config).build(), self.config)
     else:
         raise HandlerNotAllowed('Handler "{}" not allowed'.format(handler))
Exemplo n.º 26
0
def main(subreddit):
        print "Subreddit :", subreddit
        rsub = url_data("http://www.reddit.com/r/%s/new/.json?sort=new"%subreddit, json=True)
        children = rsub['data']['children']
        r = Reddit(USERNAME, PASSWORD)
        session = r.login()
        f = open('history.txt', 'r')
        history = f.read()
        f.close()
        for child in children:
                is_self = child['data']['is_self']
                thread_id = child['data']['name']
                print thread_id
                if thread_id in history:
                        print "Thread: %s already in history"%thread_id
                        pass

                else:
                        if not is_self:
                                img_url = child['data']['url']
                                thread_id = child['data']['name']
                                repost = karmadecay(img_url)
                                if repost:
                                        text = form_comment(repost)
                                        r_resp = r.post(session, thread_id, text) 
                                        if r_resp != None:
                                                error = r_resp['json']['errors']
                                                delay = find_digit(error[0][1])
                                                print "waiting: %s seconds" %delay*60
                                                time.sleep(delay*60) 
                                                r.post(session, thread_id, text) 
                                        f = open('history.txt', 'a')
                                        f.write("\n%s"%thread_id)
                                        print text
                                        f.close()
                                        time.sleep(1)
                                        print "Comment Posted:", thread_id 
                                else:
                                        pass
                        else:
                                pass
        print "Finished"
        return
Exemplo n.º 27
0
def get_authors_timeline(author: str, topics: List[str]) -> AuthorTimeline:
    """Collect every sentence `author` wrote on the given topics,
    grouped into posts and ordered chronologically.
    """
    reddit = Reddit(config.data_location)
    posts: Dict[str, TimelinePost] = {}

    for topic in topics:
        topic_df = TopicsDFCache.load(topic)
        author_rows = topic_df[(topic_df.Author == author)]
        for _, row in author_rows.iterrows():
            sentence = TimelineSentence(row['InstNo'], row['Text'], topic)
            post_id = row['SeqId']
            if post_id not in posts:
                # First sentence seen for this post: fetch its timestamp.
                created = reddit.get_post(post_id)['created_utc']
                posts[post_id] = TimelinePost(post_id, created)
            # Keep each post's sentence list ordered by sentence number.
            posts[post_id].sentences = sorted(
                posts[post_id].sentences + [sentence],
                key=lambda s: s.number)

    # Oldest post first.
    chronological = sorted(posts.values(), key=lambda p: p.timestamp)
    return AuthorTimeline(author, chronological)
Exemplo n.º 28
0
class PostCollectorTestCase(unittest.TestCase):
    """Integration test: downloading the image of a known reddit submission."""

    def setUp(self):
        # Fixed submission id so the expected filename is deterministic.
        self.reddit = Reddit()
        self.somePost = self.reddit.client.submission(id='8reg0o')

    def test_downloadImageFromSubmission(self):
        self.filename = self.reddit.downloadImageFromSubmission(self.somePost)
        self.assertEqual("temp\\vfvxr2xvd8411.jpg", self.filename)

    def tearDown(self):
        # Remove the file the test downloaded.
        # NOTE(review): self.filename is only set inside the test, so
        # tearDown raises AttributeError when the download itself
        # failed -- confirm whether that is acceptable.
        os.remove(self.filename)
Exemplo n.º 29
0
 def setUp(self):
     """Build a Reddit source backed by a fake redditor and stub OAuth creds."""
     super(RedditTest, self).setUp()
     # Stub the OAuth app credentials so no real API access is needed.
     oauth_dropins.reddit.REDDIT_APP_KEY = 'my_app_key'
     oauth_dropins.reddit.REDDIT_APP_SECRET = 'my_app_secret'
     self.handler.messages = []
     user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor())
     self.auth_entity = oauth_dropins.reddit.RedditAuth(
         id='my_string_id',
         refresh_token='silly_token',
         user_json=json_dumps(user))
     self.auth_entity.put()
     self.r = Reddit.new(self.handler, auth_entity=self.auth_entity)
Exemplo n.º 30
0
 def setUp(self):
     """Build a Reddit source backed by a fake redditor and stub OAuth creds."""
     super().setUp()
     # Stub the OAuth app credentials so no real API access is needed.
     oauth_dropins.reddit.REDDIT_APP_KEY = 'my_app_key'
     oauth_dropins.reddit.REDDIT_APP_SECRET = 'my_app_secret'
     user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor())
     # Mixed-case name exercises username normalization elsewhere.
     user['name'] = 'bONkerFIeld'
     self.auth_entity = oauth_dropins.reddit.RedditAuth(
         id='my_string_id',
         refresh_token='silly_token',
         user_json=json_dumps(user))
     self.auth_entity.put()
     self.r = Reddit.new(auth_entity=self.auth_entity)
Exemplo n.º 31
0
def main():
    """Dump every user's post history to ``../user_history/<name>.txt``.

    For each user known to the Reddit data store, writes one line per post
    containing the post body with newlines flattened to spaces.  Posts with
    no selftext, empty selftext, or selftext equal to '[removed]' are
    skipped.  (Earlier experiments -- per-post files, a single combined
    file, CSV output -- were removed as dead commented-out code.)
    """
    reddit = Reddit(config.data_location)

    # One plain-text file per user, one post per line.
    for user in reddit.get_users():
        with open(f'../user_history/{user.name}.txt', 'w') as fp:
            for post in user.posts:
                body = post.get('selftext')
                if body and body != '[removed]':
                    fp.write(body.replace('\n', ' '))
                    fp.write('\n')
def main(subreddit_file, database):
    cur.execute("""select entity_source_id from data_source_instance where entity_source_descriptor = 'reddit#id#POST';""")
    keys = {row["entity_source_id"] for row in cur.fetchall()}
    
    subreddits = {s.strip() for s in subreddit_file}
    keys2 = set()
    for subreddit in subreddits:
        s = Reddit.get_subreddit(subreddit)
        keys2.extend({p["id"] for p in s.posts})
    
    print "Found ", len(keys), "keys in",db," versus",len(keys2),"keys in directory"
    
    print "\n".join(list(keys2.difference(keys))[:1000])
Exemplo n.º 33
0
def main():
    """Run the GameDeals scraper, report elapsed time, then count down and exit."""
    started = time()

    arg_parser = argparse.ArgumentParser(
        description='Scraps /r/GameDeals for yuge deals.')
    arg_parser.add_argument('--sleep',
                            type=int,
                            default=5,
                            help='Sleep duration before autorun cmd window closes')
    args = arg_parser.parse_args()

    # Kick off the scrape against the configured subreddit.
    scraper = Reddit(TARGET_SUBREDDIT)
    scraper.start()

    execTime = time() - started

    sleep(0.5)

    print(f"Execution took {int(execTime)} seconds")
    # Animated countdown so an autorun console window doesn't vanish instantly.
    helpers.print_animated_text("Exiting in", args.sleep, helpers.countdown)
    sleep(args.sleep)
    helpers.done()
Exemplo n.º 34
0
def main(subreddit_list, keyword_list, topic_list):
    """Scan subreddit posts for keyword/topic matches and write one CSV per subreddit."""
    reddit = Reddit(config.data_location)

    subreddits = {entry.strip().split("/")[-1] for entry in subreddit_list}
    keywords = {word.strip().lower() for word in keyword_list}
    print(keywords)
    topics = {word.strip().lower() for word in topic_list}
    print(topics)

    for name in subreddits:
        sub = reddit.get_subreddit(name)
        with open(f'../lgbtq/data/{name}.csv', 'w') as fp:
            writer = csv.writer(fp)
            writer.writerow([
                'PostId', 'PostTime', 'author', 'PostContent', 'MatchingWord',
                'MatchTopic'
            ])
            for post in sub.posts:
                # Skip posts without usable selftext.
                body = post.get('selftext')
                if not body or body == '[removed]':
                    continue
                # Skip deleted authors and the moderation bot.
                if post['author'] in ('[deleted]', 'AutoModerator'):
                    continue

                clean_text = clean(body.replace('\n', ' ').lower())
                keyword_hits = set(match(keywords, clean_text))
                topic_hits = set(match(topics, clean_text))

                # Only record posts matching at least one keyword or topic.
                if keyword_hits or topic_hits:
                    writer.writerow([
                        post.get('id'),
                        time.ctime(post.get('created_utc')),
                        post['author'],
                        clean_text,
                        keyword_hits or None,
                        topic_hits or None,
                    ])
Exemplo n.º 35
0
    def __init__(self, file):
        """Initialize the bot and its Reddit helper from a YAML config file.

        file -- path to the YAML configuration; must contain a 'database'
                key naming the sqlite database file.
        """
        # Initialize the Bot
        super().__init__(file)
        self.update_time = datetime.now()

        # Load the configurations.
        with open(file, 'r') as y:
            # safe_load avoids arbitrary object construction; the
            # loader-less yaml.load(stream) signature was removed in
            # PyYAML 6.0 and would raise TypeError here.
            config = yaml.safe_load(y)

        # Grab the database filename from the configs.
        self.dbfile = config['database']
        # Create a Reddit object to handle the Reddit-specific tasks.
        self.reddit = Reddit(self.dbfile)
Exemplo n.º 36
0
 def __init__(self, subreddit, site, verbosity):
     """Bind the stats collector to *subreddit*.

     subreddit -- name of the subreddit to collect from
     site -- praw site/config name, forwarded to Reddit
     verbosity -- integer verbosity level controlling progress output
     """
     self.reddit = Reddit(str(self), site)
     self.subreddit = self.reddit.get_subreddit(subreddit)
     self.verbosity = verbosity
     self.submissions = []
     self.comments = []
     self.submitters = defaultdict(list)  # author name -> their submissions
     self.commenters = defaultdict(list)  # author name -> their comments
     self.min_date = 0
     # Exclude the most recent three days; young posts have unsettled scores.
     self.max_date = time.time() - DAYS_IN_SECONDS * 3
     self.prev_srs = None
     # Config
     self.reddit.config.comment_limit = -1  # Fetch max comments possible
     self.reddit.config.comment_sort = 'top'
Exemplo n.º 37
0
 def authenticate(self, username, password, request=None):
     """Authenticate *username*/*password* against reddit, creating or
     updating the matching local Django user.

     Returns the local User on success, None otherwise.  New accounts are
     only created when the reddit account meets the minimum karma and age
     requirements.  If reddit is unreachable, falls back to the locally
     stored password hash.
     """
     try:
         reddit = Reddit(user_agent=USER_AGENT)
         reddit.login(username, password)
         r_user = reddit.user

     except InvalidUserPass:
         # BUG FIX: logging.Logger has no Info(); use info().
         log.info(_('User "%s" tried to login without valid credentials')%username)
         return None
     except urllib2.HTTPError:
         # BUG FIX: HTTPError subclasses URLError, so it must be caught
         # before the URLError clause below (previously unreachable).
         log.info(_('User "%s" tried to login without valid credentials')%username)
         return None
     except urllib2.URLError:
         log.warning("Could not reach reddit. Is it down?")
         r_user = None

     try:
         db_user = User.objects.get(username__iexact=username)
         if not r_user and not db_user.check_password(password):
             return None
         if not db_user.is_active: #instead of deleting users, disable them.
             return None
     except User.DoesNotExist:
         #Rules for Joining
         # BUG FIX: created_utc is UTC, so compare against utcnow()
         # (was naive local now(), skewing the account-age check).
         if r_user and r_user.comment_karma >= JOIN_MIN_COMMENT_KARMA \
                   and r_user.link_karma >= JOIN_MIN_LINK_KARMA \
                   and (datetime.utcnow() - datetime.utcfromtimestamp(r_user.created_utc)) >= JOIN_MIN_MEMBER_TIME:
             db_user = User(username=username, is_active=True)
         else:
             return None

     db_user.set_password(password) # Hash and store password for offline logins
     db_user.backend = self.__class__.__name__
     db_user.save()
     return db_user
Exemplo n.º 38
0
    def __init__(self, topic_settings):
        """Validate the settings mapping and build the Procon and Reddit sources."""
        # Required: a human-readable topic name.
        if 'topic-name' not in topic_settings:
            print("Topic: Provide a topic name")
            exit(-12312)
        self.topic_name = topic_settings['topic-name']

        # Required: sub-settings for each data source.
        if 'procon' not in topic_settings:
            print('Topic: Provide procon settings')
            exit(-124)
        procon_cfg = topic_settings['procon']

        if 'reddit' not in topic_settings:
            print('Topic: Provide reddit settings')
            exit(-3324)
        reddit_cfg = topic_settings['reddit']

        # Tag each sub-config with the topic so the sources know their subject.
        procon_cfg['topic'] = self.topic_name
        reddit_cfg['topic'] = self.topic_name

        self.procon = Procon(procon_cfg)
        self.reddit = Reddit(reddit_cfg)
Exemplo n.º 39
0
    def __init__(self, topic_settings=None):
        """Build a Topic from *topic_settings*, plus similarity matrices
        between its reddit comments and its procon pros/cons.

        topic_settings -- mapping with 'topic-name', 'procon' and 'reddit'
                          sub-settings.  When omitted or empty an empty
                          object is created.
        """
        # BUG FIX: the default was a shared mutable ``{}``; use None as the
        # sentinel instead (callers' behavior is unchanged -- both the old
        # default and an explicit empty dict still take the early return).
        if not topic_settings:
            print("Topic: __init__: No settings given. Creating empty object.")
            return

        if 'topic-name' not in topic_settings:
            print("Topic: Provide a topic name")
            exit(-12312)

        self.topic_name = topic_settings['topic-name']

        if 'procon' not in topic_settings:
            print('Topic: Provide procon settings')
            exit(-124)

        procon_settings = topic_settings['procon']

        if 'reddit' not in topic_settings:
            print('Topic: Provide reddit settings')
            exit(-3324)

        reddit_settings = topic_settings['reddit']

        # Tag each sub-config with the topic so the sources know their subject.
        procon_settings['topic'] = self.topic_name
        reddit_settings['topic'] = self.topic_name

        self.procon = Procon(procon_settings)
        self.reddit = Reddit(reddit_settings)

        # One similarity matrix per registered algorithm, comparing every
        # reddit comment against the pro and con statements.
        self.similarity_matrices = {}
        for name, similarity_matrix_algorithm in similarity_matrix_algorithms.items():
            self.similarity_matrices[name] = similarity_matrix_algorithm.match(
                [comment.text for comment in self.get_all_comments()],
                self.get_pros(), self.get_cons())
Exemplo n.º 40
0
 def __init__(self, updater):
     """Wire up the spoiler sources and schedule the periodic crawl job.

     updater -- telegram Updater whose job queue hosts the recurring crawl
     """
     self.sd = SpoilerDetector()
     self.ms = MythicSpoiler()
     # Image model used to detect card frames in pictures.
     self.yolo = Yolo(config.model, config.classes, config.conf)
     self.reddit = Reddit(subreddit="magicTCG")
     # Ids/urls already seen per source, so spoilers are not re-posted.
     self.scryfall_futur_cards_id = []
     self.reddit_futur_cards_subm_id = []
     self.mythicspoiler_futur_cards_url = []
     self.limit_days = 45
     # List of Spoiler Objects
     # Only load spoilers found within the last `limit_days` days.
     limit_date = datetime.today() - timedelta(days=self.limit_days)
     self.spoiled = Session.query(Spoiler).filter(
         Spoiler.found_at > limit_date).all()
     # Job queues:
     # Crawl every 60 seconds, starting 10 seconds after startup.
     updater.job_queue.run_repeating(self.general_crawl,
                                     interval=60,
                                     first=10)
Exemplo n.º 41
0
    def __init__(self):
        """Parse the command line and resolve the acting reddit user (Python 2)."""
        self.r = Reddit()
        (self.options, args) = self.parseArguments()

        if(len(args) < 1):
            print "Please specify type of action (textpost, linkpost, viewsub, createuser)"
            return

        # First positional argument selects the action to perform.
        self.action = args[0]

        # Credentials from flags take precedence; otherwise fall back to the
        # cached user file ('user.json', presumably written by a previous
        # createuser run -- see getUser()).
        if(self.options.username and self.options.password):
            self.user = {"username":self.options.username, "password":self.options.password}
        else:
            try:
                self.user = self.getUser()
            except IOError:
                print "No user was specified through --user and --password but could not find 'user.json'. Please either use createuser or use --user and --password."
                sys.exit()
Exemplo n.º 42
0
def process_topic(topic):
    """Add a Year-Month column to a topic's dataframe, keep only the authors
    with full histories, and save the result as a CSV in the topic dir.

    topic -- topic name used to load the cached dataframe and name the output
    """
    reddit = Reddit(config.data_location)
    df = TopicsDFCache.load(topic)

    # Derive a Year-Month value for each row by looking the post up in the
    # reddit data store.  (A Year-only variant was removed as dead code.)
    df['Year-Month'] = df.progress_apply(partial(get_year_month_from_row,
                                                 reddit),
                                         axis=1)

    # Filter out unneeded authors
    df = df[df['Author'].isin(full_authors)]

    df.to_csv(
        os.path.join(config.topic_dir,
                     f'{topic}-filtered-with_year_and_month.csv'))
Exemplo n.º 43
0
def get_user(user):
    '''
    Gets the information from the specified public user object.

    @param: the PublicUser object that points to the target user
    @return: a 2D parallel list: [img_urls, titles, scores, authors]
    '''
    # PRAW wrapper bound to the user's subreddit, capped at NUM_POSTS posts.
    client = Reddit(user.returnSubreddit()[2], NUM_POSTS)

    # Parallel lists: index i of each list describes the same post.
    return [
        client.getImageUrl(),
        client.getTitle(),
        client.getScore(),
        client.getAuthor(),
    ]
Exemplo n.º 44
0
class Topic:
    """Couples procon.org arguments with reddit comments for one topic."""

    def __init__(self, topic_settings):
        """Validate *topic_settings* and construct both data sources."""
        # Required: a human-readable topic name.
        if 'topic-name' not in topic_settings:
            print("Topic: Provide a topic name")
            exit(-12312)
        self.topic_name = topic_settings['topic-name']

        # Required: sub-settings for each data source.
        if 'procon' not in topic_settings:
            print('Topic: Provide procon settings')
            exit(-124)
        procon_cfg = topic_settings['procon']

        if 'reddit' not in topic_settings:
            print('Topic: Provide reddit settings')
            exit(-3324)
        reddit_cfg = topic_settings['reddit']

        # Tag each sub-config with the topic so the sources know their subject.
        procon_cfg['topic'] = self.topic_name
        reddit_cfg['topic'] = self.topic_name

        self.procon = Procon(procon_cfg)
        self.reddit = Reddit(reddit_cfg)

    def getAllComments(self):
        """All reddit comments collected for this topic."""
        return self.reddit.getAllComments()

    def getPros(self):
        """Pro arguments scraped from procon.org."""
        return self.procon.pros

    def getCons(self):
        """Con arguments scraped from procon.org."""
        return self.procon.cons


#topic = Topic({'topic-name': 'medical marijuana', 'procon': {'mode': 'find'}, 'reddit': {'mode': 'find'}})
#print(topic.procon.background)
Exemplo n.º 45
0
def get_category(category):
    '''
    Gets the information from the specified public category object.

    @param: Category object representing the target category
    @return: a 2D parallel list: [img_urls, titles, scores, authors]
    '''
    # PRAW wrapper bound to the category's subreddit, capped at NUM_POSTS.
    client = Reddit(category.subreddit, NUM_POSTS)

    # Parallel lists: index i of each list describes the same post.
    return [
        client.getImageUrl(),
        client.getTitle(),
        client.getScore(),
        client.getAuthor(),
    ]
Exemplo n.º 46
0
 def __init__(self, subreddit, site=None, verbose=None):
     """Bind this helper to *subreddit*.

     subreddit -- subreddit name to operate on
     site -- optional praw site/config name forwarded to Reddit
     verbose -- optional verbosity flag
     """
     self.reddit = Reddit(str(self), site)
     self.sub = self.reddit.get_subreddit(subreddit)
     self.verbose = verbose
     self._current_flair = None  # most recently processed flair, if any
Exemplo n.º 47
0
 def post_to_reddit(self):
     """Log in as the bot account and submit this item as a link post to /r/woot."""
     session = Reddit(user_agent='wootbot/1.0')
     session.login(bot_username, bot_password)
     session.submit('woot', str(self), url=self.url)
Exemplo n.º 48
0
class SubRedditStats(object):
    """Collects a subreddit's submissions and comments and publishes a
    markdown statistics post (top submitters, commenters, submissions and
    comments) back to reddit.  Python 2 code (print statements, raw_input).
    """

    VERSION = '0.2.0'

    # Templates used to assemble the generated stats post.
    post_prefix = 'Subreddit Stats:'
    post_header = '---\n###%s\n'
    post_footer = ('>Generated with [BBoe](/user/bboe)\'s [Subreddit Stats]'
                   '(https://github.com/bboe/subreddit_stats)  \n%s'
                   'SRS Marker: %d')
    # Matches the timestamp marker embedded at the end of a previous post.
    re_marker = re.compile('SRS Marker: (\d+)')

    @staticmethod
    def _previous_max(submission):
        """Return the SRS marker timestamp from a previous stats post,
        aborting the process if no marker is present."""
        try:
            val = SubRedditStats.re_marker.findall(submission.selftext)[-1]
            return float(val)
        except (IndexError, TypeError):
            print 'End marker not found in previous submission. Aborting'
            sys.exit(1)

    @staticmethod
    def _permalink(permalink):
        """Shorten a full reddit permalink to a relative /comments/... link.
        NOTE(review): relies on the exact legacy permalink URL layout
        (token 6 = submission id, token 8 = comment id) -- confirm."""
        tokens = permalink.split('/')
        if tokens[8] == '':  # submission
            return '/comments/%s/_/' % (tokens[6])
        else:  # comment
            return '/comments/%s/_/%s?context=1' % (tokens[6], tokens[8])

    @staticmethod
    def _user(user):
        """Render a username as a markdown profile link, escaping underscores."""
        return '[%s](/user/%s)' % (user.replace('_', '\_'), user)

    def __init__(self, subreddit, site, verbosity):
        """Bind the collector to *subreddit* using praw *site* settings."""
        self.reddit = Reddit(str(self), site)
        self.subreddit = self.reddit.get_subreddit(subreddit)
        self.verbosity = verbosity
        self.submissions = []
        self.comments = []
        self.submitters = defaultdict(list)  # author name -> submissions
        self.commenters = defaultdict(list)  # author name -> comments
        self.min_date = 0
        # Exclude the last three days; young posts have unsettled scores.
        self.max_date = time.time() - DAYS_IN_SECONDS * 3
        self.prev_srs = None
        # Config
        self.reddit.config.comment_limit = -1  # Fetch max comments possible
        self.reddit.config.comment_sort = 'top'

    def __str__(self):
        return 'BBoe\'s SubRedditStats %s' % self.VERSION

    def login(self, user, pswd):
        """Log the praw session in."""
        if self.verbosity > 0:
            print 'Logging in'
        self.reddit.login(user, pswd)

    def msg(self, msg, level, overwrite=False):
        """Print *msg* when verbosity >= *level*.  With overwrite=True the
        cursor returns to line start for progress-style updates."""
        if self.verbosity >= level:
            sys.stdout.write(msg)
            if overwrite:
                sys.stdout.write('\r')
                sys.stdout.flush()
            else:
                sys.stdout.write('\n')

    def prev_stat(self, prev_url):
        """Use a previous stats post as the lower time boundary."""
        submission = self.reddit.get_submission(prev_url)
        self.min_date = self._previous_max(submission)
        self.prev_srs = prev_url

    def fetch_recent_submissions(self, max_duration, after, exclude_self,
                                 since_last=True):
        '''Fetches recent submissions in subreddit with boundaries.

        Does not include posts within the last three days as their scores may
        not be representative.

        Keyword arguments:
        max_duration -- When set, specifies the number of days to include
        after -- When set, fetch all submission after this submission id.
        exclude_self -- When true, don't include self posts.
        since_last -- When true use info from last submission to determine the
                      stop point
        '''
        if max_duration:
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        url_data = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.get_new_by_date(limit=None,
                                                         url_data=url_data):
            if submission.created_utc > self.max_date:
                continue
            if submission.created_utc <= self.min_date:
                break
            if (since_last and str(submission.author) == str(self.reddit.user)
                and submission.title.startswith(self.post_prefix)):
                # Use info in this post to update the min_date
                # And don't include this post
                self.msg('Found previous: %s' % submission.title, 2)
                if self.prev_srs == None:  # Only use the most recent
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission.permalink
                continue
            if exclude_self and submission.is_self:
                continue
            self.submissions.append(submission)
        self.msg('DEBUG: Found %d submissions' % len(self.submissions), 1)
        if len(self.submissions) == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def fetch_top_submissions(self, top, exclude_self):
        '''Fetches top 1000 submissions by some top value.

        Keyword arguments:
        top -- One of week, month, year, all
        exclude_self -- When true, don't include self posts.
        '''
        # NOTE(review): 'day' is accepted here although the docstring only
        # lists week/month/year/all.
        if top not in ('day', 'week', 'month', 'year', 'all'):
            raise TypeError('%r is not a valid top value' % top)
        self.msg('DEBUG: Fetching submissions', 1)
        url_data = {'t': top}
        for submission in self.subreddit.get_top(limit=None,
                                                 url_data=url_data):
            if exclude_self and submission.is_self:
                continue
            self.submissions.append(submission)
        self.msg('DEBUG: Found %d submissions' % len(self.submissions), 1)
        if len(self.submissions) == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def process_submitters(self):
        """Group the fetched submissions by author name."""
        self.msg('DEBUG: Processing Submitters', 1)
        for submission in self.submissions:
            if submission.author:
                self.submitters[str(submission.author)].append(submission)

    def process_commenters(self):
        """Fetch every submission's comments and group them by author name."""
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on %d submissions' % num, 1)
        for i, submission in enumerate(self.submissions):
            self.msg('%d/%d submissions' % (i + 1, num), 2, overwrite=True)
            if submission.num_comments == 0:
                continue
            try:
                self.comments.extend(submission.all_comments_flat)
            except Exception as exception:
                print 'Exception fetching comments on %r: %s' % (submission.content_id,
                                                                 str(exception))
            # Include comments whose parents were not resolved by praw.
            for orphans in submission._orphaned.values():
                self.comments.extend(orphans)
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)

    def basic_stats(self):
        """Return a markdown table of totals, unique redditors and votes.
        NOTE(review): Python 2 integer division yields whole percentages;
        raises ZeroDivisionError when there are no votes at all -- confirm
        that is acceptable."""
        sub_ups = sum(x.ups for x in self.submissions)
        sub_downs = sum(x.downs for x in self.submissions)
        comm_ups = sum(x.ups for x in self.comments)
        comm_downs = sum(x.downs for x in self.comments)

        sub_up_perc = sub_ups * 100 / (sub_ups + sub_downs)
        comm_up_perc = comm_ups * 100 / (comm_ups + comm_downs)

        values = [('Total', len(self.submissions), '', len(self.comments), ''),
                  ('Unique Redditors', len(self.submitters), '',
                   len(self.commenters), ''),
                  ('Upvotes', sub_ups, '%d%%' % sub_up_perc,
                   comm_ups, '%d%%' % comm_up_perc),
                  ('Downvotes', sub_downs, '%d%%' % (100 - sub_up_perc),
                   comm_downs, '%d%%' % (100 - comm_up_perc))]

        retval = '||Submissions|%|Comments|%|\n:-:|--:|--:|--:|--:\n'
        for quad in values:
            retval += '__%s__|%d|%s|%d|%s\n' % quad
        return '%s\n' % retval

    def top_submitters(self, num, num_submissions):
        """Return a markdown section listing the top *num* submitters and up
        to *num_submissions* of each submitter's best submissions."""
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        top_submitters = sorted(self.submitters.items(), reverse=True,
                                key=lambda x: (sum(y.score for y in x[1]),
                                               len(x[1])))[:num]

        retval = self.post_header % 'Top Submitters\' Top Submissions'
        for (author, submissions) in top_submitters:
            retval += '0. %d pts, %d submissions: %s\n' % (
                sum(x.score for x in submissions), len(submissions),
                self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = sub.title.replace('\n', ' ').strip()
                if sub.permalink != sub.url:
                    retval += '  0. [%s](%s)' % (title, sub.url)
                else:
                    retval += '  0. %s' % title
                retval += ' (%d pts, [%d comments](%s))\n' % (
                    sub.score, sub.num_comments,
                    self._permalink(sub.permalink))
            retval += '\n'
        return retval

    def top_commenters(self, num):
        """Return a markdown section listing the top *num* commenters by
        net comment score."""
        score = lambda x: x.ups - x.downs

        num = min(num, len(self.commenters))
        if num <= 0:
            return ''

        top_commenters = sorted(self.commenters.items(), reverse=True,
                                key=lambda x: (sum(score(y) for y in x[1]),
                                               len(x[1])))[:num]

        retval = self.post_header % 'Top Commenters'
        for author, comments in top_commenters:
            retval += '0. %s (%d pts, %d comments)\n' % (
                self._user(author), sum(score(x) for x in comments),
                len(comments))
        return '%s\n' % retval

    def top_submissions(self, num):
        """Return a markdown section listing the top *num* submissions by score."""
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''

        top_submissions = sorted(self.submissions, reverse=True,
                                 key=lambda x: x.score)[:num]

        retval = self.post_header % 'Top Submissions'
        for sub in top_submissions:
            author = str(sub.author)
            title = sub.title.replace('\n', ' ').strip()
            if sub.permalink != sub.url:
                retval += '0. [%s](%s)' % (title, sub.url)
            else:
                retval += '0. %s' % title
            retval += ' by %s (%d pts, [%d comments](%s))\n' % (
                self._user(author), sub.score, sub.num_comments,
                self._permalink(sub.permalink))
        return '%s\n' % retval

    def top_comments(self, num):
        """Return a markdown section listing the top *num* comments by net score."""
        score = lambda x: x.ups - x.downs

        num = min(num, len(self.comments))
        if num <= 0:
            return ''

        top_comments = sorted(self.comments, reverse=True,
                                 key=score)[:num]
        retval = self.post_header % 'Top Comments'
        for comment in top_comments:
            author = str(comment.author)
            title = comment.submission.title.replace('\n', ' ').strip()
            retval += ('0. %d pts: %s\'s [comment](%s) in %s\n'
                       % (score(comment), self._user(author),
                          self._permalink(comment.permalink), title))
        return '%s\n' % retval

    def publish_results(self, subreddit, submitters, commenters, submissions,
                        comments, top, debug=False):
        """Assemble the full stats post and submit it to *subreddit* after
        interactive confirmation; falls back to printing the post when it is
        too large, when debug is true, or when the submission fails."""
        def timef(timestamp):
            """Format a unix timestamp for the post title."""
            dtime = datetime.fromtimestamp(timestamp)
            return dtime.strftime('%Y-%m-%d %H:%M PDT')

        title = '%s %s %ssubmissions from %s to %s' % (
            self.post_prefix, str(self.subreddit), 'top ' if top else '',
            timef(self.min_date), timef(self.max_date))
        if self.prev_srs:
            prev = '[Previous Stat](%s)  \n' % self._permalink(self.prev_srs)
        else:
            prev = ''

        basic = self.basic_stats()
        t_commenters = self.top_commenters(commenters)
        t_submissions = self.top_submissions(submissions)
        t_comments = self.top_comments(comments)
        footer = self.post_footer % (prev, self.max_date)

        body = ''
        num_submissions = 10
        # Shrink the submitter section until the body fits MAX_BODY_SIZE.
        # NOTE(review): 'and' binds tighter than 'or', so this reads as
        # (body == '') or (too_big and num_submissions > 2) -- presumably
        # intended; confirm before refactoring.
        while body == '' or len(body) > MAX_BODY_SIZE and num_submissions > 2:
            t_submitters = self.top_submitters(submitters, num_submissions)
            body = (basic + t_submitters + t_commenters + t_submissions +
                    t_comments + footer)
            num_submissions -= 1

        if len(body) > MAX_BODY_SIZE:
            print 'The resulting message is too big. Not submitting.'
            debug = True

        if not debug:
            msg = ('You are about to submit to subreddit %s as %s.\n'
                   'Are you sure? yes/[no]: ' % (subreddit,
                                                 str(self.reddit.user)))
            if raw_input(msg).lower() not in ['y', 'yes']:
                print 'Submission aborted'
            else:
                try:
                    self.reddit.submit(subreddit, title, text=body)
                    return
                except Exception, error:
                    print 'The submission failed:', error

        # We made it here either to debug=True or an error.
        print title
        print body
Exemplo n.º 49
0
class Client:
    """Work-sharing download client (Python 2): connects to a coordinator
    socket, receives reddit fetch requests, downloads the results into a
    staging directory, and streams a tar.gz archive of them back."""

    sock = None    # open socket to the coordinator; None when disconnected
    reddit = None  # authenticated Reddit API wrapper, set in run()

    def download_a(self, after=None):
        """Fetch one /r/all listing page (starting at id *after*) into staging."""
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        posts, nav = self.reddit.getListing('all', after)
        blob = { 'posts': posts, 'nav': nav }
        fp = open(os.path.join(download_dir, 'a_'+after), 'w')
        fp.write(json.dumps(blob))
        fp.close()
        return True

    def download_p(self, pid):
        """Fetch post *pid* together with its comments into staging."""
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        self.reddit.updateToken()
        post, comments = self.reddit.getPost(pid)
        blob = { 'post': post, 'comments': comments }
        fp = open(os.path.join(download_dir, 'p_'+pid), 'w')
        fp.write(json.dumps(blob))
        fp.close()
        return True

    def download_u(self, user):
        """Fetch a user's history page by page, following 'after' cursors.
        Returns False as soon as any page comes back empty."""
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        url = 'http://www.reddit.com/user/'+user+'.json'
        filename = 'u_'+user
        data = self.download_get(url, os.path.join(download_dir, filename), True)
        if data == '':
            return False
        blob = json.loads(data)
        nav = parser.extract_listing_nav(blob)
        # Keep paging until reddit stops returning an 'after' cursor.
        while nav['after'] is not None:
            newurl = url+'?after='+nav['after']
            filename = 'u_'+user+'_'+nav['after']
            data = self.download_get(newurl, os.path.join(download_dir, filename), True)
            if data == '':
                return False
            blob = json.loads(data)
            nav = parser.extract_listing_nav(blob)
        return True

    def download_req(self, req):
        """Dispatch one parsed request to the matching download_* handler."""
        # format of request:
        # | a | <pid>
        # | p | <pid>
        # | u | <username> | <after>
        res = True
        if req[0] == 'a':
            res = self.download_a(req[1])
        elif req[0] == 'p':
            res = self.download_p(req[1])
        elif req[0] == 'u':
            res = self.download_u(req[1])
        return res

    def download_data(self, reqlist):
        """Process every request in *reqlist*, logging each one as it finishes."""
        for req in reqlist:
            self.download_req(req)
            print '  -- '+str(req)

    def connect(self, host, port):
        """Open a TCP connection to the coordinator; returns a file object
        wrapping the socket for line-based I/O."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((host, port))
        self.sock = sock
        fp = sock.makefile('rb+')
        print 'Connected to '+host+':'+str(port)
        return fp

    def close(self):
        """Close and forget the coordinator socket."""
        self.sock.close()
        self.sock = None

    def cleanup(self):
        """Delete all staged downloads and the archive from the last round."""
        download_dir = os.path.join(tmpdir, 'client', 'staging')
        for entry in os.listdir(download_dir):
            os.unlink(os.path.join(download_dir, entry))
        os.unlink(os.path.join(tmpdir, 'client', 'archive.tar.gz'))

    def targz(self):
        """Archive the staging directory; returns the archive's path."""
        return shutil.make_archive(os.path.join(tmpdir, 'client', 'archive'), 'gztar', os.path.join(tmpdir, 'client', 'staging'))

    def run(self, host, port):
        """Main loop: authenticate once, then repeatedly pull a batch of
        requests from the coordinator, download it, and send back a tar.gz."""
        self.reddit = Reddit(creds.key, creds.secret, creds.username, creds.password, creds.redirect_uri)
        self.reddit.updateToken()
        self.reddit.testAccess()
        sleeptime = 0
        while True:
            # Back off progressively when the coordinator has no work for us.
            if sleeptime > 10:
                time.sleep(10)
            elif sleeptime > 1:
                time.sleep(1)
            # Connect to host:port, get the fp
            fp = self.connect(host, port)

            # Send hostname of client over initially
            hostname = socket.getfqdn()
            fp.write(hostname+'\n')
            fp.flush()
            if debug:
                print 'Sent hostname'

            # Recv all the urls
            # NOTE(review): the 'newline' flag is never reset, so the SECOND
            # blank line ends the list even if non-blank lines came between
            # -- presumably the protocol sends two consecutive blanks.
            reqlist = []
            newline = False
            while True:
                line = fp.readline()
                line = line.strip()
                if line != '':
                    reqlist.append(line.split(','))
                else:
                    if newline == True:
                        break
                    newline = True
                fp.flush()

            print host+' >> '+str(reqlist)
            # See if any urls were sent, close if zero
            if len(reqlist) == 0:
                if debug:
                    print 'No requests'
                self.close()
                sleeptime += 1
                continue
            sleeptime = 0

            if debug:
                print 'Downloading requests'
            # Download all the urls otherwise
            self.download_data(reqlist)

            # targzip the data
            targz = self.targz()

            # Send the data
            targz_fp = open(targz, 'rb')
            targz_data = targz_fp.read()
            fp.write(targz_data)
            fp.flush()
            print host+' << archive.tar.gz'
            self.close()
            self.cleanup()
Exemplo n.º 50
0
def get_messages():
    """Log in to Reddit as the flair bot and return its inbox messages."""
    session = Reddit(user_agent='calpoly-flair')
    session.login(user=REDDIT_USERNAME, password=REDDIT_PASSWORD)
    return session.get_inbox().get_messages()
Exemplo n.º 51
0
class ShowerThoughtBot(Bot):
    """IRC bot that replays Reddit shower thoughts on command.

    Responds to chat triggers (hello, thought, help, source, shruggie,
    updatedb) and refreshes its local thought database from Reddit once
    every 24 hours.
    """

    def __init__(self, file):
        """Read the YAML config in *file*, set up the DB and Reddit helper."""
        # Initialize the Bot
        super().__init__(file)
        # Timestamp of the last database refresh (see update_database).
        self.update_time = datetime.now()

        # Load the configurations.
        with open(file, 'r') as y:
            # safe_load refuses to construct arbitrary Python objects;
            # yaml.load without an explicit Loader is unsafe and deprecated.
            config = yaml.safe_load(y)

        # Grab the database filename from the configs.
        self.dbfile = config['database']
        # Create a Reddit object to handle the Reddit-specific tasks.
        self.reddit = Reddit(self.dbfile)


    def parse_message(self, msg, chan, fromNick):
        """Dispatch one raw IRC line to the matching command handler.

        *chan* and *fromNick* are forwarded to the handlers; unmatched
        lines are only logged.
        """
        # logger.debug("parse_message starting with msg " + msg)
        if msg.find("PING :") != -1:
            self.ping()
        elif (msg.find(":hello {}".format(self.nick)) != -1 or
              msg.find(":hello, {}".format(self.nick)) != -1 or
              msg.find(":hi {}".format(self.nick)) != -1):
            logger.info(msg)
            self.hello(chan, fromNick)
        elif (msg.find(":!showerthought") != -1 or
              msg.find(":{}: thought".format(self.nick)) != -1 or
              msg.find(":!stb thought") != -1):
            logger.info(msg)
            self.print_shower_thought(chan, fromNick)
        elif (msg.find(":{}: help".format(self.nick)) != -1 or
              msg.find(":!stb help") != -1):
            logger.info(msg)
            self.print_help(chan)
        elif (msg.find(":!stb source") != -1 or
              msg.find(":{}: source".format(self.nick)) != -1):
            logger.info(msg)
            self.print_source_link(chan)
        elif msg.find(":{}: updatedb".format(self.nick)) != -1:
            # Only the maintainer may force an immediate database refresh.
            if not fromNick == 'mlane':
                self.send_message(chan, "Don't tell me what to do!")
            else:
                self.send_message(chan, "Pulling in some thoughts.")
                self.update_database(False)
        elif msg.find(":{}: shruggie".format(self.nick)) != -1:
            logger.debug("trying to print shruggie")
            self.print_shruggie(chan)
        else:
            logger.info(msg)
            return


    def print_source_link(self, chan):
        """Post the project's source-code link to *chan*."""
        self.send_message(chan, "ShowerThoughtBot is by Mike Lane, "
                                "https://github.com/mikelane/ShowerThoughtBot")
        self.send_message(chan, "Feel free to fork or report issues.")


    def print_help(self, chan):
        """Post a short usage summary to *chan*, one message per line."""
        lines = []
        lines.append("I respond to {}: $command or !stb command".format(
            self.nick))
        lines.append("$command = [help|thought|source]")
        lines.append("Get a shower thought with !showerthought.")
        lines.append("More to come...")
        lines.append("[email protected] for bugs.")

        for line in lines:
            self.send_message(chan, line)


    def print_shower_thought(self, chan, nick):
        """Post a random stored shower thought to *chan*, addressed to *nick*."""
        # #self.db_lock.acquire()
        db = DBAdapter(self.dbfile)
        # thought[1] is the text, thought[2] the attribution.
        thought = db.get_random_thought()
        self.send_message(chan, "okay {}: \"{}\" -{}\r\n".format(
            nick, thought[1], thought[2]))


    def print_shruggie(self, chan):
        """Post a shruggie emoticon (surrogate-escaped UTF-8 bytes)."""
        self.send_message(chan, "\udcc2\udcaf\_("
                          "\udce3\udc83\udc84)_/\udcc2\udcaf")


    def update_database(self, Scheduled=True):
        """Pull the daily top thoughts into the database.

        With Scheduled=True (default), only refresh if at least 24 hours
        have elapsed since the last refresh; with Scheduled=False,
        refresh unconditionally (manual 'updatedb' command).
        """
        if Scheduled:
            now = datetime.now()
            duration = now - self.update_time
            duration = int(duration.total_seconds())
            if duration >= 86400:  # 24 hours
                logger.info('Updating database on schedule.')
                self.update_time = now
                #self.db_lock.acquire()
                self.reddit.get_daily_top()
                #self.db_lock.release()
        else:
            self.reddit.get_daily_top()


    def message_handler(self, message):
        """The message handler breaks out the channel and nick of the sender
        and passes this on to the parser.
        """
        logger.debug("message_handler started with message " + message)
        # Raw strings keep the regex escapes from being interpreted as
        # (invalid) string escape sequences.
        chan = re.search(r'(\#\w+ )', message)
        if chan:
            chan = chan.group(1)
        fromNick = re.search(r'(\:\w+\!)', message)
        if fromNick:
            fromNick = fromNick.group(1)
            fromNick = fromNick.strip(':!')
        self.parse_message(message, chan, fromNick)
        return


    # Run the bot!
    def run(self):
        """Main loop: read from IRC, handle each line, tick the DB refresh."""
        messages = []
        while True:
            buffer = self.read()
            if len(buffer) > 0:
                messages = buffer.splitlines()
                buffer = ""
            while len(messages) > 0:
                self.message_handler(messages.pop(0))
            # Scheduled refresh; no-op unless 24h have passed.
            self.update_database()
            time.sleep(1)
Exemplo n.º 52
0
 def test_mark_as_read(self):
     """Marking an unread message as read removes it from the unread list."""
     oth = Reddit('reddit_api test suite')
     oth.login('PyApiTestUser3', '1111')
     # .next() is the Python 2 iterator protocol: take the first unread item.
     msg = oth.user.get_unread(limit=1).next()  # pylint: disable-msg=E1101
     msg.mark_as_read()
     self.assertTrue(msg not in list(oth.user.get_unread(limit=5)))
Exemplo n.º 53
0
 def __init__(self):
   """Set up the Reddit session, the wordplay engine, and output defaults."""
   self._reddit = Reddit(user_agent='anagram_bot')
   self._anagram = Wordplay()
   # No maintainer configured yet; must be set before maintainer PMs work.
   self._maintainer = None
   # Default output mode: print to stdout only.
   self._output = AnagramBot.OUT_STDOUT
Exemplo n.º 54
0
class AnagramBot:

  OUT_STDOUT = 1
  OUT_MAINTAINER = 2
  OUT_REPLY = 4
  OUT_DEBUG_REPLY = 8

  def __init__(self):
    self._reddit = Reddit(user_agent='anagram_bot')
    self._anagram = Wordplay()
    self._maintainer = None
    self._output = AnagramBot.OUT_STDOUT

  def setMaintainer(self, username):
    self._maintainer = username

  def setOutput(self, outputMode):
    self._output = outputMode

  def login(self, username, password):
    self._reddit.login(username, password)

  def postPalindrome(self):
    comments = list(self._fetchComments())

    for comment in comments:
      palindrome = self._anagram.pickRandomPalindrome(comment.body)
      if palindrome != None:
        print palindrome
      else:
        print "Nope:", comment.body[:70].replace("\n", "")

  def makeFunny(self):
    comments = list(self._fetchComments())
    attempts = []
    anagrams = []
    maxAttempts = 20
    i = 0

    while len(attempts) < 10 and i < maxAttempts:
      i += 1
      comment = random.choice(comments)
      anagrams = self._attempt(comment.body)
      anagrams = sorted(anagrams, key=lambda x: -len(x[1]))
      if len(anagrams) > 0:
        attempts.append( (comment,anagrams) )

    if len(attempts) == 0:
      return

    attempts = sorted(attempts, key=lambda x: -len(x[1][0][1]))
    (comment, anagrams) = attempts[0]

    anagrams = filter(lambda x: len(x[1]) > 3, anagrams)

    reply = self._replace(comment.body, anagrams)
    self._sendFunny(comment, reply)

  def _sendFunny(self, comment, reply):
    if self._output & AnagramBot.OUT_STDOUT:
      self._printReply(comment, reply)
    
    if self._output & AnagramBot.OUT_MAINTAINER:
      self._debugPM(comment.permalink + "\n\n" + reply)
   
    if self._output & AnagramBot.OUT_DEBUG_REPLY:
      self._moderatedReply(comment, reply)

    if self._output & AnagramBot.OUT_REPLY:
      comment.reply( reply )

  def _debugPM(self, message):
    if self._maintainer == None:
      raise ValueError("No maintainer is set! Use setMaintainer(str).")
    self._reddit.compose_message(self._maintainer, "AnagramBot debug",
      message)

  def _printReply(self, comment, reply):
    print comment.body
    print "==================="
    print reply

  def _moderatedReply(self, comment, reply):
    self._printReply(comment,reply)
    print comment.permalink
    response = raw_input("Send this [YES/NO]? ")
    if response.strip() == "YES":
      print "Sending reply..."
      comment.reply(reply)
    else:
      print "Aborted."

  def _replace(self, text, anagrams):
    for anagram in anagrams:
      pattern = "([^A-Za-z'0-9])" + anagram[0] + "([^A-Za-z'0-9])"
      replace = "\\1" + anagram[1] + "\\2"
      text = re.sub(pattern, replace, text)
    return text

    
  def _attempt(self, text):
    result = []
    noMatches = True
    for match in re.findall("[A-Za-z'0-9]+", text):
      for anagram in self._anagram.solveRandomAnagram(match, 5):
        if anagram != None and anagram != match.upper():
          anagram = _matchCase(match, anagram)
          result.append( (match, anagram) )
    return result
  
  def _fetchComments(self):
    return self._reddit.get_all_comments()
Exemplo n.º 55
0
 def test_moderator_requried(self):
     """Fetching settings of a sub you don't moderate raises ModeratorRequired.

     NOTE(review): "requried" is a typo for "required"; left unchanged
     because the method name is the test's public identifier.
     """
     oth = Reddit(USER_AGENT)
     oth.login('PyApiTestUser3', '1111')
     self.assertRaises(errors.ModeratorRequired, oth.get_settings, self.sr)
Exemplo n.º 56
0
class ModUtils(object):
    VERSION = '0.1.dev'

    def __init__(self, subreddit, site=None, verbose=None):
        """Bind a Reddit session to *subreddit*.

        str(self) (the versioned tool name) is used as the user agent.
        *site* is passed through to the Reddit constructor; *verbose*
        enables progress printing throughout the class.
        """
        self.reddit = Reddit(str(self), site)
        self.sub = self.reddit.get_subreddit(subreddit)
        self.verbose = verbose
        # Lazy cache used by current_flair(); None means "not fetched yet".
        self._current_flair = None

    def __str__(self):
        """Return the versioned tool name, also used as the HTTP user agent."""
        return "BBoe's ModUtils " + self.VERSION

    def add_users(self, category):
        """Read user names from stdin and add each to *category* on the sub.

        *category* must be one of 'banned', 'contributors', or
        'moderators'; anything else prints an error and returns.
        """
        # Map category name -> subreddit method name that performs the add.
        mapping = {'banned': 'ban',
                   'contributors': 'make_contributor',
                   'moderators': 'make_moderator'}

        if category not in mapping:
            print '%r is not a valid option for --add' % category
            return

        func = getattr(self.sub, mapping[category])
        print 'Enter user names (any separation should suffice):'
        # Anything that is not a username character acts as a separator.
        data = sys.stdin.read().strip()
        for name in re.split('[^A-Za-z_]+', data):
            func(name)
            print 'Added %r to %s' % (name, category)

    def current_flair(self):
        if self._current_flair is None:
            self._current_flair = []
            if self.verbose:
                print 'Fetching flair list for %s' % self.sub
            for flair in self.sub.flair_list():
                self._current_flair.append(flair)
                yield flair
        else:
            for item in self._current_flair:
                yield item

    def flair_template_sync(self, editable, limit,  # pylint: disable-msg=R0912
                            static, sort, use_css, use_text):
        """Rebuild the sub's flair templates from the flair users already have.

        Counts each distinct flair (keyed by text, css class, or both,
        per *use_text*/*use_css*), seeds *static* entries at *limit* so
        they always survive, drops keys seen fewer than *limit* times,
        clears the existing templates, and re-adds the survivors ordered
        by *sort* ('alpha' = by key, 'size' = by count descending).
        *editable* is forwarded to add_flair_template.
        """
        # Parameter verification
        if not use_text and not use_css:
            raise Exception('At least one of use_text or use_css must be True')
        sorts = ('alpha', 'size')
        if sort not in sorts:
            raise Exception('Sort must be one of: %s' % ', '.join(sorts))

        # Build current flair list along with static values
        if static:
            # Seed static keys at the threshold so they are always kept.
            counter = dict((x, limit) for x in static)
        else:
            counter = {}
        if self.verbose:
            sys.stdout.write('Retrieving current flair')
            sys.stdout.flush()
        for flair in self.current_flair():
            if self.verbose:
                sys.stdout.write('.')
                sys.stdout.flush()
            if use_text and use_css:
                key = (flair['flair_text'], flair['flair_css_class'])
            elif use_text:
                key = flair['flair_text']
            else:
                key = flair['flair_css_class']
            if key in counter:
                counter[key] += 1
            else:
                counter[key] = 1
        if self.verbose:
            print

        # Sort flair list items according to the specified sort
        if sort == 'alpha':
            items = sorted(counter.items())
        else:
            # 'size': most frequent first.
            items = sorted(counter.items(), key=lambda x: x[1], reverse=True)

        # Clear current templates and store flair according to the sort
        if self.verbose:
            print 'Clearing current flair templates'
        self.sub.clear_flair_templates()
        for key, count in items:
            # Skip empty keys and those under the popularity threshold.
            if not key or count < limit:
                continue
            if use_text and use_css:
                text, css = key
            elif use_text:
                text, css = key, ''
            else:
                text, css = '', key
            if self.verbose:
                print 'Adding template: text: "%s" css: "%s"' % (text, css)
            self.sub.add_flair_template(text, css, editable)

    def login(self, user, pswd):
        """Log in as *user* and verify they moderate the target subreddit.

        Raises Exception if the logged-in account is not a moderator of
        self.sub (case-insensitive name comparison).
        """
        if self.verbose:
            print 'Logging in'
        self.reddit.login(user, pswd)
        if self.verbose:
            print 'Fetching moderator list for %s' % self.sub
        if str(self.sub).lower() not in [str(x).lower() for x in
                                         self.reddit.user.my_moderation()]:
            raise Exception('You do not moderate %s' % self.sub)

    def message(self, category, subject, msg_file):
        users = getattr(self.sub, 'get_%s' % category)()
        if not users:
            print 'There are no %s on %s.' % (category, str(self.sub))
            return

        if msg_file:
            try:
                msg = open(msg_file).read()
            except IOError, error:
                print str(error)
                return
        else: