示例#1
0
    def top_submissions(self):
        """Build the markdown "Top Submissions" section.

        At most ten submissions are listed, ordered by score (highest
        first), then by comment count (highest first), then by title.
        Distinguished submissions are only included when
        ``self.distinguished`` is truthy.

        :returns: The markdown text, or an empty string when there is
            nothing to list.

        """
        limit = min(10, len(self.submissions))
        if limit <= 0:
            return ''

        def rank(entry):
            return (-entry.score, -entry.num_comments, entry.title)

        eligible = [entry for entry in self.submissions.values()
                    if self.distinguished or entry.distinguished is None]
        ranked = sorted(eligible, key=rank)[:limit]
        if not ranked:
            return ''

        pieces = [self.post_header.format('Top Submissions')]
        for entry in ranked:
            title = self._safe_title(entry)
            # The title is only rendered as a link when the submission's URL
            # does not contain its own permalink.
            if entry.permalink in entry.url:
                link = tt('0. {}').format(title)
            else:
                link = tt('0. [{}]({})').format(title, entry.url)
            plural = '' if entry.num_comments == 1 else 's'
            details = ' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(entry.author), self._points(entry.score),
                entry.num_comments, plural, self._permalink(entry))
            pieces.append(link + details)
        return tt('{}\n').format(''.join(pieces))
示例#2
0
    def top_submitters(self, num):
        """Render the "Top Submitters' Top Submissions" markdown section.

        Authors are ranked by total score (highest first), then by number
        of submissions (most first), then by name. Under each author, up
        to ten of their submissions are listed by score and then title.

        :param num: Maximum number of authors to list.
        :returns: The markdown text, or an empty string when ``num`` or
            the number of known submitters is not positive.

        """
        limit = min(num, len(self.submitters))
        if limit <= 0:
            return ''

        def plural(count):
            return '' if count == 1 else 's'

        def author_rank(pair):
            author, posts = pair
            return (-sum(post.score for post in posts), -len(posts),
                    str(author))

        def post_rank(post):
            return (-post.score, post.title)

        leaders = sorted(iteritems(self.submitters), key=author_rank)[:limit]

        output = self.post_header.format('Top Submitters\' Top Submissions')
        for author, posts in leaders:
            total = sum(post.score for post in posts)
            output += '1. {}, {} submission{}: {}\n'.format(
                self._points(total), len(posts), plural(len(posts)),
                self._user(author))
            for post in sorted(posts, key=post_rank)[:10]:
                title = self._safe_title(post)
                # Plain title when the URL contains the post's own permalink,
                # otherwise a markdown link to the URL.
                if post.permalink in post.url:
                    entry = tt('  1. {}').format(title)
                else:
                    entry = tt('  1. [{}]({})').format(title, post.url)
                output += entry
                output += ' ({}, [{} comment{}]({}))\n'.format(
                    self._points(post.score), post.num_comments,
                    plural(post.num_comments), self._permalink(post))
            output += '\n'
        return output
示例#3
0
    def top_submissions(self):
        """Render the markdown "Top Submissions" section.

        Lists up to ten submissions ranked by score, then comment count
        (both descending), then title. Distinguished submissions are
        dropped unless ``self.distinguished`` is truthy.

        :returns: The markdown text, or an empty string when no submission
            qualifies.

        """
        cap = min(10, len(self.submissions))
        if cap <= 0:
            return ''

        candidates = []
        for post in self.submissions.values():
            if self.distinguished or post.distinguished is None:
                candidates.append(post)
        candidates.sort(
            key=lambda post: (-post.score, -post.num_comments, post.title))
        chosen = candidates[:cap]
        if not chosen:
            return ''

        text = self.post_header.format('Top Submissions')
        for post in chosen:
            title = self._safe_title(post)
            # Plain title when the URL contains the post's own permalink,
            # otherwise a markdown link to the URL.
            linked = post.permalink not in post.url
            if linked:
                text += tt('1. [{}]({})').format(title, post.url)
            else:
                text += tt('1. {}').format(title)

            suffix = 's' if post.num_comments != 1 else ''
            text += ' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(post.author), self._points(post.score),
                post.num_comments, suffix, self._permalink(post))
        return tt('{}\n').format(text)
示例#4
0
    def top_submitters(self, num, num_submissions):
        """Return a markdown representation of the top submitters.

        Authors are ordered by total submission score and then by number of
        submissions, both descending. Each author's own submissions are
        listed beneath them by descending score.

        :param num: Maximum number of authors to list.
        :param num_submissions: Maximum number of submissions to list per
            author.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of known submitters is not positive.

        """
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        top_submitters = sorted(iteritems(self.submitters), reverse=True,
                                key=lambda x: (sum(y.score for y in x[1]),
                                               len(x[1])))[:num]

        retval = self.post_header.format('Top Submitters\' Top Submissions')
        for (author, submissions) in top_submitters:
            # Only a count of exactly one is singular; zero takes the plural
            # form as well ("0 comments", not "0 comment").
            retval += '0. {0}, {1} submission{2}: {3}\n'.format(
                self._pts(sum(x.score for x in submissions)), len(submissions),
                's' if len(submissions) != 1 else '', self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = safe_title(sub)
                # The title becomes a link only when the submission's URL
                # differs from its permalink.
                if sub.permalink != sub.url:
                    retval += tt('  0. [{0}]({1})').format(title, sub.url)
                else:
                    retval += tt('  0. {0}').format(title)
                retval += ' ({0}, [{1} comment{2}]({3}))\n'.format(
                    self._pts(sub.score), sub.num_comments,
                    's' if sub.num_comments != 1 else '',
                    self._permalink(sub.permalink))
            retval += '\n'
        return retval
示例#5
0
    def top_submitters(self, num, num_submissions):
        """Render the "Top Submitters' Top Submissions" markdown section.

        Authors are ordered by total submission score and then by number of
        submissions, both descending; each author's submissions follow,
        ordered by descending score.

        :param num: Maximum number of authors to list.
        :param num_submissions: Maximum number of submissions per author.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of known submitters is not positive.

        """
        limit = min(num, len(self.submitters))
        if limit <= 0:
            return ''

        def author_weight(pair):
            posts = pair[1]
            return (sum(post.score for post in posts), len(posts))

        ranked = sorted(iteritems(self.submitters), key=author_weight,
                        reverse=True)

        output = self.post_header.format('Top Submitters\' Top Submissions')
        for author, posts in ranked[:limit]:
            output += '0. {0} pts, {1} submissions: {2}\n'.format(
                sum(post.score for post in posts), len(posts),
                self._user(author))
            by_score = sorted(posts, key=lambda post: post.score,
                              reverse=True)
            for post in by_score[:num_submissions]:
                title = safe_title(post)
                # Plain title when the URL is the permalink itself, otherwise
                # a markdown link to the URL.
                if post.permalink == post.url:
                    output += tt('  0. {0}').format(title)
                else:
                    output += tt('  0. [{0}]({1})').format(title, post.url)
                output += ' ({0} pts, [{1} comments]({2}))\n'.format(
                    post.score, post.num_comments,
                    self._permalink(post.permalink))
            output += '\n'
        return output
示例#6
0
 def _permalink(permalink):
     """Shorten a full permalink to a relative ``/comments/...`` link.

     The permalink is split on ``/``; element 6 is used as the submission
     id and element 8, when non-empty, as the comment id.

     :param permalink: Full permalink string with at least nine
         ``/``-separated parts (shorter input raises ``IndexError``).
     :returns: Relative link to the submission, or to the comment with
         ``?context=1`` appended.

     """
     parts = permalink.split('/')
     comment_id = parts[8]
     if not comment_id:
         # Nothing after the title segment: the permalink is a submission.
         return tt('/comments/{0}/_/').format(parts[6])
     return tt('/comments/{0}/_/{1}?context=1').format(parts[6], comment_id)
示例#7
0
    def top_submissions(self, num):
        """Return a markdown representation of the top submissions.

        Submissions are ordered by descending score. Distinguished
        submissions are left out unless ``self.distinguished`` is truthy.

        :param num: Maximum number of submissions to list.
        :returns: The markdown text, or an empty string when there is
            nothing to list.

        """
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''

        top_submissions = sorted(
            [x for x in self.submissions if self.distinguished or
             x.distinguished is None],
            reverse=True, key=lambda x: x.score)[:num]

        if not top_submissions:
            return ''

        retval = self.post_header.format('Top Submissions')
        for sub in top_submissions:
            title = safe_title(sub)
            # The title becomes a link only when the submission's URL differs
            # from its permalink.
            if sub.permalink != sub.url:
                retval += tt('0. [{0}]({1})').format(title, sub.url)
            else:
                retval += tt('0. {0}').format(title)
            # Only a count of exactly one is singular; zero comments must
            # read "0 comments", not "0 comment".
            retval += ' by {0} ({1}, [{2} comment{3}]({4}))\n'.format(
                self._user(sub.author), self._pts(sub.score), sub.num_comments,
                's' if sub.num_comments != 1 else '',
                self._permalink(sub.permalink))
        return tt('{0}\n').format(retval)
示例#8
0
 def _permalink(permalink):
     """Convert a full permalink into a short relative comments link.

     Splits the permalink on ``/`` and reads the submission id from
     position 6 and the comment id (possibly empty) from position 8.

     :param permalink: Full permalink string; fewer than nine
         ``/``-separated parts raises ``IndexError``.
     :returns: ``/comments/<id>/_/`` for a submission, or
         ``/comments/<id>/_/<comment>?context=1`` for a comment.

     """
     pieces = permalink.split('/')
     if pieces[8] != '':
         # A non-empty ninth part identifies a comment.
         return tt('/comments/{0}/_/{1}?context=1').format(
             pieces[6], pieces[8])
     return tt('/comments/{0}/_/').format(pieces[6])
示例#9
0
    def top_submissions(self, num):
        """Return a markdown representation of the top submissions.

        Submissions are ordered by descending score. Distinguished
        submissions are skipped unless ``self.distinguished`` is truthy.

        :param num: Maximum number of submissions to list.
        :returns: The markdown text, or an empty string when there is
            nothing to list.

        """
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''

        eligible = [
            x for x in self.submissions
            if self.distinguished or x.distinguished is None
        ]
        top_submissions = sorted(eligible, reverse=True,
                                 key=lambda x: x.score)[:num]

        if not top_submissions:
            return ''

        retval = self.post_header.format('Top Submissions')
        for sub in top_submissions:
            title = safe_title(sub)
            # The title becomes a link only when the submission's URL differs
            # from its permalink.
            if sub.permalink != sub.url:
                retval += tt('0. [{0}]({1})').format(title, sub.url)
            else:
                retval += tt('0. {0}').format(title)
            # Pluralise everything except exactly one, so that zero comments
            # is rendered as "0 comments".
            plural = 's' if sub.num_comments != 1 else ''
            retval += ' by {0} ({1}, [{2} comment{3}]({4}))\n'.format(
                self._user(sub.author), self._pts(sub.score), sub.num_comments,
                plural, self._permalink(sub.permalink))
        return tt('{0}\n').format(retval)
示例#10
0
    def top_submitters(self, num, num_submissions):
        """Return a markdown representation of the top submitters.

        Authors are ordered by total submission score and then by number of
        submissions, both descending. Each author's submissions are listed
        beneath them by descending score.

        :param num: Maximum number of authors to list.
        :param num_submissions: Maximum number of submissions to list per
            author.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of known submitters is not positive.

        """
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        top_submitters = sorted(iteritems(self.submitters),
                                reverse=True,
                                key=lambda x:
                                (sum(y.score for y in x[1]), len(x[1])))[:num]

        retval = self.post_header.format('Top Submitters\' Top Submissions')
        for (author, submissions) in top_submitters:
            # Singular only for a count of exactly one; zero is plural too.
            retval += '0. {0}, {1} submission{2}: {3}\n'.format(
                self._pts(sum(x.score for x in submissions)), len(submissions),
                's' if len(submissions) != 1 else '', self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = safe_title(sub)
                # The title becomes a link only when the submission's URL
                # differs from its permalink.
                if sub.permalink != sub.url:
                    retval += tt('  0. [{0}]({1})').format(title, sub.url)
                else:
                    retval += tt('  0. {0}').format(title)
                retval += ' ({0}, [{1} comment{2}]({3}))\n'.format(
                    self._pts(sub.score), sub.num_comments,
                    's' if sub.num_comments != 1 else '',
                    self._permalink(sub.permalink))
            retval += '\n'
        return retval
示例#11
0
    def top_submitters(self, num):
        """Build the "Top Submitters' Top Submissions" markdown section.

        Authors are ranked by total score and submission count (both
        descending) and then by name; each author's ten best submissions
        (by score, then title) are listed beneath them.

        :param num: Maximum number of authors to list.
        :returns: The markdown text, or an empty string when ``num`` or
            the number of known submitters is not positive.

        """
        limit = min(num, len(self.submitters))
        if limit <= 0:
            return ''

        def by_author(item):
            return (-sum(post.score for post in item[1]), -len(item[1]),
                    str(item[0]))

        ranked_authors = sorted(iteritems(self.submitters), key=by_author)

        text = self.post_header.format('Top Submitters\' Top Submissions')
        for author, posts in ranked_authors[:limit]:
            count = len(posts)
            text += '0. {}, {} submission{}: {}\n'.format(
                self._points(sum(post.score for post in posts)),
                count, '' if count == 1 else 's', self._user(author))

            best = sorted(posts, key=lambda post: (-post.score, post.title))
            for post in best[:10]:
                title = self._safe_title(post)
                # Link the title unless the URL contains the post's own
                # permalink.
                if post.permalink not in post.url:
                    text += tt('  0. [{}]({})').format(title, post.url)
                else:
                    text += tt('  0. {}').format(title)
                text += ' ({}, [{} comment{}]({}))\n'.format(
                    self._points(post.score), post.num_comments,
                    '' if post.num_comments == 1 else 's',
                    self._permalink(post))
            text += '\n'
        return text
示例#12
0
    def top_comments(self):
        """Build the markdown "Top Comments" section.

        Lists up to ten comments ordered by descending score, with ties
        broken by the author's name.

        :returns: The markdown text, or an empty string when no comments
            have been collected.

        """
        limit = min(10, len(self.comments))
        if limit <= 0:
            return ''

        def rank(item):
            return (-item.score, str(item.author))

        lines = [self.post_header.format('Top Comments')]
        for item in sorted(self.comments, key=rank)[:limit]:
            title = self._safe_title(item.submission)
            lines.append(tt('0. {}: {}\'s [comment]({}) in {}\n').format(
                self._points(item.score), self._user(item.author),
                self._permalink(item), title))
        return tt('{}\n').format(''.join(lines))
示例#13
0
    def top_comments(self):
        """Render the markdown "Top Comments" section.

        Up to ten comments are listed, highest score first; comments with
        equal scores are ordered by author name.

        :returns: The markdown text, or an empty string when no comments
            have been collected.

        """
        cap = min(10, len(self.comments))
        if cap <= 0:
            return ''

        ordered = list(self.comments)
        ordered.sort(key=lambda entry: (-entry.score, str(entry.author)))

        text = self.post_header.format('Top Comments')
        template = tt('1. {}: {}\'s [comment]({}) in {}\n')
        for entry in ordered[:cap]:
            title = self._safe_title(entry.submission)
            text += template.format(
                self._points(entry.score), self._user(entry.author),
                self._permalink(entry), title)
        return tt('{}\n').format(text)
示例#14
0
    def top_comments(self, num):
        """Render the markdown "Top Comments" section.

        Comments are ordered by descending net score, computed as
        ``ups - downs``.

        :param num: Maximum number of comments to list.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of collected comments is not positive.

        """
        def net_score(item):
            return item.ups - item.downs

        limit = min(num, len(self.comments))
        if limit <= 0:
            return ''

        ranked = sorted(self.comments, key=net_score, reverse=True)
        output = self.post_header.format('Top Comments')
        for entry in ranked[:limit]:
            title = safe_title(entry.submission)
            output += tt('0. {0} pts: {1}\'s [comment]({2}) in {3}\n').format(
                net_score(entry), self._user(entry.author),
                self._permalink(entry.permalink), title)
        return tt('{0}\n').format(output)
 def test_env_var_missing_target(self):
     """Check that ``sfx-py-trace`` without a target fails with a usage error.

     The command is run with ``SIGNALFX_ACCESS_TOKEN`` set in the
     environment and must exit with an error whose captured output contains
     one of two accepted "missing argument" messages.
     """
     environment = dict(os.environ, SIGNALFX_ACCESS_TOKEN='123')
     with pytest.raises(CalledProcessError) as raised:
         self.check_output(['sfx-py-trace'], env=environment)
     captured = tt(raised.value.output)
     accepted = ('too few arguments',
                 'the following arguments are required: target')
     assert any(message in captured for message in accepted)
示例#16
0
    def top_comments(self, num):
        """Build the markdown "Top Comments" section.

        Comments are listed by descending score.

        :param num: Maximum number of comments to list.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of collected comments is not positive.

        """
        def score_of(item):
            return item.score

        limit = min(num, len(self.comments))
        if limit <= 0:
            return ''

        best = sorted(self.comments, key=score_of, reverse=True)[:limit]
        text = self.post_header.format('Top Comments')
        for item in best:
            title = safe_title(item.submission)
            line = tt('0. {0}: {1}\'s [comment]({2}) in {3}\n').format(
                self._pts(score_of(item)), self._user(item.author),
                self._permalink(item.permalink), title)
            text += line
        return tt('{0}\n').format(text)
示例#17
0
    def fetch_recent_submissions(self,
                                 max_duration,
                                 after,
                                 exclude_self,
                                 exclude_link,
                                 since_last=True):
        """Fetch recent submissions in subreddit with boundaries.

        Walks the subreddit's ``get_new`` listing and appends every
        submission created after ``self.min_date`` and no later than
        ``self.max_date`` to ``self.submissions``. On success both dates
        are then narrowed to the oldest and newest submission collected.

        :param max_duration: When set, specifies the number of days to include
        :param after: When set, fetch all submissions after this submission
            id.
        :param exclude_self: When true, don't include self posts.
        :param exclude_link:  When true, don't include links.
        :param since_last: When true use info from last submission to determine
            the stop point
        :returns: True if any submissions were found.
        :raises TypeError: When both ``exclude_self`` and ``exclude_link``
            are set.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if max_duration:
            # Move the lower bound so the window spans max_duration days.
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        params = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.get_new(limit=None, params=params):
            # Too new: skip it but keep walking the listing.
            if submission.created_utc > self.max_date:
                continue
            # Too old: stop. Presumably the listing is newest-first, so
            # everything after this point is older still -- TODO confirm.
            if submission.created_utc <= self.min_date:
                break
            # A post by the logged-in user whose title starts with
            # post_prefix is treated as a previously published report.
            if since_last and str(submission.author) == str(self.reddit.user) \
                    and submission.title.startswith(self.post_prefix):
                # Use info in this post to update the min_date
                # And don't include this post
                self.msg(
                    tt('Found previous: {0}').format(safe_title(submission)),
                    2)
                if self.prev_srs is None:  # Only use the most recent
                    # Raising min_date here also tightens the stop condition
                    # checked on later iterations.
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission.permalink
                continue
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        num_submissions = len(self.submissions)
        self.msg('DEBUG: Found {0} submissions'.format(num_submissions), 1)
        if num_submissions == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True
示例#18
0
    def top_submissions(self, num):
        """Render the markdown "Top Submissions" section.

        Submissions are listed by descending score.

        :param num: Maximum number of submissions to list.
        :returns: The markdown text, or an empty string when ``num`` or the
            number of collected submissions is not positive.

        """
        limit = min(num, len(self.submissions))
        if limit <= 0:
            return ''

        ranked = sorted(self.submissions, key=lambda post: post.score,
                        reverse=True)

        output = self.post_header.format('Top Submissions')
        for post in ranked[:limit]:
            title = safe_title(post)
            # Plain title when the URL is the permalink itself, otherwise a
            # markdown link to the URL.
            if post.permalink == post.url:
                output += tt('0. {0}').format(title)
            else:
                output += tt('0. [{0}]({1})').format(title, post.url)
            output += ' by {0} ({1} pts, [{2} comments]({3}))\n'.format(
                self._user(post.author), post.score, post.num_comments,
                self._permalink(post.permalink))
        return tt('{0}\n').format(output)
示例#19
0
    def fetch_recent_submissions(self, max_duration, after, exclude_self,
                                 exclude_link, since_last=True):
        """Fetch recent submissions in subreddit with boundaries.

        Walks the subreddit's ``get_new`` listing and appends every
        submission created after ``self.min_date`` and no later than
        ``self.max_date`` to ``self.submissions``. On success both dates
        are then narrowed to the oldest and newest submission collected.

        :param max_duration: When set, specifies the number of days to include
        :param after: When set, fetch all submissions after this submission
            id.
        :param exclude_self: When true, don't include self posts.
        :param exclude_link:  When true, don't include links.
        :param since_last: When true use info from last submission to determine
            the stop point
        :returns: True if any submissions were found.
        :raises TypeError: When both ``exclude_self`` and ``exclude_link``
            are set.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if max_duration:
            # Move the lower bound so the window spans max_duration days.
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        params = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.get_new(limit=None, params=params):
            # Too new: skip it but keep walking the listing.
            if submission.created_utc > self.max_date:
                continue
            # Too old: stop. Presumably the listing is newest-first, so
            # everything after this point is older still -- TODO confirm.
            if submission.created_utc <= self.min_date:
                break
            # A post by the logged-in user whose title starts with
            # post_prefix is treated as a previously published report.
            if since_last and str(submission.author) == str(self.reddit.user) \
                    and submission.title.startswith(self.post_prefix):
                # Use info in this post to update the min_date
                # And don't include this post
                self.msg(tt('Found previous: {0}')
                         .format(safe_title(submission)), 2)
                if self.prev_srs is None:  # Only use the most recent
                    # Raising min_date here also tightens the stop condition
                    # checked on later iterations.
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission.permalink
                continue
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        num_submissions = len(self.submissions)
        self.msg('DEBUG: Found {0} submissions'.format(num_submissions), 1)
        if num_submissions == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True
示例#20
0
 def _user(user):
     """Format a user as a markdown link to their profile page.

     :param user: ``None``, a ``Redditor`` instance, or a user-name string.
     :returns: ``'_deleted_'`` for ``None``; otherwise a markdown link
         whose visible text has underscores backslash-escaped.

     """
     if user is None:
         return '_deleted_'
     name = str(user) if isinstance(user, Redditor) else user
     # Escape underscores in the visible text only; the link target keeps
     # the raw name.
     visible = name.replace('_', r'\_')
     return tt('[{0}](/user/{1})').format(visible, name)
示例#21
0
class SubredditStats(object):
    """Contain all the functionality of the subreddit_stats command."""

    post_footer = tt('>Generated with [BBoe](/u/bboe)\'s [Subreddit Stats]'
                     '(https://github.com/praw-dev/prawtools_fork) '
                     '([Donate](https://cash.me/$praw))')
    post_header = tt('---\n###{}\n')
    post_prefix = tt('Subreddit Stats:')

    @staticmethod
    def get_http(
        uri,
        parameters=None,
        max_retries=5,
        headers={
            'User-Agent':
            'Mozilla/57.0 (Macintosh; '
            'Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
        }):
        """Perform a GET request through a short-lived session with retries.

        :param uri: URL to fetch.
        :param parameters: Optional query parameters passed as ``params``.
        :param max_retries: Retry setting handed to the adapter mounted for
            ``https://`` URLs.
        :param headers: Request headers; defaults to a browser-like
            ``User-Agent``. The default dict is shared between calls and
            must not be mutated.
        :returns: The response object returned by ``Session.get``.

        """
        with requests.Session() as s:
            # ``max_retries`` must be passed by keyword: the first positional
            # parameter of HTTPAdapter is ``pool_connections``, so passing it
            # positionally left the adapter with no retries at all.
            a = requests.adapters.HTTPAdapter(max_retries=max_retries)
            s.mount('https://', a)
            response = s.get(uri, params=parameters, headers=headers)
        return response

    @staticmethod
    def _permalink(item):
        """Return a short relative link for a submission or a comment.

        :param item: A ``MiniSubmission``, or any other object exposing
            ``submission.id`` and ``id`` (treated as a comment).
        :returns: ``/comments/<id>`` for a submission, otherwise
            ``/comments/<submission id>//<id>?context=1``.

        """
        if not isinstance(item, MiniSubmission):
            template = tt('/comments/{}//{}?context=1')
            return template.format(item.submission.id, item.id)
        return tt('/comments/{}').format(item.id)

    @staticmethod
    def _points(points):
        return '1 point' if points == 1 else '{} points'.format(points)

    @staticmethod
    def _rate(items, duration):
        return 86400. * items / duration if duration else items

    @staticmethod
    def _safe_title(submission):
        """Return the submission title cleaned up for single-line output.

        Every match of ``RE_WHITESPACE`` in the title is replaced with one
        space, and the result is stripped at both ends.
        """
        collapsed = RE_WHITESPACE.sub(' ', submission.title)
        return collapsed.strip()

    @staticmethod
    def _save_report(title, body):
        """Write the report to a new ``.md`` file in the current directory.

        The file name is generated by ``mkstemp``; the file holds the title,
        a blank line and the body, encoded as UTF-8. The chosen path is
        logged at info level.

        :param title: First line of the saved report.
        :param body: Report text written after the title.

        """
        handle, path = mkstemp(suffix='.md', dir='.')
        # mkstemp returns an open descriptor; it is closed here because the
        # file is reopened by name with an explicit encoding.
        os.close(handle)
        with codecs.open(filename=path, mode='w', encoding='utf-8') as out:
            out.write('{}\n\n{}'.format(title, body))
        logger.info('Report saved to {}'.format(path))

    @staticmethod
    def _user(user):
        return '_deleted_' if user is None else tt('/u/{}').format(user)

    def __init__(self,
                 subreddit,
                 site,
                 distinguished,
                 user_agent=None,
                 reddit=None):
        """Initialize the SubredditStats instance with config options.

        :param subreddit: Name of the subreddit to analyze.
        :param site: Site name passed to ``Reddit`` when no ``reddit``
            instance is supplied.
        :param distinguished: When truthy, distinguished submissions and
            comments are included in the statistics.
        :param user_agent: Used as given only when it is a dict; any other
            value falls back to ``AGENT``.
        :param reddit: Optional ready-made client; when falsy a new
            ``Reddit`` instance is created.

        """
        self.commenters = defaultdict(list)
        self.comments = []
        self.distinguished = distinguished
        self.min_date = 0
        # The upper bound is one day in the past, so submissions from the
        # last day are outside the default window.
        self.max_date = time.time() - SECONDS_IN_A_DAY
        if isinstance(user_agent, dict):
            self.user_agent = user_agent
        else:
            self.user_agent = AGENT
        self.reddit = (reddit or Reddit(
            site, check_for_updates=False, user_agent=self.user_agent))
        self.submissions = {}
        self.subreddit_statistics = {}
        self.submitters = defaultdict(list)
        self.subreddit_name = subreddit
        self.submit_subreddit = self.reddit.subreddit('subreddit_stats')
        self.subreddit = self.reddit.subreddit(subreddit)
        self.collection_interval = None
        # ``basic_stats`` reads this attribute and ``__reset__`` sets it, so
        # it has to exist on a fresh instance as well; otherwise the read
        # fails before the explicit validation in ``basic_stats`` can run.
        self.collection_start_time = None

    def __reset__(self, subreddit):
        """Discard collected data and retarget the instance.

        All per-run collections and date bounds are restored to their
        initial values, the subreddit handle is replaced, and a garbage
        collection pass is requested. The ``reddit`` client, the
        ``distinguished`` flag and the user agent are left untouched.

        :param subreddit: Name of the subreddit to analyze next.

        """
        self.commenters, self.comments = defaultdict(list), []
        self.min_date, self.max_date = 0, time.time() - SECONDS_IN_A_DAY
        self.submissions, self.subreddit_statistics = {}, {}
        self.submitters = defaultdict(list)
        self.subreddit_name = subreddit
        self.subreddit = self.reddit.subreddit(subreddit)
        self.collection_interval = self.collection_start_time = None
        # Reclaim the memory held by the data that was just dropped.
        gc.collect()

    def basic_stats(self, stats_only=True, tuple_format=True):
        """Compute summary statistics for the collected data.

        Rates are per day. When ``self.collection_interval`` is one of
        ``TOP_VALUES`` other than ``"all"``, the rate is taken over that
        interval's length from ``TOP_VALS_IN_SECONDS``; otherwise it is
        taken over the observed time span of the collected items.

        :param stats_only: When ``True`` (default) return the raw
            statistics; otherwise return a markdown table.
        :param tuple_format: When ``True`` (default) the raw statistics are
            a tuple, otherwise a dict. Ignored for the markdown output.
        :returns: A tuple ``(subreddit, collection_interval,
            collection_start_time, min_date, max_date, submission_duration,
            comment_score, comment_rate, submission_score, submission_rate,
            num_comments, num_commenters, num_submissions, num_submitters,
            subscribers, active_users)``, a dict with the same data, or a
            markdown string.
        :raises AttributeError: When ``self.collection_start_time`` is not
            an int.

        """

        def per_day(count, observed_duration):
            """Return the daily rate over the fixed or observed window."""
            interval = self.collection_interval
            # Compare by value. ``is not "all"`` tested object identity,
            # which depends on string interning and is not guaranteed to
            # hold for an equal string built at runtime.
            if interval in TOP_VALUES and interval != "all":
                return self._rate(count, TOP_VALS_IN_SECONDS[interval])
            return self._rate(count, observed_duration)

        comment_score = sum(comment.score for comment in self.comments)
        if self.comments:
            # First and last element bound the observed span; this assumes
            # self.comments is ordered by creation time (process_commenters
            # sorts it that way).
            comment_duration = (self.comments[-1].created_utc -
                                self.comments[0].created_utc)
            comment_rate = per_day(len(self.comments), comment_duration)
        else:
            comment_rate = 0

        if not isinstance(self.collection_start_time, int):
            error_msg = "Collection start time variable not initialized correctly"
            logging.error(error_msg)
            raise AttributeError(error_msg)

        submission_duration = self.max_date - self.min_date
        submission_rate = per_day(len(self.submissions), submission_duration)
        submission_score = sum(sub.score for sub in self.submissions.values())

        # Subscriber counts are best effort: any failure is logged and the
        # values stay None.
        subscribers, active_users = None, None
        try:
            subscribers, active_users = self.fetch_wiki_data(
                self.subreddit_name, header=self.user_agent)
        except Exception as e:
            error_msg = (
                "error in fetching wiki data, error message was %s") % str(e)
            # ``logging.warn`` is a deprecated alias of ``logging.warning``.
            logging.warning(error_msg)

        # The flags are compared with ``== True`` on purpose so that callers
        # passing non-bool values keep getting the same branch as before.
        if tuple_format == True:
            stats_data = (self.subreddit_name, self.collection_interval,
                          self.collection_start_time, int(self.min_date),
                          int(self.max_date), int(submission_duration),
                          comment_score, int(comment_rate), submission_score,
                          int(submission_rate), len(self.comments),
                          len(self.commenters), len(self.submissions),
                          len(self.submitters), subscribers, active_users)
        else:
            stats_data = {
                "subreddit": self.subreddit_name,
                "collection_interval": self.collection_interval,
                "collection_start_time": self.collection_start_time,
                "min_date": int(self.min_date),
                "max_date": int(self.max_date),
                "comment_rate": int(comment_rate),
                "comment_score": comment_score,
                "submission_interval": int(submission_duration),
                "submission_score": submission_score,
                "submission_rate": int(submission_rate),
                "num_comments": len(self.comments),
                "num_commenters": len(self.commenters),
                "num_submissions": len(self.submissions),
                "num_submitters": len(self.submitters),
                "measurement_period": self.collection_interval,
                "subscribers": subscribers,
                "active_users": active_users
            }
        if stats_only == True:
            return stats_data

        values = [('Total', len(self.submissions), len(self.comments)),
                  ('Rate (per day)', '{:.2f}'.format(submission_rate),
                   '{:.2f}'.format(comment_rate)),
                  ('Unique Redditors', len(self.submitters),
                   len(self.commenters)),
                  ('Combined Score', submission_score, comment_score)]

        retval = 'Period: {:.2f} days\n\n'.format(submission_duration /
                                                  86400.)
        retval += '||Submissions|Comments|\n:-:|--:|--:\n'
        for row in values:
            retval += '__{}__|{}|{}\n'.format(*row)
        return retval + '\n'

    def fetch_recent_submissions(self, max_duration):
        """Collect new submissions that fall inside the date window.

        Iterates ``self.subreddit.new`` and stores a ``MiniSubmission`` in
        ``self.submissions`` (keyed by id) for every submission created
        after ``self.min_date`` and no later than ``self.max_date``.
        Iteration stops at the first submission at or before
        ``self.min_date``.

        :param max_duration: When set, the number of days the window spans,
            counted back from ``self.max_date``.

        """
        if max_duration:
            window = SECONDS_IN_A_DAY * max_duration
            self.min_date = self.max_date - window

        for post in self.subreddit.new(limit=None):
            created = post.created_utc
            if created <= self.min_date:
                # Presumably the listing is newest-first, so nothing later
                # in it can qualify -- TODO confirm.
                break
            if created <= self.max_date:
                self.submissions[post.id] = MiniSubmission(post)

    def fetch_submissions(self, submissions_callback, *args):
        """Run a fetch callback, then derive dates and per-author groups.

        After the callback has run, ``self.min_date`` and ``self.max_date``
        are set to the earliest and latest creation times among the
        collected submissions, and submitters and commenters are processed.
        Nothing further happens when no submissions were collected.

        :param submissions_callback: Callable expected to fill
            ``self.submissions``.
        :param args: Positional arguments forwarded to the callback.

        """
        logger.debug('Fetching submissions')
        submissions_callback(*args)
        logger.info('Found {} submissions'.format(len(self.submissions)))

        if self.submissions:
            created_times = [post.created_utc
                             for post in self.submissions.values()]
            self.min_date = min(created_times)
            self.max_date = max(created_times)

            self.process_submitters()
            self.process_commenters()

    def fetch_wiki_data(self, subreddit, header):
        """Fetch subscriber and active-user counts for a subreddit.

        Requests ``/r/<subreddit>/about/.json`` through ``get_http``.

        :param subreddit: Subreddit name inserted into the URL.
        :param header: Headers passed on to ``get_http``.
        :returns: ``(subscribers, active_user_count)`` taken from the
            response's ``data`` object, or ``None`` when the status code is
            not 200.

        """
        about_url = ''.join(
            ("https://www.reddit.com/r/", subreddit, "/about/.json"))
        response = self.get_http(about_url, headers=header)
        if response.status_code != 200:
            return None
        payload = response.json()
        return (payload["data"]["subscribers"],
                payload["data"]["active_user_count"])

    def fetch_top_submissions(self, top):
        """Collect the subreddit's top submissions for a time filter.

        Every submission yielded by ``self.subreddit.top`` is stored in
        ``self.submissions`` as a ``MiniSubmission`` keyed by its id.
        Nothing is returned.

        :param top: Time filter passed as ``time_filter``, e.g. one of
            week, month, year, all.

        """
        listing = self.subreddit.top(limit=None, time_filter=top)
        for post in listing:
            mini = MiniSubmission(post)
            self.submissions[post.id] = mini

    def process_commenters(self):
        """Fetch the comments of every submission and group them by author.

        For each collected submission with at least one comment, the full
        submission is requested, its comments are wrapped in
        ``MiniComment`` and added to ``self.comments``. Distinguished
        comments are skipped unless ``self.distinguished`` is truthy.
        Finally ``self.comments`` is sorted by creation time and
        ``self.commenters`` maps each author to their comments; comments
        without an author are not grouped.

        :raises RequestException: When expanding a submission's comments
            fails three times in a row.

        """
        for index, submission in enumerate(self.submissions.values()):
            if submission.num_comments == 0:
                continue
            real_submission = self.reddit.submission(id=submission.id)
            real_submission.comment_sort = 'top'

            # Up to three attempts; the exception is re-raised only on the
            # last one.
            for i in range(3):
                try:
                    # NOTE(review): with limit=0 PRAW presumably drops the
                    # "more comments" placeholders without fetching them --
                    # confirm against the PRAW docs.
                    real_submission.comments.replace_more(limit=0)
                    break
                except RequestException:
                    if i >= 2:
                        raise
                    logger.debug(
                        'Failed to fetch submission {}, retrying'.format(
                            submission.id))

            self.comments.extend(
                MiniComment(comment, submission)
                for comment in real_submission.comments.list()
                if self.distinguished or comment.distinguished is None)

            # Progress message after every 50th submission.
            if index % 50 == 49:
                logger.debug('Completed: {:4d}/{} submissions'.format(
                    index + 1, len(self.submissions)))

            # Clean up to reduce memory usage
            submission = None
            gc.collect()

        # Chronological order; basic_stats reads the first and last element
        # to measure the time span covered by the comments.
        self.comments.sort(key=lambda x: x.created_utc)
        for comment in self.comments:
            if comment.author:
                self.commenters[comment.author].append(comment)

    def process_submitters(self):
        """Append every eligible submission to its author's list.

        A submission is skipped when it has no author, or when it is
        distinguished while ``self.distinguished`` is false.  The lists live
        in ``self.submitters`` and are keyed by ``submission.author``.

        """
        for entry in self.submissions.values():
            if not entry.author:
                continue
            if not self.distinguished and entry.distinguished is not None:
                continue
            self.submitters[entry.author].append(entry)

    def process_submission_stats(self):
        """Return one summary tuple per collected submission.

        Distinguished submissions are left out unless ``self.distinguished``
        is true.

        :returns: A list of ``(submission id, collection start time, title,
            subreddit name, score, number of comments, creation time as int,
            author)`` tuples; an empty list when nothing was collected.

        """
        if not self.submissions:
            return []
        started = self.collection_start_time
        # ``dict.items()`` exists on both Python 2 and 3, whereas the
        # ``iteritems()`` method used previously is Python 2 only.
        return [(s_id, started, x.title, self.subreddit_name, x.score,
                 x.num_comments, int(x.created_utc), x.author)
                for s_id, x in self.submissions.items()
                if self.distinguished or x.distinguished is None]

    def publish_results(self,
                        view,
                        submitters,
                        commenters,
                        publish_externally=False):
        """Collect the statistics and optionally submit them to reddit.

        :param view: The view being analyzed; only used to decide whether the
            post title says "top posts".
        :param submitters: Number of top submitters to include in the post
            body. Reduced one at a time while the body is too large.
        :param commenters: Number of top commenters to include in the post.
        :param publish_externally: When false (the default) nothing is
            submitted: the statistics are stored on
            ``self.subreddit_statistics`` and returned. When true the
            markdown report is submitted to ``self.submit_subreddit``.
        :returns: The statistics dictionary when ``publish_externally`` is
            false. Otherwise the result of the submit call, or ``None`` when
            submitting failed (the report is then handed to
            ``self._save_report``).

        """
        if not publish_externally:
            basic = self.basic_stats(stats_only=True)
            submissions = self.process_submission_stats()
            subreddit_statistics = {
                "numerical_stats": basic,
                "subreddit_name": self.subreddit_name,
                "natural_language_data": {"submissions": submissions},
                "time_recorded": time.time()
            }
            self.subreddit_statistics = subreddit_statistics
            return subreddit_statistics

        def timef(timestamp, date_only=False):
            """Return a suitable string representation of the timestamp."""
            dtime = datetime.fromtimestamp(timestamp)
            if date_only:
                return dtime.strftime('%Y-%m-%d')
            return dtime.strftime('%Y-%m-%d %H:%M PDT')

        basic = self.basic_stats()
        top_commenters = self.top_commenters(commenters)
        top_comments = self.top_comments()
        top_submissions = self.top_submissions()
        # Decrease number of top submitters if body is too large.
        body = None
        while body is None or len(body) > 40000 and submitters > 0:
            body = (basic + self.top_submitters(submitters) +
                    top_commenters + top_submissions + top_comments +
                    self.post_footer)
            submitters -= 1

        title = '{} {} {}posts from {} to {}'.format(
            self.post_prefix,
            str(self.subreddit), 'top ' if view in TOP_VALUES else '',
            timef(self.min_date, True), timef(self.max_date))

        try:  # Attempt to make the submission
            return self.submit_subreddit.submit(title, selftext=body)
        except Exception:
            # Best effort: keep the generated report instead of losing it.
            logger.exception('Failed to submit to {}'.format(
                self.submit_subreddit))
            self._save_report(title, body)
            return None

    def run(self, view, submitters, commenters):
        """Fetch the submissions for ``view`` and publish the results.

        :param view: Either one of ``TOP_VALUES`` (top submissions are
            fetched) or a value convertible with ``int`` (recent submissions
            are fetched).
        :param submitters: Forwarded to ``publish_results``.
        :param commenters: Forwarded to ``publish_results``.
        :returns: Whatever ``publish_results`` returns, or ``None`` when no
            submissions were found.

        """
        logger.info('Analyzing subreddit: {}'.format(self.subreddit))
        self.collection_interval = view
        self.collection_start_time = int(time.time())

        if view not in TOP_VALUES:
            fetcher = self.fetch_recent_submissions
            view = int(view)
        else:
            fetcher = self.fetch_top_submissions
        self.fetch_submissions(fetcher, view)

        if self.submissions:
            return self.publish_results(view, submitters, commenters)

        logger.warning('No submissions were found.')
        return None

    def top_commenters(self, num, give_list=False):
        """Return the top commenters as markdown, or as a list.

        Commenters are ranked by combined comment score, then by number of
        comments, then by name.

        :param num: Maximum number of commenters to include.
        :param give_list: When true return the ranked
            ``(author, comments)`` pairs instead of markdown.
        :returns: A markdown string (``''`` when there is nothing to
            report), or a list when ``give_list`` is true.

        """
        num = min(num, len(self.commenters))
        if num <= 0:
            return [] if give_list else ''

        top_commenters = sorted(
            self.commenters.items(),
            key=lambda x:
            (-sum(y.score for y in x[1]), -len(x[1]), str(x[0])))[:num]

        if give_list:
            return top_commenters

        retval = self.post_header.format('Top Commenters')
        for author, comments in top_commenters:
            retval += '0. {} ({}, {} comment{})\n'.format(
                self._user(author), self._points(sum(x.score
                                                     for x in comments)),
                len(comments), 's' if len(comments) != 1 else '')
        return '{}\n'.format(retval)

    def top_submitters(self, num, give_list=False):
        """Return the top submitters as markdown, or as a list.

        Submitters are ranked by combined submission score, then by number
        of submissions, then by name.  The markdown lists up to ten
        submissions per submitter, ordered by score and then title.

        :param num: Maximum number of submitters to include.
        :param give_list: When true return the ranked
            ``(author, submissions)`` pairs instead of markdown.
        :returns: A markdown string (``''`` when there is nothing to
            report), or a list when ``give_list`` is true.

        """
        num = min(num, len(self.submitters))
        if num <= 0:
            return [] if give_list else ''

        top_submitters = sorted(
            self.submitters.items(),
            key=lambda x:
            (-sum(y.score for y in x[1]), -len(x[1]), str(x[0])))[:num]
        if give_list:
            return top_submitters

        retval = self.post_header.format('Top Submitters\' Top Submissions')
        for (author, submissions) in top_submitters:
            retval += '0. {}, {} submission{}: {}\n'.format(
                self._points(sum(x.score for x in submissions)),
                len(submissions), 's' if len(submissions) != 1 else '',
                self._user(author))
            for sub in sorted(submissions, key=lambda x:
                              (-x.score, x.title))[:10]:
                title = self._safe_title(sub)
                # Only link the title when the URL points somewhere other
                # than the submission's own permalink.
                if sub.permalink in sub.url:
                    retval += tt('  0. {}').format(title)
                else:
                    retval += tt('  0. [{}]({})').format(title, sub.url)
                retval += ' ({}, [{} comment{}]({}))\n'.format(
                    self._points(sub.score), sub.num_comments,
                    's' if sub.num_comments != 1 else '', self._permalink(sub))
            retval += '\n'
        return retval

    def top_submissions(self, give_list=False):
        """Return the top ten submissions as markdown, or as a list.

        Distinguished submissions are left out unless ``self.distinguished``
        is true.  Submissions are ranked by score, then by number of
        comments, then by title.

        :param give_list: When true return the ranked submissions instead of
            markdown.
        :returns: A markdown string (``''`` when there is nothing to
            report), or a list (possibly empty) when ``give_list`` is true.

        """
        num = min(10, len(self.submissions))
        if num <= 0:
            # Keep the return type consistent with the other ``top_*``
            # methods: an empty list, not an empty string, in list mode.
            return [] if give_list else ''

        top_submissions = sorted(
            (x for x in self.submissions.values()
             if self.distinguished or x.distinguished is None),
            key=lambda x: (-x.score, -x.num_comments, x.title))[:num]

        if give_list:
            return top_submissions
        if not top_submissions:
            return ''

        retval = self.post_header.format('Top Submissions')
        for sub in top_submissions:
            title = self._safe_title(sub)
            # Only link the title when the URL points somewhere other than
            # the submission's own permalink.
            if sub.permalink in sub.url:
                retval += tt('0. {}').format(title)
            else:
                retval += tt('0. [{}]({})').format(title, sub.url)

            retval += ' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(sub.author), self._points(sub.score),
                sub.num_comments, 's' if sub.num_comments != 1 else '',
                self._permalink(sub))
        return tt('{}\n').format(retval)

    def top_comments(self, give_list=False):
        """Return the top ten comments as markdown, or as a list.

        Comments are ranked by score and then by the string form of their
        author.

        :param give_list: When true return the ranked comments instead of
            markdown.
        :returns: A markdown string (``''`` when there are no comments), or
            a list when ``give_list`` is true.

        """
        num = min(10, len(self.comments))
        if num <= 0:
            return [] if give_list else ''

        top_comments = sorted(self.comments,
                              key=lambda x: (-x.score, str(x.author)))[:num]

        if give_list:
            return top_comments

        retval = self.post_header.format('Top Comments')
        for comment in top_comments:
            title = self._safe_title(comment.submission)
            retval += tt('0. {}: {}\'s [comment]({}) in {}\n').format(
                self._points(comment.score), self._user(comment.author),
                self._permalink(comment), title)
        return tt('{}\n').format(retval)
示例#22
0
class SubRedditStats(object):
    """Contain all the functionality of the subreddit_stats command."""

    post_prefix = tt('Subreddit Stats:')
    post_header = tt('---\n###{}\n')
    post_footer = tt('>Generated with [BBoe](/u/bboe)\'s [Subreddit Stats]'
                     '(https://github.com/praw-dev/prawtools)  \n{}'
                     'SRS Marker: {}')
    # Raw string: ``\d`` is a regex class, not a valid Python string escape.
    re_marker = re.compile(r'SRS Marker: (\d+)')

    @staticmethod
    def _previous_max(submission):
        """Return the last SRS marker in the submission's selftext as float.

        Raises ``IndexError`` when the selftext contains no marker.

        """
        return float(SubRedditStats.re_marker.findall(submission.selftext)[-1])

    @staticmethod
    def _permalink(item):
        """Return the relative link to a submission or a comment."""
        if isinstance(item, Submission):
            return tt('/comments/{}').format(item.id)
        else:  # comment
            return tt('/comments/{}//{}?context=1').format(
                item.submission.id, item.id)

    @staticmethod
    def _pts(points):
        """Return the score with a correctly pluralized ``pt`` suffix."""
        return '1 pt' if points == 1 else '{} pts'.format(points)

    @staticmethod
    def _user(user):
        """Return a markdown link to the user, or ``_deleted_`` for None."""
        if user is None:
            return '_deleted_'
        elif isinstance(user, Redditor):
            user = str(user)
        # Underscores are backslash-escaped in the link text so that markdown
        # does not treat them as emphasis markers.
        return tt('[{}](/user/{})').format(user.replace('_', '\\_'), user)

    def __init__(self, subreddit, site, verbosity, distinguished):
        """Initialize the SubRedditStats instance with config options."""
        self.reddit = Reddit(site,
                             disable_update_check=True,
                             user_agent='prawtools/{}'.format(__version__))
        self.subreddit = self.reddit.subreddit(subreddit)
        self.verbosity = verbosity
        self.distinguished = distinguished
        self.submissions = []
        self.comments = []
        self.submitters = defaultdict(list)
        self.commenters = defaultdict(list)
        self.min_date = 0
        self.max_date = time.time() - DAYS_IN_SECONDS * 3
        self.prev_srs = None

    def msg(self, msg, level, overwrite=False):
        """Output a message to the screen if the verbosity is sufficient.

        :param msg: The text to write to stdout.
        :param level: Minimum verbosity required for the message to appear.
        :param overwrite: When true end the line with a carriage return (and
            flush) so that the next message overwrites this one.

        """
        if self.verbosity and self.verbosity >= level:
            sys.stdout.write(msg)
            if overwrite:
                sys.stdout.write('\r')
                sys.stdout.flush()
            else:
                sys.stdout.write('\n')

    def prev_stat(self, prev_id):
        """Load the previous subreddit stat."""
        self.prev_srs = self.reddit.submission(prev_id)
        self.min_date = self._previous_max(self.prev_srs)

    def _finish_fetch(self):
        """Report the fetch result and tighten the date range.

        :returns: False when no submissions are stored. Otherwise True,
            after sorting ``self.submissions`` by creation time and setting
            ``min_date``/``max_date`` to the oldest/newest creation time.

        """
        num_submissions = len(self.submissions)
        self.msg('DEBUG: Found {} submissions'.format(num_submissions), 1)
        if num_submissions == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def fetch_recent_submissions(self,
                                 max_duration,
                                 after,
                                 exclude_self,
                                 exclude_link,
                                 since_last=True):
        """Fetch recent submissions in subreddit with boundaries.

        Does not include posts within the last three days as their scores may
        not be representative.

        :param max_duration: When set, specifies the number of days to include
        :param after: When set, fetch all submission after this submission id.
        :param exclude_self: When true, don't include self posts.
        :param exclude_link:  When true, don't include links.
        :param since_last: When true use info from last submission to determine
            the stop point
        :returns: True if any submissions were found.
        :raises TypeError: When both exclude_self and exclude_link are set.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if max_duration:
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        params = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.new(limit=None, params=params):
            if submission.created_utc <= self.min_date:
                break
            if since_last and submission.title.startswith(self.post_prefix) \
               and submission.author == self.reddit.config.username:
                # Use info in this post to update the min_date
                # And don't include this post
                self.msg(
                    tt('Found previous: {}').format(safe_title(submission)), 2)
                if self.prev_srs is None:  # Only use the most recent
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission
                continue
            if submission.created_utc > self.max_date:
                continue
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        return self._finish_fetch()

    def fetch_top_submissions(self, top, exclude_self, exclude_link):
        """Fetch top 1000 submissions by some top value.

        :param top: One of day, week, month, year, all
        :param exclude_self: When true, don't include self posts.
        :param exclude_link: When true, include only self posts
        :returns: True if any submissions were found.
        :raises TypeError: When both exclude flags are set, or when ``top``
            is not one of the accepted values.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if top not in ('day', 'week', 'month', 'year', 'all'):
            raise TypeError('{!r} is not a valid top value'.format(top))
        self.msg('DEBUG: Fetching submissions', 1)
        params = {'t': top}
        for submission in self.subreddit.top(limit=None, params=params):
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        return self._finish_fetch()

    def process_submitters(self):
        """Group submissions by author."""
        self.msg('DEBUG: Processing Submitters', 1)
        for submission in self.submissions:
            if submission.author and (self.distinguished
                                      or submission.distinguished is None):
                self.submitters[str(submission.author)].append(submission)

    def process_commenters(self):
        """Group comments by author."""
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on {} submissions'.format(num),
                 1)
        for i, submission in enumerate(self.submissions):
            submission.comment_sort = 'top'
            self.msg('{}/{} submissions'.format(i + 1, num), 2, overwrite=True)
            if submission.num_comments == 0:
                continue
            skipped = submission.comments.replace_more()
            if skipped:
                skip_num = sum(x.count for x in skipped)
                print('Ignored {} comments ({} MoreComment objects)'.format(
                    skip_num, len(skipped)))
            comments = [
                x for x in submission.comments.list()
                if self.distinguished or x.distinguished is None
            ]
            self.comments.extend(comments)
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)

    def basic_stats(self):
        """Return a markdown representation of simple statistics.

        Note that ``self.comments`` is sorted in place by creation time.

        """
        sub_score = sum(x.score for x in self.submissions)
        comm_score = sum(x.score for x in self.comments)
        sub_duration = self.max_date - self.min_date
        # A zero duration falls back to the raw count to avoid dividing by 0.
        sub_rate = (86400. * len(self.submissions) /
                    sub_duration if sub_duration else len(self.submissions))

        # Compute comment rate
        if self.comments:
            self.comments.sort(key=lambda x: x.created_utc)
            duration = (self.comments[-1].created_utc -
                        self.comments[0].created_utc)
            comm_rate = (86400. * len(self.comments) /
                         duration if duration else len(self.comments))
        else:
            comm_rate = 0

        values = [('Total', len(self.submissions), len(self.comments)),
                  ('Rate (per day)', '{:.2f}'.format(sub_rate),
                   '{:.2f}'.format(comm_rate)),
                  ('Unique Redditors', len(self.submitters),
                   len(self.commenters)),
                  ('Combined Score', sub_score, comm_score)]

        retval = 'Period: {:.2f} days\n\n'.format(sub_duration / 86400.)
        retval += '||Submissions|Comments|\n:-:|--:|--:\n'
        for quad in values:
            retval += '__{}__|{}|{}\n'.format(*quad)
        return retval + '\n'

    def top_submitters(self, num, num_submissions):
        """Return a markdown representation of the top submitters.

        :param num: Maximum number of submitters to list.
        :param num_submissions: Maximum number of submissions to list for
            each submitter.

        """
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        top_submitters = sorted(iteritems(self.submitters),
                                reverse=True,
                                key=lambda x:
                                (sum(y.score for y in x[1]), len(x[1])))[:num]

        retval = self.post_header.format('Top Submitters\' Top Submissions')
        for (author, submissions) in top_submitters:
            retval += '0. {}, {} submission{}: {}\n'.format(
                self._pts(sum(x.score for x in submissions)), len(submissions),
                's' if len(submissions) != 1 else '', self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = safe_title(sub)
                if sub.permalink != sub.url:
                    retval += tt('  0. [{}]({})').format(title, sub.url)
                else:
                    retval += tt('  0. {}').format(title)
                # ``!= 1`` so that zero comments reads "0 comments".
                retval += ' ({}, [{} comment{}]({}))\n'.format(
                    self._pts(sub.score), sub.num_comments,
                    's' if sub.num_comments != 1 else '', self._permalink(sub))
            retval += '\n'
        return retval

    def top_commenters(self, num):
        """Return a markdown representation of the top commenters."""
        num = min(num, len(self.commenters))
        if num <= 0:
            return ''

        top_commenters = sorted(iteritems(self.commenters),
                                reverse=True,
                                key=lambda x:
                                (sum(y.score for y in x[1]), len(x[1])))[:num]

        retval = self.post_header.format('Top Commenters')
        for author, comments in top_commenters:
            retval += '0. {} ({}, {} comment{})\n'.format(
                self._user(author), self._pts(sum(x.score for x in comments)),
                len(comments), 's' if len(comments) != 1 else '')
        return '{}\n'.format(retval)

    def top_submissions(self, num):
        """Return a markdown representation of the top submissions."""
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''

        top_submissions = sorted([
            x for x in self.submissions
            if self.distinguished or x.distinguished is None
        ],
                                 reverse=True,
                                 key=lambda x: x.score)[:num]

        if not top_submissions:
            return ''

        retval = self.post_header.format('Top Submissions')
        for sub in top_submissions:
            title = safe_title(sub)
            if sub.permalink != sub.url:
                retval += tt('0. [{}]({})').format(title, sub.url)
            else:
                retval += tt('0. {}').format(title)
            # ``!= 1`` so that zero comments reads "0 comments".
            retval += ' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(sub.author), self._pts(sub.score), sub.num_comments,
                's' if sub.num_comments != 1 else '', self._permalink(sub))
        return tt('{}\n').format(retval)

    def top_comments(self, num):
        """Return a markdown representation of the top comments."""
        num = min(num, len(self.comments))
        if num <= 0:
            return ''

        top_comments = sorted(self.comments,
                              reverse=True,
                              key=lambda x: x.score)[:num]
        retval = self.post_header.format('Top Comments')
        for comment in top_comments:
            title = safe_title(comment.submission)
            retval += tt('0. {}: {}\'s [comment]({}) in {}\n').format(
                self._pts(comment.score), self._user(comment.author),
                self._permalink(comment), title)
        return tt('{}\n').format(retval)

    def publish_results(self,
                        subreddit,
                        submitters,
                        commenters,
                        submissions,
                        comments,
                        top,
                        debug=False):
        """Submit the results to the subreddit. Has no return value (None).

        :param subreddit: Name of the subreddit to submit to. When empty the
            user is prompted for one on stdin.
        :param submitters: Number of top submitters to include.
        :param commenters: Number of top commenters to include.
        :param submissions: Number of top submissions to include.
        :param comments: Number of top comments to include.
        :param top: When true the title says "top posts".
        :param debug: When true nothing is submitted; the title and body are
            printed instead.

        """
        def timef(timestamp, date_only=False):
            """Return a suitable string representation of the timestamp."""
            dtime = datetime.fromtimestamp(timestamp)
            if date_only:
                retval = dtime.strftime('%Y-%m-%d')
            else:
                retval = dtime.strftime('%Y-%m-%d %H:%M PDT')
            return retval

        if self.prev_srs:
            prev = '[Prev SRS]({})  \n'.format(self._permalink(self.prev_srs))
        else:
            prev = ''

        basic = self.basic_stats()
        t_commenters = self.top_commenters(commenters)
        t_submissions = self.top_submissions(submissions)
        t_comments = self.top_comments(comments)
        footer = self.post_footer.format(prev, self.max_date)

        # List fewer submissions per submitter until the body fits.
        body = ''
        num_submissions = 10
        while body == '' or len(body) > MAX_BODY_SIZE and num_submissions > 2:
            t_submitters = self.top_submitters(submitters, num_submissions)
            body = (basic + t_submitters + t_commenters + t_submissions +
                    t_comments + footer)
            num_submissions -= 1

        if len(body) > MAX_BODY_SIZE:
            print('The resulting message is too big. Not submitting.')
            debug = True

        # Set the initial title
        base_title = '{} {} {}posts from {} to {}'.format(
            self.post_prefix, str(self.subreddit), 'top ' if top else '',
            timef(self.min_date, True), timef(self.max_date))

        submitted = False
        while not debug and not submitted:
            if subreddit:  # Verify the user wants to submit to the subreddit
                msg = ('You are about to submit to subreddit {} as {}.\n'
                       'Are you sure? yes/[no]: '.format(
                           subreddit, self.reddit.config.username))
                sys.stdout.write(msg)
                sys.stdout.flush()
                if sys.stdin.readline().strip().lower() not in ['y', 'yes']:
                    subreddit = None
            else:  # Prompt for the subreddit to submit to
                msg = ('Please enter a subreddit to submit to (press return to'
                       ' abort): ')
                sys.stdout.write(msg)
                sys.stdout.flush()
                subreddit = sys.stdin.readline().strip()
                if not subreddit:
                    print('Submission aborted\n')
                    debug = True

            # Vary the title depending on where posting
            if str(self.subreddit) == subreddit:
                title = '{} {}posts from {} to {}'.format(
                    self.post_prefix, 'top ' if top else '',
                    timef(self.min_date, True), timef(self.max_date))
            else:
                title = base_title

            if subreddit:
                subreddit = self.reddit.subreddit(subreddit)
                try:  # Attempt to make the submission
                    print(subreddit.submit(title, selftext=body).permalink)
                    submitted = True
                except Exception as error:
                    # Report the failure and ask for a subreddit again.
                    print('The submission failed: {!r}'.format(error))
                    subreddit = None

        if not submitted:
            print(base_title)
            print(body)

    def save_csv(self, filename):
        """Create csv file containing comments and submissions by author."""
        redditors = set(self.submitters.keys()).union(self.commenters.keys())
        # Keyed by lower-cased name so rows are ordered case-insensitively.
        mapping = {x.lower(): x for x in redditors}
        with codecs.open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write('username, type, permalink, score\n')
            for _, redditor in sorted(mapping.items()):
                for submission in self.submitters.get(redditor, []):
                    outfile.write(u'{}, submission, {}, {}\n'.format(
                        redditor, submission.permalink, submission.score))
                for comment in self.commenters.get(redditor, []):
                    outfile.write(u'{}, comment, {}, {}\n'.format(
                        redditor, comment.permalink, comment.score))
示例#23
0
 def _user(user):
     """Return a markdown link to the user, or ``_deleted_`` for None.

     ``Redditor`` instances are converted to their string form first.

     """
     if user is None:
         return '_deleted_'
     if isinstance(user, Redditor):
         user = str(user)
     # '\\_' spells the backslash explicitly; the previous '\_' relied on an
     # invalid escape sequence that newer Python versions warn about.
     return tt('[{}](/user/{})').format(user.replace('_', '\\_'), user)
 def test_disabled_env_var_prevents_site_addition(self):
     """Run the target with tracing disabled and expect its assert to fail.

     The expected text is the target's tracer ``isinstance`` assertion, which
     must appear in the output of the failed process.

     """
     os.environ['SIGNALFX_TRACING_ENABLED'] = 'False'
     with pytest.raises(CalledProcessError) as excinfo:
         command = ['sfx-py-trace', target] + target_args
         self.check_output(command)
     captured = tt(excinfo.value.output)
     assert 'assert isinstance(opentracing.tracer, Tracer)' in captured
 def test_named_missing_target_args(self):
     """Running the target without its own arguments must fail.

     The process output has to mention ``required`` or ``--one``.

     """
     with pytest.raises(CalledProcessError) as excinfo:
         command = ['sfx-py-trace', '--token', '123', target]
         self.check_output(command)
     captured = tt(excinfo.value.output)
     assert any(hint in captured for hint in ('required', '--one'))
 def test_named_missing_target(self):
     """Running ``sfx-py-trace`` without a target must fail.

     The process output has to contain one of the two accepted
     missing-argument messages.

     """
     accepted = ('too few arguments',
                 'the following arguments are required: target')
     with pytest.raises(CalledProcessError) as excinfo:
         self.check_output(['sfx-py-trace', '--token', '123'])
     captured = tt(excinfo.value.output)
     assert any(message in captured for message in accepted)
 def test_token_arg_optional_without_env_var(self):
     """Without a token the run must still get as far as opening the target.

     The only accepted failure is the "can't open file" message for the
     non-existent ``file.py``.

     """
     # check that target run attempted after instrumentation
     expected = "can't open file 'file.py': [Errno 2] No such file or directory"
     with pytest.raises(Exception) as excinfo:
         self.check_output(['sfx-py-trace', 'file.py'])
     captured = tt(excinfo.value.output)
     assert expected in captured
示例#28
0
 def _user(user):
     """Return ``/u/<user>``, or ``_deleted_`` when the user is None."""
     if user is None:
         return '_deleted_'
     return tt('/u/{}').format(user)
示例#29
0
 def _permalink(item):
     """Return the relative link to a submission or to a comment.

     Anything that is not a ``MiniSubmission`` is treated as a comment and
     linked through its submission's id with ``context=1``.

     """
     if not isinstance(item, MiniSubmission):
         comment_link = tt('/comments/{}//{}?context=1')
         return comment_link.format(item.submission.id, item.id)
     return tt('/comments/{}').format(item.id)
示例#30
0
class SubredditStats(object):
    """Contain all the functionality of the subreddit_stats command.

    The class-level templates below are shared by the report-building
    methods: section headings, the closing footer and the title prefix.
    """

    # Markdown footer appended to the end of the generated report body.
    post_footer = tt('>Generated with [BBoe](/u/bboe)\'s [Subreddit Stats]'
                     '(https://github.com/praw-dev/prawtools) '
                     '([Donate](https://cash.me/$praw))')
    # Section heading template; ``{}`` receives the section title.
    post_header = tt('---\n###{}\n')
    # Leading text of the title used when the report is published.
    post_prefix = tt('Subreddit Stats:')

    @staticmethod
    def _permalink(item):
        """Return the relative comments path for a submission or comment.

        ``MiniSubmission`` items produce ``/comments/<id>``. Every other
        item is linked through ``item.submission`` with ``context=1``.
        """
        if not isinstance(item, MiniSubmission):
            comment_template = tt('/comments/{}//{}?context=1')
            return comment_template.format(item.submission.id, item.id)
        return tt('/comments/{}').format(item.id)

    @staticmethod
    def _points(points):
        return '1 point' if points == 1 else '{} points'.format(points)

    @staticmethod
    def _rate(items, duration):
        return 86400. * items / duration if duration else items

    @staticmethod
    def _safe_title(submission):
        """Return the submission title cleaned up for single-line output.

        Every match of ``RE_WHITESPACE`` is replaced by one space, and the
        result is stripped at both ends.
        """
        collapsed = RE_WHITESPACE.sub(' ', submission.title)
        return collapsed.strip()

    @staticmethod
    def _save_report(title, body):
        """Write the report to a new ``.md`` file in the current directory.

        The file holds *title*, a blank line and *body*, encoded as UTF-8.
        The chosen file name is logged.
        """
        handle, path = mkstemp(suffix='.md', dir='.')
        # Only the name is needed; release the OS-level handle before the
        # file is reopened through codecs.
        os.close(handle)
        with codecs.open(path, 'w', 'utf-8') as outfile:
            report = '{}\n\n{}'.format(title, body)
            outfile.write(report)
        logger.info('Report saved to {}'.format(path))

    @staticmethod
    def _user(user):
        return '_deleted_' if user is None else tt('/u/{}').format(user)

    def __init__(self, subreddit, site, distinguished, reddit=None):
        """Set up empty collections and the Reddit handles used by the run.

        :param subreddit: Name of the subreddit to analyse.
        :param site: Site name passed to ``Reddit`` when no client is given.
        :param distinguished: Truthy to include distinguished items.
        :param reddit: Optional existing client; a new ``Reddit`` instance
            is created when this is falsy.

        """
        self.distinguished = distinguished

        # Date window: by default everything up to one day before now.
        self.min_date = 0
        self.max_date = time.time() - SECONDS_IN_A_DAY

        # Containers filled while fetching and processing.
        self.submissions = {}
        self.comments = []
        self.submitters = defaultdict(list)
        self.commenters = defaultdict(list)

        if not reddit:
            reddit = Reddit(site, check_for_updates=False, user_agent=AGENT)
        self.reddit = reddit
        self.subreddit = self.reddit.subreddit(subreddit)
        self.submit_subreddit = self.reddit.subreddit('subreddit_stats')

    def basic_stats(self):
        """Return a markdown representation of simple statistics."""
        comment_score = sum(comment.score for comment in self.comments)
        if self.comments:
            comment_duration = (self.comments[-1].created_utc -
                                self.comments[0].created_utc)
            comment_rate = self._rate(len(self.comments), comment_duration)
        else:
            comment_rate = 0

        submission_duration = self.max_date - self.min_date
        submission_rate = self._rate(len(self.submissions),
                                     submission_duration)
        submission_score = sum(sub.score for sub in self.submissions.values())

        values = [('Total', len(self.submissions), len(self.comments)),
                  ('Rate (per day)', '{:.2f}'.format(submission_rate),
                   '{:.2f}'.format(comment_rate)),
                  ('Unique Redditors', len(self.submitters),
                   len(self.commenters)),
                  ('Combined Score', submission_score, comment_score)]

        retval = 'Period: {:.2f} days\n\n'.format(submission_duration / 86400.)
        retval += '||Submissions|Comments|\n:-:|--:|--:\n'
        for quad in values:
            retval += '__{}__|{}|{}\n'.format(*quad)
        return retval + '\n'

    def fetch_recent_submissions(self, max_duration):
        """Collect submissions from the ``new`` listing within the date window.

        Submissions newer than ``self.max_date`` are skipped. Iteration
        stops at the first submission created at or before
        ``self.min_date``.

        :param max_duration: When truthy, the number of days before
            ``self.max_date`` at which the window starts.

        """
        if max_duration:
            self.min_date = self.max_date - SECONDS_IN_A_DAY * max_duration
        for post in self.subreddit.new(limit=None):
            if post.created_utc <= self.min_date:
                break
            if post.created_utc <= self.max_date:
                self.submissions[post.id] = MiniSubmission(post)

    def fetch_submissions(self, submissions_callback, *args):
        """Run *submissions_callback* and post-process what it collected.

        When any submissions were found, the date window is narrowed to the
        oldest and newest ``created_utc`` values, then submitters and
        commenters are processed. Nothing more happens when none were found.

        :param submissions_callback: Callable that fills
            ``self.submissions``; it is called with ``*args``.

        """
        logger.debug('Fetching submissions')
        submissions_callback(*args)
        logger.info('Found {} submissions'.format(len(self.submissions)))

        if self.submissions:
            timestamps = [x.created_utc for x in self.submissions.values()]
            self.min_date = min(timestamps)
            self.max_date = max(timestamps)

            self.process_submitters()
            self.process_commenters()

    def fetch_top_submissions(self, top):
        """Collect every submission from the ``top`` listing.

        Each result is stored in ``self.submissions`` under its id. Nothing
        is returned.

        :param top: One of week, month, year, all

        """
        listing = self.subreddit.top(limit=None, time_filter=top)
        for post in listing:
            self.submissions[post.id] = MiniSubmission(post)

    def process_commenters(self):
        """Collect the comments of every submission and group them by author.

        Submissions without comments are skipped. Expanding a comment tree
        is attempted up to three times; a ``RequestException`` on the third
        attempt is re-raised. A comment is kept when ``self.distinguished``
        is truthy or the comment is not distinguished. Afterwards
        ``self.comments`` is sorted by ``created_utc`` and
        ``self.commenters`` maps each truthy author to their comments.
        """
        total = len(self.submissions)
        for position, submission in enumerate(self.submissions.values(), 1):
            if submission.num_comments == 0:
                continue
            thread = self.reddit.submission(id=submission.id)
            thread.comment_sort = 'top'

            failures = 0
            while True:
                try:
                    thread.comments.replace_more(limit=0)
                except RequestException:
                    failures += 1
                    if failures >= 3:
                        raise
                    logger.debug(
                        'Failed to fetch submission {}, retrying'.format(
                            submission.id))
                else:
                    break

            for comment in thread.comments.list():
                if self.distinguished or comment.distinguished is None:
                    self.comments.append(MiniComment(comment, submission))

            # Progress is reported for every 50th submission in the
            # listing, provided that submission had comments.
            if position % 50 == 0:
                logger.debug('Completed: {:4d}/{} submissions'.format(
                    position, total))

            # Clean up to reduce memory usage
            submission = None
            gc.collect()

        self.comments.sort(key=lambda x: x.created_utc)
        for comment in self.comments:
            if comment.author:
                self.commenters[comment.author].append(comment)

    def process_submitters(self):
        """Group submissions by author."""
        for submission in self.submissions.values():
            if submission.author and (self.distinguished
                                      or submission.distinguished is None):
                self.submitters[submission.author].append(submission)

    def publish_results(self, view, submitters, commenters):
        """Build the report and submit it to ``self.submit_subreddit``.

        The number of top submitters is reduced one at a time while the
        body is longer than 40000 characters. If submitting raises, the
        error is logged and the report is saved to a local file instead.

        :param view: Used only to decide whether the title says "top".
        :param submitters: Initial number of top submitters to include.
        :param commenters: Number of top commenters to include.
        :returns: Whatever ``submit`` returns on success, otherwise ``None``.

        """
        def timef(timestamp, date_only=False):
            """Return a suitable string representation of the timestamp."""
            layout = '%Y-%m-%d' if date_only else '%Y-%m-%d %H:%M PDT'
            return datetime.fromtimestamp(timestamp).strftime(layout)

        head = self.basic_stats()
        top_commenters = self.top_commenters(commenters)
        top_comments = self.top_comments()
        top_submissions = self.top_submissions()
        tail = top_commenters + top_submissions + top_comments
        tail += self.post_footer

        # Decrease number of top submitters if body is too large.
        while True:
            body = head + self.top_submitters(submitters) + tail
            submitters -= 1
            if len(body) <= 40000 or submitters <= 0:
                break

        scope = 'top ' if view in TOP_VALUES else ''
        title = '{} {} {}posts from {} to {}'.format(
            self.post_prefix, str(self.subreddit), scope,
            timef(self.min_date, True), timef(self.max_date))

        try:  # Attempt to make the submission
            return self.submit_subreddit.submit(title, selftext=body)
        except Exception:
            logger.exception('Failed to submit to {}'.format(
                self.submit_subreddit))
            self._save_report(title, body)

    def run(self, view, submitters, commenters):
        """Fetch, analyse and publish; return what ``publish_results`` returns.

        A *view* found in ``TOP_VALUES`` selects the top listing. Any other
        value is converted with ``int`` and used as a number of days of
        recent submissions. ``None`` is returned when nothing was found.
        """
        logger.info('Analyzing subreddit: {}'.format(self.subreddit))

        if view not in TOP_VALUES:
            callback = self.fetch_recent_submissions
            view = int(view)
        else:
            callback = self.fetch_top_submissions
        self.fetch_submissions(callback, view)

        if self.submissions:
            return self.publish_results(view, submitters, commenters)

        logger.warning('No submissions were found.')
        return None

    def top_commenters(self, num):
        """Return a markdown list of the *num* highest-scoring commenters.

        Authors are ordered by total comment score, then comment count
        (both descending), then name. An empty string is returned when
        there is nothing to list.
        """
        num = min(num, len(self.commenters))
        if num <= 0:
            return ''

        def rank(entry):
            author, comments = entry
            return (-sum(y.score for y in comments), -len(comments),
                    str(author))

        ranked = sorted(iteritems(self.commenters), key=rank)[:num]

        pieces = [self.post_header.format('Top Commenters')]
        for author, comments in ranked:
            count = len(comments)
            pieces.append('0. {} ({}, {} comment{})\n'.format(
                self._user(author),
                self._points(sum(x.score for x in comments)),
                count, 's' if count != 1 else ''))
        return '{}\n'.format(''.join(pieces))

    def top_submitters(self, num):
        """Return markdown for the *num* top submitters and their best posts.

        Authors are ordered by total score, then submission count (both
        descending), then name. Up to ten submissions are listed per
        author, ordered by score (descending) and title. An empty string is
        returned when there is nothing to list.
        """
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        def rank(entry):
            author, posts = entry
            return (-sum(y.score for y in posts), -len(posts), str(author))

        ranked = sorted(iteritems(self.submitters), key=rank)[:num]

        pieces = [self.post_header.format('Top Submitters\' Top Submissions')]
        for author, submissions in ranked:
            count = len(submissions)
            pieces.append('0. {}, {} submission{}: {}\n'.format(
                self._points(sum(x.score for x in submissions)),
                count, 's' if count != 1 else '',
                self._user(author)))
            best = sorted(submissions, key=lambda x: (-x.score, x.title))
            for sub in best[:10]:
                title = self._safe_title(sub)
                # A post whose url contains its own permalink gets a plain
                # title; any other post links the title to its url.
                if sub.permalink in sub.url:
                    pieces.append(tt('  0. {}').format(title))
                else:
                    pieces.append(tt('  0. [{}]({})').format(title, sub.url))
                pieces.append(' ({}, [{} comment{}]({}))\n'.format(
                    self._points(sub.score), sub.num_comments,
                    's' if sub.num_comments != 1 else '',
                    self._permalink(sub)))
            pieces.append('\n')
        return ''.join(pieces)

    def top_submissions(self):
        """Return a markdown list of up to ten top-scoring submissions.

        Distinguished submissions are left out unless ``self.distinguished``
        is truthy. Ordering is by score, then comment count (both
        descending), then title. An empty string is returned when nothing
        qualifies.
        """
        num = min(10, len(self.submissions))
        if num <= 0:
            return ''

        eligible = [
            x for x in self.submissions.values()
            if self.distinguished or x.distinguished is None
        ]
        eligible.sort(key=lambda x: (-x.score, -x.num_comments, x.title))
        ranked = eligible[:num]
        if not ranked:
            return ''

        pieces = [self.post_header.format('Top Submissions')]
        for sub in ranked:
            title = self._safe_title(sub)
            # A post whose url contains its own permalink gets a plain
            # title; any other post links the title to its url.
            if sub.permalink in sub.url:
                pieces.append(tt('0. {}').format(title))
            else:
                pieces.append(tt('0. [{}]({})').format(title, sub.url))

            pieces.append(' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(sub.author), self._points(sub.score),
                sub.num_comments, 's' if sub.num_comments != 1 else '',
                self._permalink(sub)))
        return tt('{}\n').format(''.join(pieces))

    def top_comments(self):
        """Return a markdown list of up to ten top-scoring comments.

        Comments are ordered by score (descending), then author name. Each
        line gives the score, the author, a link to the comment and the
        title of its submission. An empty string is returned when there are
        no comments.
        """
        num = min(10, len(self.comments))
        if num <= 0:
            return ''

        ranked = sorted(self.comments,
                        key=lambda x: (-x.score, str(x.author)))
        line_template = tt('0. {}: {}\'s [comment]({}) in {}\n')

        pieces = [self.post_header.format('Top Comments')]
        for comment in ranked[:num]:
            title = self._safe_title(comment.submission)
            pieces.append(line_template.format(
                self._points(comment.score), self._user(comment.author),
                self._permalink(comment), title))
        return tt('{}\n').format(''.join(pieces))
示例#31
0
 def _permalink(item):
     """Return the relative comments path for a submission or comment.

     A ``MiniSubmission`` yields ``/comments/<id>``. Any other item is
     addressed through ``item.submission`` and requests one level of
     context.
     """
     if isinstance(item, MiniSubmission):
         template, ids = '/comments/{}', (item.id,)
     else:
         template = '/comments/{}//{}?context=1'
         ids = (item.submission.id, item.id)
     return tt(template).format(*ids)
示例#32
0
 def _user(user):
     """Return the ``/u/`` mention for *user*; ``None`` becomes ``'_deleted_'``."""
     if user is not None:
         return tt('/u/{}').format(user)
     return '_deleted_'