def process(self, users, **kwargs):
    """
        Determine edit count.  The parameter *user_handle* can be either
        a string or an integer or a list of these types.  When the
        *user_handle* type is integer it is interpreted as a user id, and
        as a user_name for string input.  If a list of users is passed
        to the *process* method then a dict object with edit counts keyed
        by user handles is returned.

        - Parameters:
            - **user_handle** - String or Integer (optionally lists):
                Value or list of values representing user handle(s).
            - **is_id** - Boolean.  Flag indicating whether user_handle
                stores user names or user ids
    """

    # Pack args, call thread pool
    args = self._pack_params()
    results = mpw.build_thread_pool(users, _process_help, self.k_, args)

    # Any user not present in the query results has an edit count of 0.
    # Track the unseen users in a set for O(1) removal as rows come back.
    unseen_users = {long(user_id) for user_id in users}
    edit_count = []
    for row in results:
        edit_count.append([row[0], int(row[1])])
        unseen_users.discard(row[0])

    # Pad zero-count rows for users with no edits in the period.
    for user in unseen_users:
        edit_count.append([user, 0])

    self._results = edit_count
    return self
Exemplo n.º 2
0
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    # args is a pair: (user list, packed worker state).
    users, state = args[0], args[1]

    # Rehydrate the worker parameters from the packed state.
    # NOTE(review): state[5] is deliberately skipped here, matching the
    # packing order used by the caller — confirm if indices change.
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])

    if thread_args.log_progress:
        logging.info(__name__ +
                    ' :: Computing reverts on %s users (PID %s)'
                    % (len(users), str(os.getpid())))

    aggregated = []
    dropped = 0

    # Iterate per-user (start, end) windows produced by the group mapper.
    for user_data in UMP_MAP[thread_args.group](users, thread_args):

        # Bundle the formatted window bounds for the revision query.
        query_args = namedtuple('QueryArgs', 'date_start date_end')(
            format_mediawiki_timestamp(user_data.start),
            format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.revert_rate_user_revs_query(
                user_data.user,
                thread_args.project,
                query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped += 1
            continue

        # Fan revert detection for this user's revisions out over threads.
        thread_results = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        rev_total = 0.0
        revert_total = 0.0
        for item in thread_results:
            rev_total += item[0]
            revert_total += item[1]

        # Zero revisions => report a 0.0 revert rate.
        rate = revert_total / rev_total if rev_total else 0.0
        aggregated.append([user_data.user, rate, rev_total])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
            format(str(os.getpid()), dropped))

    return aggregated
Exemplo n.º 3
0
    def process(self, users, **kwargs):
        """ Dispatch the metric computation for *users* over the
            worker thread pool and store the aggregated rows. """

        self._results = mpw.build_thread_pool(
            users, _process_help, self.k_, self._pack_params())
        return self
Exemplo n.º 4
0
    def process(self, user_handle, **kwargs):
        """ Compute the metric for *user_handle* via the thread pool. """

        # Parameters forwarded to each worker.  NOTE(review): despite the
        # original comment, no single-process fallback is visible here.
        worker_params = [self.project, self.namespace, self.log_,
                         self.datetime_start, self.datetime_end, self.t]
        self._results = mpw.build_thread_pool(user_handle, _process_help,
                                              self.k_, worker_params)
        return self
    def process(self, users, **kwargs):
        """ Wrapper for specific threshold objects """

        # Pack instance state once, then fan the work out over threads.
        packed = self._pack_params()
        self._results = mpw.build_thread_pool(users, _process_help,
                                              self.k_, packed)
        return self
    def process(self, user_handle, **kwargs):
        """ Compute the metric for one handle or a collection of handles. """

        # Wrap a bare scalar so the thread pool always gets a sequence.
        # (In Python 2 a plain str lacks __iter__, so a lone string is
        # wrapped too.)
        if not hasattr(user_handle, '__iter__'):
            user_handle = [user_handle]

        self._results = mpw.build_thread_pool(user_handle, _process_help,
                                              self.k_, self._pack_params())
        return self
    def process(self, user_handle, **kwargs):
        """ Run the metric over *user_handle* using the worker pool. """

        # Normalize a single handle into a list before dispatch.
        if not hasattr(user_handle, "__iter__"):
            user_handle = [user_handle]

        packed = self._pack_params()
        self._results = mpw.build_thread_pool(user_handle, _process_help,
                                              self.k_, packed)
        return self
Exemplo n.º 8
0
    def process(self, user_handle, **kwargs):
        """ Thread-pool dispatch with explicitly packed worker state. """

        # Wrap a scalar handle so the pool always receives an iterable.
        if not hasattr(user_handle, '__iter__'):
            user_handle = [user_handle]

        # Per-worker state, in the positional order the helper expects.
        worker_state = [self.project, self.log_, self.look_ahead,
                        self.look_back, self.t, self.datetime_end, self.kr_,
                        self.namespace, self.group]
        self._results = mpw.build_thread_pool(user_handle, _process_help,
                                              self.k_, worker_state)
        return self
Exemplo n.º 9
0
    def process(self, users, **kwargs):
        """ Setup metrics gathering using multiprocessing.

            Fetches revisions for *users* via the worker pool, sums the
            per-thread result rows grouped by user, then pads a zero row
            for any user with no activity in the period.
        """

        # get revisions
        args = self._pack_params()
        revs = mpw.build_thread_pool(users, _get_revisions, self.k_, args)

        # Start worker threads and aggregate results for bytes added,
        # summing rows grouped on column 0 (the user handle).
        self._results = \
            list_sum_by_group(mpw.build_thread_pool(revs,
                                                    _process_help,
                                                    self.k_,
                                                    args), 0)

        # Add any missing users - O(n)
        tallied_users = {str(r[0]) for r in self._results}
        for user in users:
            if str(user) not in tallied_users:
                # Add a row indicating no activity for that user
                self._results.append([user, 0, 0, 0, 0, 0])
        return self
Exemplo n.º 10
0
    def process(self, users, **kwargs):
        """
            This function gathers threshold (survival) metric data by: ::

                1. selecting all new user registrations within the timeframe
                    and in the user list (an empty list means select all
                    within the timeframe.)
                2. For each user id find the number of revisions before (after)
                    the threshold (survival) cut-off time t

            - Parameters:
                - **user_handle** - String or Integer (optionally lists).
                    Value or list of values representing user handle(s).

            **NOTA BENE** - the kwarg "survival" makes this compute a
                survival metric rather than a threshold metric.
        """

        # Process results
        args = self._pack_params()
        self._results = mpw.build_thread_pool(users, _process_help,
                                              self.k_, args)
        return self