def process(self, users, **kwargs):
    """
    Determine edit count.

    The parameter *user_handle* can be either a string or an integer
    or a list of these types. When the *user_handle* type is integer
    it is interpreted as a user id, and as a user_name for string
    input. If a list of users is passed to the *process* method then
    a dict object with edit counts keyed by user handles is returned.

    - Parameters:
        - **user_handle** - String or Integer (optionally lists):
          Value or list of values representing user handle(s).
        - **is_id** - Boolean. Flag indicating whether user_handle
          stores user names or user ids
    """
    # Pack args, call thread pool
    args = self._pack_params()
    results = mpw.build_thread_pool(users, _process_help, self.k_, args)

    # Get edit counts from query - all users not appearing have
    # an edit count of 0
    user_set = set(long(user_id) for user_id in users)
    edit_count = list()
    for row in results:
        edit_count.append([row[0], int(row[1])])
        # Normalize the queried id to ``long`` before discarding so it
        # matches the longs stored in ``user_set``.  Discarding the raw
        # row value misses when the query hands back strings, and the
        # user would then be wrongly re-added below with a count of 0.
        user_set.discard(long(row[0]))
    # Users with no rows in the result set have made no edits
    for user in user_set:
        edit_count.append([user, 0])

    self._results = edit_count
    return self
def _process_help(args):
    """
    Used by Threshold::process() for forking.  Should not be called
    externally.

    ``args`` is a two-element sequence: ``args[0]`` is the list of
    users handled by this worker, ``args[1]`` is the shared ``state``
    list that is positionally unpacked into RevertRateArgsClass.
    Returns a list of ``[user, revert_rate, total_revisions]`` rows.
    """
    state = args[1]
    # NOTE(review): state[5] is deliberately (?) skipped in this
    # unpacking — indices 0-4 then 6-8 are used.  Confirm against the
    # producer of ``state`` that this is intentional.
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])
    users = args[0]
    if thread_args.log_progress:
        logging.info(__name__ +
                     ' :: Computing reverts on %s users (PID %s)' % (
                         len(users), str(os.getpid())))
    results_agg = list()
    dropped_users = 0  # users skipped because their revision query failed

    # UMP_MAP yields per-user data (user id plus start/end datetimes)
    # for the configured user-metric group.
    umpd_obj = UMP_MAP[thread_args.group](users, thread_args)
    for user_data in umpd_obj:
        total_revisions = 0.0
        total_reverts = 0.0

        # Call query on revert rate for each user
        #
        # 1. Obtain user registration date
        # 2. Compute end date based on 't'
        # 3. Get user revisions in time period
        query_args = namedtuple('QueryArgs', 'date_start date_end')\
            (format_mediawiki_timestamp(user_data.start),
             format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.\
                revert_rate_user_revs_query(user_data.user,
                                            thread_args.project,
                                            query_args)
        except query_mod.UMQueryCallError as e:
            # Best-effort: log and drop the user rather than abort the
            # whole batch.  (e.message is Python-2 style.)
            logging.error(__name__ + ' :: Failed to '
                          'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        # Fan the user's revisions out to a secondary thread pool;
        # each result row is (revision_count, revert_count).
        results_thread = mpw.build_thread_pool(revisions,
                                               _revision_proc,
                                               thread_args.rev_threads,
                                               state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        # Avoid division by zero: no revisions means a 0.0 revert rate.
        if not total_revisions:
            results_agg.append([user_data.user, 0.0, total_revisions])
        else:
            results_agg.append([user_data.user,
                                total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ +
                      ' :: PID {0} complete. Dropped users = {1}'.
                      format(str(os.getpid()), dropped_users))

    return results_agg
def process(self, users, **kwargs):
    """
    Fan *users* out to the worker thread pool and store the rows
    produced by ``_process_help`` on ``self._results``.
    """
    # Process results
    pool_args = self._pack_params()
    self._results = mpw.build_thread_pool(users, _process_help,
                                          self.k_, pool_args)
    return self
def process(self, user_handle, **kwargs):
    """
    Dispatch *user_handle* to the worker pool with this metric's
    configuration packed as a positional state list.
    """
    # Multiprocessing vs. single processing execution
    shared_state = [
        self.project,
        self.namespace,
        self.log_,
        self.datetime_start,
        self.datetime_end,
        self.t,
    ]
    self._results = mpw.build_thread_pool(
        user_handle, _process_help, self.k_, shared_state)
    return self
def process(self, users, **kwargs):
    """Wrapper for specific threshold objects."""
    # Hand the packed parameters and user list to the thread pool;
    # the pool's aggregated output becomes this metric's result set.
    self._results = mpw.build_thread_pool(
        users, _process_help, self.k_, self._pack_params())
    return self
def process(self, user_handle, **kwargs):
    """
    Run the metric for *user_handle*, which may be a single handle or
    an iterable of handles.
    """
    # Normalize a bare scalar handle into a one-element list so the
    # thread pool always receives an iterable.
    handles = (user_handle if hasattr(user_handle, '__iter__')
               else [user_handle])
    self._results = mpw.build_thread_pool(handles, _process_help,
                                          self.k_, self._pack_params())
    return self
def process(self, user_handle, **kwargs):
    """
    Compute the metric over *user_handle* via the worker pool,
    wrapping a lone handle in a list first.
    """
    # ensure the handles are iterable
    if not hasattr(user_handle, "__iter__"):
        user_handle = [user_handle]

    # Multiprocessing vs. single processing execution
    packed = self._pack_params()
    pooled = mpw.build_thread_pool(user_handle, _process_help,
                                   self.k_, packed)
    self._results = pooled
    return self
def process(self, user_handle, **kwargs):
    """
    Execute the metric for *user_handle* (scalar or iterable), sharing
    this instance's configuration with the worker processes.
    """
    # Wrap a single handle so the pool always iterates a sequence.
    if not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]

    # Positional state consumed by the worker; order matters.
    worker_state = [
        self.project,
        self.log_,
        self.look_ahead,
        self.look_back,
        self.t,
        self.datetime_end,
        self.kr_,
        self.namespace,
        self.group,
    ]
    self._results = mpw.build_thread_pool(
        user_handle, _process_help, self.k_, worker_state)
    return self
def process(self, users, **kwargs):
    """
    Setup metrics gathering using multiprocessing.

    Fetches each user's revisions, aggregates the per-revision rows
    grouped by user, and appends an all-zero row for every user with
    no tallied activity.  Stores the rows on ``self._results`` and
    returns ``self`` for chaining.
    """
    # get revisions
    args = self._pack_params()
    revs = mpw.build_thread_pool(users, _get_revisions, self.k_, args)

    # Start worker threads and aggregate results for bytes added
    self._results = \
        list_sum_by_group(mpw.build_thread_pool(revs,
                                                _process_help,
                                                self.k_,
                                                args), 0)

    # Add any missing users - O(n).  Use the ``in`` operator rather
    # than calling set.__contains__ directly (idiomatic and clearer).
    tallied_users = set(str(r[0]) for r in self._results)
    for user in users:
        if str(user) not in tallied_users:
            # Add a row indicating no activity for that user
            self._results.append([user, 0, 0, 0, 0, 0])

    return self
def process(self, users, **kwargs): """ This function gathers threahold (survival) metric data by: :: 1. selecting all new user registrations within the timeframe and in the user list (empty means select all withing the timeframe.) 2. For each user id find the number of revisions before (after) the threshold (survival) cut-off time t - Parameters: - **user_handle** - String or Integer (optionally lists). Value or list of values representing user handle(s). **NOTA BENE** - kwarg "survival" is used to execute has this determine survival rather than a threshold metric """ # Process results args = self._pack_params() self._results = mpw.build_thread_pool(users, _process_help, self.k_, args) return self