Example no. 1
    def list_from_xsv(self, xsv_name, separator='\t', header=False):
        """
            Parse element from separated value file.  Return a list
            containing the values matched on each line of the file.

            Parameters:
                - **xsv_name**: String.  filename of the .xsv; it is
                    assumed to live in the project data folder
                - **index**: Integer. Index of the element to retrieve
                - **separator**: String.  The separating character in
                    the file.  Default to tab.
                - **header**: Boolean.  Flag indicating whether the
                    file has a header.

            Return:
                - List(string).  List of elements parsed from xsv.
        """
        out = list()
        try:
            xsv_file = open(projSet.__data_file_dir__ + xsv_name, 'r')
        except IOError as e:
            logging.info('Could not open xsv for reading: %s' % e.message)
            return out

        # Process file line-by-line
        if header: xsv_file.readline()
        while 1:
            line = xsv_file.readline().strip()
            if line == '': break
            tokens = line.split(separator)
            out.append([str(token) for token in tokens])
        xsv_file.close()
        return out
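A usage sketch for the parser above; the DataLoader class (seen as dl.DataLoader() in a later example) and the file name 'users.tsv' are assumptions, and the file is expected to sit in the project data folder.

# Usage sketch only; the loader class and file name are assumptions.
loader = dl.DataLoader()
rows = loader.list_from_xsv('users.tsv', separator='\t', header=True)
for tokens in rows:
    print tokens            # each row is a list of string tokens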
Example no. 2
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    state = args[1]
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])
    users = args[0]

    if thread_args.log_progress:
        logging.info(__name__ +
                    ' :: Computing reverts on %s users (PID %s)'
                    % (len(users), str(os.getpid())))
    results_agg = list()
    dropped_users = 0

    umpd_obj = UMP_MAP[thread_args.group](users, thread_args)
    for user_data in umpd_obj:

        total_revisions = 0.0
        total_reverts = 0.0

        # Call query on revert rate for each user
        #
        # 1. Obtain user registration date
        # 2. Compute end date based on 't'
        # 3. Get user revisions in time period
        query_args = namedtuple('QueryArgs', 'date_start date_end')\
            (format_mediawiki_timestamp(user_data.start),
             format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.\
                revert_rate_user_revs_query(user_data.user,
                                            thread_args.project,
                                            query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]
        if not total_revisions:
            results_agg.append([user_data.user, 0.0, total_revisions])
        else:
            results_agg.append([user_data.user, total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
            format(str(os.getpid()), dropped_users))

    return results_agg
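These _process_help workers all share the same calling convention: a single args sequence whose first element is the batch of users and whose second is the packed parameter state. The sketch below illustrates that convention with a plain multiprocessing pool; the project's real dispatch goes through mpw.build_thread_pool (not shown here), and the batch size and process count are made up.

# Sketch only: illustrates the (users, state) argument convention assumed by
# _process_help; the project's actual dispatch uses mpw.build_thread_pool.
from multiprocessing import Pool

def run_revert_rate_workers(users, state, procs=4, batch_size=50):
    # Each work item is a two-element sequence: (user batch, packed state)
    batches = [(users[i:i + batch_size], state)
               for i in range(0, len(users), batch_size)]
    pool = Pool(processes=procs)
    try:
        per_batch = pool.map(_process_help, batches)
    finally:
        pool.close()
        pool.join()
    # Flatten the per-batch [user, revert_rate, total_revisions] rows
    return [row for chunk in per_batch for row in chunk]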
Example no. 3
    def get_elem_from_nested_list(self, in_list, index):
        """
            Return a list containing, for each sub-list of the input,
            the element found at the given index.

            Usage: ::

                >>> el = DL.ExperimentsLoader()
                >>> results = el.execute_SQL(SQL_query_string)
                >>> new_results = el.get_elem_from_nested_list(results,0)

            Parameters:
                - **in_list**: List(List(\*)). List of lists from which
                    to parse elements.
                - **index**: Integer. Index of the element to retrieve

            Return:
                - List(\*).  List of sub-elements parsed from list.
        """

        out_list = list()

        for elem in in_list:
            try:
                out_list.append(elem[index])
            except Exception:
                logging.info('Unable to extract index %s from %s' % (
                    str(index), str(elem)))

        return out_list
Example no. 4
        def wrapper(self, users, **kwargs):

            # If users are empty flag an error
            if not users:
                raise UserMetricError('No users to pass to process method.')

            # Ensure user IDs are strings
            users = dl.DataLoader().cast_elems_to_string(users)

            # Add attributes from _param_types
            self.assign_attributes(kwargs, 'process')

            # Echo input params for metric process call
            if hasattr(self, 'log_') and self.log_:
                logging.info(__name__ + ' :: parameters = ' + str(kwargs))

            return proc_func(self, users, **kwargs)
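The wrapper above is presumably returned by a decorator that closes over proc_func, the undecorated process method. Below is a self-contained sketch of that pattern with simplified checks; the decorator name and the toy class are hypothetical, not the project's actual code.

# Hypothetical decorator illustrating the closure over proc_func; the real
# wrapper also assigns attributes from _param_types and logs parameters.
def pre_process_users(proc_func):
    def wrapper(self, users, **kwargs):
        if not users:
            raise ValueError('No users to pass to process method.')
        users = [str(u) for u in users]     # ensure user IDs are strings
        return proc_func(self, users, **kwargs)
    return wrapper

class ToyMetric(object):
    @pre_process_users
    def process(self, users, **kwargs):
        return users

print ToyMetric().process([123, 456])       # -> ['123', '456']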
Example no. 5
def get_users(cohort_expr):
    """ get users from cohort """

    if search(COHORT_REGEX, cohort_expr):
        logging.info(__name__ + ' :: Processing cohort by expression.')
        users = [user for user in parse_cohorts(cohort_expr)]
    else:
        logging.info(__name__ + ' :: Processing cohort by tag name.')
        try:
            cohort_id = query_mod.get_cohort_id(cohort_expr)
            users = [u for u in query_mod.get_cohort_users(cohort_id)]
        except (IndexError, TypeError,
                query_mod.UMQueryCallError) as e:
            logging.error(__name__ + ' :: Could not retrieve users '
                                     'for cohort {0}: {1}'.
                format(cohort_expr, str(e)))
            return []
    return users
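A minimal usage sketch for get_users; the cohort tag name below is made up, and an empty return value signals that the cohort could not be resolved.

# Usage sketch only; 'my_test_cohort' is a hypothetical tag name.
users = get_users('my_test_cohort')
if not users:
    logging.error(__name__ + ' :: No users resolved for cohort.')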
def _process_help(args):
    """
        First determine if the user has made an adequate number of
        edits.  If so, compute the number of minutes that passed
        between the Nth and Mth edit.

            - Parameters:
                - **user_handle** - List(int).  List of user ids.
                - **first_edit** - Integer.  The numeric value of
                    the first edit from which to measure the threshold.
                - **threshold_edit** - Integer.  The numeric value of
                    the threshold edit from which to measure the
                    threshold
    """

    # Unpack args
    state = args[1]
    users = args[0]

    thread_args = um.UserMetric._unpack_params(state)

    if thread_args.log_:
        logging.debug(__name__ + '::Computing Time to threshold on '
                                 '{0} users. (PID = {1})'.format(len(users),
                                                                 getpid()))

    minutes_to_threshold = list()

    # For each user gather their revisions and produce a time diff
    for user in users:
        revs = query_mod.\
            time_to_threshold_revs_query(user, thread_args.project, None)
        revs = [rev[0] for rev in revs]
        minutes_to_threshold.append(
            [user, get_minute_diff_result(revs,
                                          thread_args.threshold_edit,
                                          thread_args.first_edit)])

    if thread_args.log_:
        logging.info(__name__ + '::Processed PID = {0}.'.format(getpid()))

    return minutes_to_threshold
def time_series_listener(process_queue, event_queue):
    """
        Listener for ``time_series_worker``.  Blocks and logs until all
        processes computing time series data are complete.  Returns time
        dependent data from metrics.

        Parameters
        ~~~~~~~~~~

            process_queue : list
                List of active processes computing metrics data.

            event_queue : multiprocessing.Queue
                Asynchronous data coming in from worker processes.
    """
    data = list()

    while 1:
        # sleep before checking worker threads
        time.sleep(PROCESS_SLEEP_TIME)

        logging.info(__name__ + ' :: Time series process queue\n'
                                '\t{0} threads. (PID = {1})'.
            format(str(len(process_queue)), os.getpid()))

        while not event_queue.empty():
            data.extend(event_queue.get())
        # Iterate over a copy so finished processes can be removed safely
        for p in process_queue[:]:
            if not p.is_alive():
                p.terminate()
                process_queue.remove(p)

        # exit once all processes have finished
        if not process_queue:
            break

    # sort
    return sorted(data, key=operator.itemgetter(0), reverse=False)
def _process_help(args):
    """
        Worker thread method for NamespaceEdits::process().
    """

    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple('QueryArgs', 'start end')

    if metric_params.log_:
        logging.info(__name__ + '::Computing namespace edits. (PID = %s)' %
                                getpid())

    # Tally counts of namespace edits
    results = dict()
    ump_res = UMP_MAP[metric_params.group](users, metric_params)
    for ump_rec in ump_res:

        results[str(ump_rec.user)] = OrderedDict()

        for ns in NamespaceEdits.VALID_NAMESPACES:
            results[str(ump_rec.user)][str(ns)] = 0

        query_results = query_mod.namespace_edits_rev_query([ump_rec.user],
            metric_params.project,
            query_args_type(ump_rec.start, ump_rec.end))

        for row in query_results:
            try:
                if row[1] in NamespaceEdits.VALID_NAMESPACES:
                    results[str(row[0])][str(row[1])] = int(row[2])
            except (KeyError, IndexError):
                logging.error(__name__ + "::Could not process row: %s" % str(row))
                continue

    return [(user, results[user]) for user in results]
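The return value pairs each user ID with an OrderedDict of per-namespace edit counts. The sketch below only illustrates that shape; the IDs, namespaces, and counts are made up.

from collections import OrderedDict

# Result-shape illustration only; user IDs, namespaces and counts are made up.
sample = [('1234', OrderedDict([('0', 17), ('1', 2), ('4', 0)])),
          ('5678', OrderedDict([('0', 3), ('1', 0), ('4', 1)]))]
for user, ns_counts in sample:
    print user, dict(ns_counts)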
Example no. 9
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)

    if metric_params.log_:
        logging.info(__name__ + ' :: Processing revision data ' +
                                '(%s users) by user... (PID = %s)' % (
                                    len(users), os.getpid()))
        logging.info(__name__ + ' :: ' + str(metric_params))

    # only proceed if there is user data
    if not len(users):
        return []

    results = list()
    dropped_users = 0
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    for t in umpd_obj:
        uid = long(t.user)
        try:
            count = query_mod.rev_count_query(uid,
                                              metric_params.survival_,
                                              metric_params.namespace,
                                              metric_params.project,
                                              t.start,
                                              t.end)
        except query_mod.UMQueryCallError:
            dropped_users += 1
            continue

        if count < metric_params.n:
            results.append((uid, 0))
        else:
            results.append((uid, 1))

    if metric_params.log_:
        logging.info(__name__ + '::Processed PID = %s.  '
                                'Dropped users = %s.' % (
                                    os.getpid(), str(dropped_users)))

    return results
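Each result row is a (uid, flag) pair in which the flag is 1 when the user reached the revision threshold n within their window and 0 otherwise, so the output aggregates naturally into a proportion. A shape sketch with made-up values:

# Result-shape illustration only; the IDs and flags are hypothetical.
sample = [(1234L, 1), (5678L, 0), (9012L, 1)]
passed = sum(flag for _, flag in sample)
print '%s of %s users met the threshold' % (passed, len(sample))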
Example no. 10
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)

    if metric_params.log_:
        logging.info(__name__ + ' :: Processing pages created data ' +
                                '(%s users) by user... (PID = %s)' % (
                                    len(users), os.getpid()))
        logging.info(__name__ + ' :: ' + str(metric_params))

    # only proceed if there is user data
    if not len(users):
        return []

    results = list()
    dropped_users = 0
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    for t in umpd_obj:
        uid = long(t.user)
        try:
            count = query_mod.pages_created_query(uid,
                                                  metric_params.project,
                                                  metric_params)
            print count
        except query_mod.UMQueryCallError:
            dropped_users += 1
            continue

        try:
            results.append((str(uid), count[0][0]))
        except TypeError:
            dropped_users += 1

    if metric_params.log_:
        logging.info(__name__ + '::Processed PID = %s.  '
                                'Dropped users = %s.' % (
                                    os.getpid(), str(dropped_users)))

    return results
Example no. 11
def generate_test_cohort(project,
                         max_size=10,
                         write=False,
                         user_interval_size=1,
                         rev_interval_size=7,
                         rev_lower_limit=0):
    """
        Build a test cohort (list of UIDs) for the given project.

        Parameters
        ~~~~~~~~~~

        project : str
           Wikipedia project e.g. 'enwiki'.

        max_size : uint
           Maximum number of users to include in the cohort.

        write : boolean
           Flag indicating whether to write the cohort to
           settings.__cohort_meta_db__ and settings.__cohort_db__.

        user_interval_size : uint
            Number of days from the start date within which to take
            registered users.

        rev_interval_size : uint
            Number of days from the start date defining the window in
            which revisions are counted.

        rev_lower_limit : int
            Minimum number of revisions a user must have between
            registration and the end of the revision interval.

        Returns the list of UIDs from the corresponding project that defines
        the test cohort.
    """

    # Determine the time bounds that define the cohort acceptance criteria

    ts_start_o = datetime.now() + timedelta(days=-60)
    ts_end_user_o = ts_start_o + timedelta(days=int(user_interval_size))
    ts_end_revs_o = ts_start_o + timedelta(days=int(rev_interval_size))

    ts_start = format_mediawiki_timestamp(ts_start_o)
    ts_end_user = format_mediawiki_timestamp(ts_end_user_o)
    ts_end_revs = format_mediawiki_timestamp(ts_end_revs_o)

    # Synthesize query and execute
    logging.info(__name__ + ' :: Getting users from {0}.\n\n'
                            '\tUser interval: {1} - {2}\n'
                            '\tRevision interval: {1} - {3}\n'
                            '\tMax users = {4}\n'
                            '\tMin revs = {5}\n'.
                            format(project,
                                   ts_start,
                                   ts_end_user,
                                   ts_end_revs,
                                   max_size,
                                   rev_lower_limit
                                   )
                 )
    query = sub_tokens(SELECT_PROJECT_IDS, db=escape_var(str(project)))

    # @TODO MOVE DB REFS INTO QUERY MODULE

    try:
        params = {
            'ts_start': str(ts_start),
            'ts_end_user': str(ts_end_user),
            'ts_end_revs': str(ts_end_revs),
            'max_size': int(max_size),
            'rev_lower_limit': int(rev_lower_limit),
        }
    except ValueError as e:
        raise Exception(__name__ + ' :: Bad params ' + str(e))

    conn = Connector(instance=settings.PROJECT_DB_MAP[project])
    conn._cur_.execute(query, params)

    users = [row for row in conn._cur_]
    del conn

    # get latest cohort id & cohort name
    utm_name = generate_test_cohort_name(project)

    # add new ids to usertags & usertags_meta
    if write:
        logging.info(__name__ + ' :: Inserting records...\n\n'
                                '\tCohort name - {0}\n'
                                '\t{1} - {2} record(s)\n'.
                                format(utm_name,
                                       settings.__cohort_db__,
                                       len(users)))
        query_mod.add_cohort_data(utm_name, users, project)

    return users
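A usage sketch for generate_test_cohort; 'enwiki' (the project named in the docstring) must be a key of settings.PROJECT_DB_MAP, the remaining argument values are illustrative, and write=False keeps the run read-only.

# Usage sketch only; argument values are illustrative.
uids = generate_test_cohort('enwiki',
                            max_size=25,
                            write=False,
                            user_interval_size=1,
                            rev_interval_size=7,
                            rev_lower_limit=5)
print '%s candidate users' % len(uids)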
def job_control():
    """
        Controls the execution of user metrics requests.  Pulls incoming
        requests from the broker's request target, dispatches them to
        worker processes, and pushes completed results to the response
        target.
    """

    # Store executed and pending jobs respectively
    job_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)

    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:

        time.sleep(RESQUEST_TIMEOUT)

        # Request Queue Processing
        # ------------------------

        # logging.debug(log_name + ' :: POLLING REQUESTS...')
        logging.debug(log_name + ' :: JOB QUEUE - {0}'.format(str(job_queue)))
        req_item = None

        # Only process if there are fewer than the maximum number of concurrent
        # jobs
        if concurrent_jobs < MAX_CONCURRENT_JOBS:

            # Pop from request target
            req_item = umapi_broker_context.pop(REQUEST_BROKER_TARGET)

            # Push to process target
            if req_item:
                url_hash = sha1(req_item.encode('utf-8')).hexdigest()
                umapi_broker_context.add(PROCESS_BROKER_TARGET, url_hash,
                                         req_item)

                logging.debug(log_name + ' :: PULLING item from request queue -> '
                                         '\n\t{0}'
                              .format(req_item))

        # Process complete jobs
        # ---------------------

        if concurrent_jobs:
            # Iterate over a copy so completed jobs can be removed safely
            for job_item in job_queue[:]:

                if not job_item.queue.empty():

                    logging.info(log_name + ' :: READING RESPONSE - {0}'.
                        format(job_item.request))

                    # Pull data off of the queue and add it to response queue
                    data = ''
                    while not job_item.queue.empty():
                        data += job_item.queue.get(True)

                    # Remove from process target
                    url_hash = sha1(job_item.request.encode('utf-8')).hexdigest()
                    try:
                        umapi_broker_context.remove(PROCESS_BROKER_TARGET,
                                                    url_hash)
                    except Exception as e:
                        logging.error(log_name + ' :: Could not process '
                                                 '{0} from {1}  -- {2}'.
                            format(job_item.request,
                                   PROCESS_BROKER_TARGET,
                                   e.message))

                    # Add to response target
                    umapi_broker_context.add(RESPONSE_BROKER_TARGET, url_hash,
                                             pack_response_for_broker(
                                                 job_item.request, data))

                    del job_queue[job_queue.index(job_item)]
                    concurrent_jobs -= 1
                    logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}'
                                             '\n\tConcurrent jobs = {1}'
                                  .format(str(job_item.id), concurrent_jobs))

        # Process request
        # ---------------

        if req_item:
            req_q = Queue()
            proc = Process(target=process_metrics, args=(req_q, req_item))
            proc.start()

            job_item = job_item_type(job_id, proc, req_item, req_q)
            job_queue.append(job_item)

            concurrent_jobs += 1
            job_id += 1

            logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}'
                                     '\n\tConcurrent jobs = {1}, REQ = {2}'
                          .format(str(job_id), concurrent_jobs, req_item))

    logging.debug('{0} - FINISHING.'.format(log_name))
def build_time_series(start, end, interval, metric, aggregator, cohort,
                      **kwargs):
    """
        Builds a timeseries dataset for a given metric.

        Parameters:

            start: str or datetime.
                date + time indicating start of time series

            end : str or datetime.
                date + time indicating end of time series

            interval : int.
                integer value in hours that defines the amount of
                time between data-points

            metric : class object.
                Metrics class (derived from UserMetric)

            aggregator : method.
                Aggregator method used to aggregate data for time
                series data points

            cohort : list(str).
                list of user IDs

        e.g.

        >>> cohort = ['156171','13234584']
        >>> metric = ba.BytesAdded
        >>> aggregator = agg.list_sum_indices

        >>> build_time_series('20120101000000', '20120112000000', 24, metric,
                aggregator, cohort,
            num_threads=4, num_threads_metric=2, log=True)

    """

    log = bool(kwargs['log']) if 'log' in kwargs else False

    # Get datetime types, and the number of threads
    start = date_parse(format_mediawiki_timestamp(start))
    end = date_parse(format_mediawiki_timestamp(end))
    k = kwargs['kt_'] if 'kt_' in kwargs else MAX_THREADS

    # Compute window size and ensure that all the conditions
    # necessary to generate a proper time series are met
    num_intervals = int((end - start).total_seconds() / (3600 * interval))
    intervals_per_thread = num_intervals / k

    # Compose the sets of time series lists
    f = lambda t, i:  t + datetime.timedelta(
        hours=int(intervals_per_thread * interval * i))
    time_series = [_get_timeseries(f(start, i),
                   f(start, i+1), interval) for i in xrange(k)]
    if f(start, k) < end:
        time_series.append(_get_timeseries(f(start, k), end, interval))

    event_queue = Queue()
    process_queue = list()

    if log:
        logging.info(__name__ + ' :: Spawning procs\n'
                                '\t%s - %s, interval = %s\n'
                                '\tthreads = %s ... ' % (str(start), str(end),
                                                       interval, k))
    for i in xrange(len(time_series)):
        p = Process(target=time_series_worker,
                    args=(time_series[i], metric, aggregator,
                          cohort, event_queue, kwargs))
        p.start()
        process_queue.append(p)

    # Call the listener
    return time_series_listener(process_queue, event_queue)
def time_series_worker(time_series,
                       metric,
                       aggregator,
                       cohort,
                       event_queue,
                       kwargs):
    """
        Worker thread which computes time series data for a set of points

        Parameters
        ~~~~~~~~~~

            time_series : iterator(datetime)
                Datetimes defining the series.

            metric : class object
                Metrics class (derived from UserMetric).

            aggregator : method
                Aggregator method reference.

            cohort : list(str)
                List of user IDs.

            event_queue : multiprocessing.Queue
                Asynchronous data-structure to communicate with parent proc.

            kwargs : dict
                Keyword arguments passed through to the metric.
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = list()
    ts_s = time_series.next()
    new_kwargs = deepcopy(kwargs)

    # re-map some keyword args relating to thread counts
    if 'metric_threads' in new_kwargs:
        d = json.loads(new_kwargs['metric_threads'])
        for key in d:
            new_kwargs[key] = d[key]
        del new_kwargs['metric_threads']

    while 1:
        try:
            ts_e = time_series.next()
        except StopIteration:
            break

        if log:
            logging.info(__name__ + ' :: Processing thread:\n'
                                    '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                                  str(ts_s),
                                                                  str(ts_e)))

        metric_obj = metric(datetime_start=ts_s, datetime_end=ts_e, **new_kwargs).\
            process(cohort, **new_kwargs)

        r = agg_engine(aggregator, metric_obj, metric.header())

        if log:
            logging.info(__name__ + ' :: Processing complete:\n'
                                    '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                                  str(ts_s),
                                                                  str(ts_e)))
        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e

    event_queue.put(data)
Example no. 15
def process_metrics(p, request_meta):
    """
        Worker process for requests, forked from the job controller.  This
        method handles:

            * Filtering cohort type: "regular" cohort, single user, user group
            * Secondary validation
    """

    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)

    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' -  PID = {2})'.
        format(request_meta.cohort_expr, request_meta.metric, getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = list()

    # obtain user list - handle the case where a lone user ID is passed
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)

        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
        logging.debug(__name__ + ' :: Using default project from ' \
                                 'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # process request
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = getsizeof(results, None)

        if response_size > MAX_BLOCK_SIZE:
            index = 0

            # Dump the data in pieces - block until it is picked up
            while index < response_size:
                p.put(results[index:index+MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2})'.
            format(request_meta.cohort_expr, request_meta.metric, getpid()))

    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2})'.
        format(request_meta.cohort_expr, request_meta.metric, getpid()))
Example no. 16
def process_data_request(request_meta, users):
    """
        Main entry point of the module; prepares results for a given request.
        Coordinates a request based on the following parameters::

            request_meta - carries the request parameters.  Most notably the
            metric handle, which determines the type of metric object to
            build, the "aggregator" if the request requires aggregation, and
            the slice length for time series requests.  The remaining fields
            specify metric object parameters.

            users (list) - list of user IDs.
    """

    # Set interval length in hours if not present
    if not request_meta.slice:
        request_meta.slice = DEFAULT_INERVAL_LENGTH
    else:
        request_meta.slice = float(request_meta.slice)

    # Get the aggregator key
    agg_key = get_agg_key(request_meta.aggregator, request_meta.metric) if \
        request_meta.aggregator else None

    args = ParameterMapping.map(request_meta)

    # Initialize the results
    results, metric_class, metric_obj = format_response(request_meta)

    start = metric_obj.datetime_start
    end = metric_obj.datetime_end

    if results['type'] == request_types.time_series:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        # Determine intervals and thread allocation
        total_intervals = (date_parse(end) - date_parse(start)).\
                          total_seconds() / (3600 * request_meta.slice)
        time_threads = max(1, int(total_intervals / INTERVALS_PER_THREAD))
        time_threads = min(MAX_THREADS, time_threads)

        logging.info(__name__ + ' :: Initiating time series for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'agg': request_meta.aggregator,
                                    'start': str(start),
                                    'end': str(end),
                                    })
        metric_threads = '"k_" : {0}, "kr_" : {1}'.format(USER_THREADS,
            REVISION_THREADS)
        metric_threads = '{' + metric_threads + '}'

        new_kwargs = deepcopy(args)

        del new_kwargs['slice']
        del new_kwargs['aggregator']
        del new_kwargs['datetime_start']
        del new_kwargs['datetime_end']

        out = tspm.build_time_series(start,
            end,
            request_meta.slice,
            metric_class,
            aggregator_func,
            users,
            kt_=time_threads,
            metric_threads=metric_threads,
            log=True,
            **new_kwargs)

        results['header'] = ['timestamp'] + \
                            getattr(aggregator_func,
                                    um.METRIC_AGG_METHOD_HEAD)
        for row in out:
            timestamp = date_parse(row[0][:19]).strftime(
                DATETIME_STR_FORMAT)
            results['data'][timestamp] = row[3:]

    elif results['type'] == request_types.aggregator:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        logging.info(__name__ + ' :: Initiating aggregator for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'agg': request_meta.aggregator,
                                    'start': str(start),
                                    'end': str(end),
                                    })

        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
        results['header'] = to_string(r.header)
        results['data'] = r.data[1:]

    elif results['type'] == request_types.raw:

        logging.info(__name__ + ':: Initiating raw request for %(metric)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'start': str(start),
                                    'end': str(end),
                                    })
        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        for m in metric_obj.__iter__():
            results['data'][m[0]] = m[1:]

    return results
Example no. 17
    def build_table_query(self,
                          select_fields,
                          table_name,
                          where_fields=None,
                          where_ops=None,
                          group_fields=None,
                          order_fields=None):
        """
            Constructs a SQL query given the parameters.

            Parameters
            ~~~~~~~~~~
                select_fields : List(string)
                    Column names to return in query
                table_name : string
                    Name of the table to query
                where_fields : List(string)
                    Statements on which to condition results *[optional]*
                where_ops : List(string)
                    Logical operators with which to combine where statements
                        *[optional]*
                group_fields : List(string)
                    Column names to group on *[optional]*
                order_fields : List(string)
                    Column names to order by *[optional]*

            Return a formatted SQL query constructed from parameters. Note
            that this may be an invalid query if the input was not well formed.
        """

        # Pre-process defaults
        if where_fields is None: where_fields = []
        if where_ops is None: where_ops = []
        if group_fields is None: group_fields = []
        if order_fields is None: order_fields = []

        # Begin function
        try:

            select_str = 'select '
            for field in select_fields:
                select_str += field + ','
            select_str = select_str[:-1]

            if where_fields:
                where_str = 'where '
                for index in range(len(where_ops)):
                    where_str += where_fields[index] + ' ' + \
                                 where_ops[index] + ' '
                where_str += where_fields[len(where_ops)]
            else:
                where_str = ''

            if group_fields:
                group_str = 'group by '
                for field in group_fields:
                    group_str += field + ','
                group_str = group_str[:-1]
            else:
                group_str = ''

            if order_fields:
                order_str = 'order by '
                for field in order_fields:
                    order_str += field + ','
                order_str = order_str[:-1]
            else:
                order_str = ''

            sql = '%s from %s %s %s %s' % (select_str, table_name, where_str,
                                           group_str, order_str)

        except Exception as e:
            logging.info('Could not build query for %s: %s' % (table_name, e))
            sql = ''

        return sql
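A worked usage sketch with the concatenation fixed as above. The instance and the table/column names are assumptions; the trailing comment shows roughly what the returned string looks like (it is produced as a single line).

# Usage sketch only; the instance, table, and column names are assumptions.
loader = dl.DataLoader()
sql = loader.build_table_query(select_fields=['rev_user', 'count(*)'],
                               table_name='revision',
                               where_fields=['rev_user > 0',
                                             "rev_timestamp >= '20120101000000'"],
                               where_ops=['AND'],
                               group_fields=['rev_user'])
print sql
# -> select rev_user,count(*) from revision
#    where rev_user > 0 AND rev_timestamp >= '20120101000000' group by rev_user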