Example #1
# Standard-library imports used below.  Project-level helpers (MediaWikiUser,
# get_users, query_mod, process_data_request, error_codes, MAX_BLOCK_SIZE)
# are assumed to be imported from the surrounding package.
import logging
from os import getpid


def process_metrics(p, request_meta):
    """
        Worker process for requests, forked from the job controller.  This
        method handles:

            * Filtering cohort type: "regular" cohort, single user, user group
            * Secondary validation
            *
    """

    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)

    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' -  PID = {2}'.
        format(request_meta.cohort_expr, request_meta.metric, getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = list()

    # obtain user list - handle the case where a lone user ID is passed
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)

        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
            logging.debug(__name__ + ' :: Using default project from '
                                     'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # process request
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = len(results)

        if response_size > MAX_BLOCK_SIZE:
            index = 0

            # Dump the data in pieces - block until it is picked up
            while index < response_size:
                p.put(results[index:index+MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2}'.
            format(request_meta.cohort_expr, request_meta.metric, getpid()))

    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2}'.
            format(request_meta.cohort_expr, request_meta.metric, getpid()))
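
The while loop near the end of process_metrics streams an oversized result
string back to the controller in MAX_BLOCK_SIZE slices.  The following is a
minimal, self-contained sketch of that chunked hand-off pattern; the
multiprocessing queue, the END sentinel and the reassembly loop are
illustrative assumptions and are not part of the API shown above.

# Sketch only: chunked transfer of a large serialized result over a queue.
# The END sentinel marking the end of a response is an assumed convention.
from multiprocessing import Queue

MAX_BLOCK_SIZE = 4096
END = None   # hypothetical end-of-response marker


def put_in_blocks(q, results):
    """ Put `results` on the queue in MAX_BLOCK_SIZE pieces, then a sentinel. """
    index = 0
    while index < len(results):
        q.put(results[index:index + MAX_BLOCK_SIZE], block=True)
        index += MAX_BLOCK_SIZE
    q.put(END, block=True)


def get_blocks(q):
    """ Reassemble the pieces on the consumer side. """
    parts = []
    while True:
        block = q.get(block=True)
        if block is END:
            break
        parts.append(block)
    return ''.join(parts)


if __name__ == '__main__':
    q = Queue()
    put_in_blocks(q, 'x' * 10000)
    print(len(get_blocks(q)))   # 10000
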
Example #2
# Standard-library and Flask imports used below.  Project-level helpers
# (query_mod, RequestMetaFactory, MediaWikiUser, get_data, build_key_signature,
# the request queue and callback utilities, error_codes, REFRESH_REGEX) are
# assumed to be imported from the surrounding package.
import logging
from re import sub

from flask import (request, redirect, url_for, render_template, jsonify,
                   make_response)


def output(cohort, metric):
    """ View corresponding to a data request -
        All of the setup and execution for a request happens here. """

    # Get URL.  Check for refresh flag - drop from url
    url = request.url.split(request.url_root)[1]
    refresh = 'refresh' in request.args
    if refresh:
        url = sub(REFRESH_REGEX, '', url)

    # Get the refresh date of the cohort
    try:
        cid = query_mod.get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        logging.error(__name__ + ' :: Could not retrieve refresh '
                                 'time of cohort.')

    # Build a request and validate.
    #
    # 1. Populate with request parameters from query args.
    # 2. Filter the input discarding any url junk
    # 3. Process defaults for request parameters
    # 4. See if this maps to a single user request
    # 5. Otherwise, validate the cohort
    try:
        rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    filter_request_input(request, rm)
    try:
        format_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    if rm.is_user:
        project = rm.project if rm.project else 'enwiki'
        if not MediaWikiUser.is_user_name(cohort, project):
            logging.error(__name__ + ' :: "{0}" is not a valid username '
                                     'in "{1}"'.format(cohort, project))
            return redirect(url_for('all_cohorts') + '?error=3')
    else:
        # @TODO CALL COHORT VALIDATION HERE
        pass

    # Determine if the request maps to an existing response.
    #
    # 1. If the response already exists in the hash, return it.
    # 2. Otherwise, add the request to the queue.
    data = get_data(rm)
    key_sig = build_key_signature(rm, hash_result=True)

    # Is the request already running?
    is_running = req_cb_get_is_running(key_sig, VIEW_LOCK)

    # Determine if request is already hashed
    if data and not refresh:
        return make_response(jsonify(data))

    # Determine if the job is already running
    elif is_running:
        return render_template('processing.html',
                               error=error_codes[0],
                               url_str=str(rm))

    # Add the request to the queue
    else:
        api_request_queue.put(unpack_fields(rm), block=True)
        req_cb_add_req(key_sig, url, VIEW_LOCK)

    return render_template('processing.html', url_str=str(rm))
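
The call to build_key_signature(rm, hash_result=True) above reduces the
request meta to a stable key that is used to deduplicate cached responses and
in-flight jobs.  The following is a minimal sketch of one way such a signature
could be computed; the field names and the md5 digest are illustrative
assumptions, not the project's actual implementation.

# Sketch only: a stable, hashed signature over identifying request fields.
from hashlib import md5


def build_key_signature_sketch(request_meta, hash_result=False):
    """ Join the identifying request fields into a canonical string and
        optionally collapse it to a fixed-length hash. """
    fields = ('cohort_expr', 'metric', 'project', 'start', 'end')
    key = '|'.join(str(getattr(request_meta, f, '')) for f in fields)
    if hash_result:
        return md5(key.encode('utf-8')).hexdigest()
    return key


if __name__ == '__main__':
    class Meta(object):
        cohort_expr, metric, project = 'all', 'edits', 'enwiki'
        start, end = '20130101', '20130201'

    print(build_key_signature_sketch(Meta(), hash_result=True))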