def process_responses(response_queue, msg_in):
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_responses.__name__)
    logging.debug(log_name + ' - STARTING...')

    while True:
        stream = ''

        # Block on the response queue
        try:
            res = response_queue.get(True)
            request_meta = rebuild_unpacked_request(res)
        except Exception:
            logging.error(log_name + ' - Could not get request meta')
            continue

        # Drain the response payload; a brief idle timeout marks its end
        data = response_queue.get(True)
        while data:
            stream += data
            try:
                data = response_queue.get(True, timeout=1)
            except Empty:
                break

        try:
            # Validate that the accumulated stream parses; on failure it is
            # replaced below with a serialized error response
            data = eval(stream)
        except Exception as e:

            # Report a fraction of the failed response data directly in the
            # logger
            if len(unicode(stream)) > 2000:
                excerpt = stream[:1000] + ' ... ' + stream[-1000:]
            else:
                excerpt = stream

            logging.error(log_name + ' - Request failed. {0}\n\n' \
                                     'data excerpt: {1}'.format(e.message, excerpt))

            # Format a response that will report on the failed request
            stream = "OrderedDict([('status', 'Request failed.'), " \
                     "('exception', '" + escape(unicode(e.message)) + "')," \
                     "('request', '" + escape(unicode(request_meta)) + "'), " \
                     "('data', '" + escape(unicode(stream)) + "')])"

        key_sig = build_key_signature(request_meta, hash_result=True)

        # Set request in list to "not alive"
        req_cb_flag_job_complete(key_sig, REQ_NCB_LOCK)

        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(stream, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
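The consumer above expects the response queue to deliver the unpacked request fields first, followed by one or more data chunks until the queue briefly goes quiet. A minimal wiring sketch of how it could be launched next to job_control with shared multiprocessing queues follows; this is an assumption, not code from the source, and the names start_workers, api_request_queue and api_response_queue are illustrative.

from multiprocessing import Process, Queue

def start_workers():
    # Illustrative wiring only -- queue names and daemon setup are assumptions
    api_request_queue = Queue()    # views enqueue unpacked request fields here
    api_response_queue = Queue()   # job_control forwards request creds + data here

    controller = Process(target=job_control,
                         args=(api_request_queue, api_response_queue))
    responder = Process(target=process_responses,
                        args=(api_response_queue, None))  # msg_in unused in this sketch

    controller.daemon = True
    responder.daemon = True
    controller.start()
    responder.start()

    return api_request_queue, api_response_queue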
Example #2
def job_control(request_queue, response_queue):
    """
        Controls the execution of user metrics requests

        Parameters
        ~~~~~~~~~~

        request_queue : multiprocessing.Queue
           Queues incoming API requests.

        response_queue : multiprocessing.Queue
           Queues outgoing request credentials and response data consumed by
           process_responses.

    """

    # Store executed and pending jobs respectively
    job_queue = list()
    wait_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)

    logging.debug('{0} - STARTING...'.format(log_name))

    while True:

        # Request Queue Processing
        # ------------------------

        try:
            # Pull an item off of the queue

            req_item = request_queue.get(timeout=QUEUE_WAIT)

            logging.debug(log_name + ' :: PULLING item from request queue -> ' \
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                .format(req_item['cohort_expr'], req_item['metric']))

        except Exception:
            req_item = None
            #logging.debug('{0} :: {1}  - Listening ...'
            #.format(__name__, job_control.__name__))


        # Process complete jobs
        # ---------------------

        # Iterate over a copy since completed jobs are removed within the loop
        for job_item in job_queue[:]:

            # Look for completed jobs
            if not job_item.queue.empty():

                # Put request creds on res queue -- this goes to
                # response_handler asynchronously
                response_queue.put(unpack_fields(job_item.request),
                                   block=True)

                # Pull data off of the queue and add it to response queue
                while not job_item.queue.empty():
                    data = job_item.queue.get(True)
                    if data:
                        response_queue.put(data, block=True)

                job_queue.remove(job_item)

                concurrent_jobs -= 1

                logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}' \
                                         '\n\tConcurrent jobs = {1}'
                    .format(str(job_item.id), concurrent_jobs))


        # Process pending jobs
        # --------------------

        # Iterate over a copy since started jobs are removed within the loop
        for wait_req in wait_queue[:]:
            if concurrent_jobs < MAX_CONCURRENT_JOBS:
                # prepare job from item

                req_q = Queue()
                proc = Process(target=process_metrics, args=(req_q, wait_req))
                proc.start()

                job_item = job_item_type(job_id, proc, wait_req, req_q)
                job_queue.append(job_item)

                wait_queue.remove(wait_req)

                concurrent_jobs += 1
                job_id += 1

                logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}' \
                                         '\n\tConcurrent jobs = {1}, ' \
                                         'COHORT = {2} - METRIC = {3}'\
                    .format(str(job_item.id), concurrent_jobs,
                            wait_req.cohort_expr, wait_req.metric))


        # Add newest job to the queue
        # ---------------------------

        if req_item:

            # Build the request item
            rm = rebuild_unpacked_request(req_item)

            logging.debug(log_name + ' :: REQUEST -> WAIT ' \
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                .format(rm.cohort_expr, rm.metric))
            wait_queue.append(rm)

            # Communicate with request notification callback about new job
            key_sig = build_key_signature(rm, hash_result=True)
            url = get_url_from_keys(build_key_signature(rm), REQUEST_PATH)
            req_cb_add_req(key_sig, url, REQ_NCB_LOCK)


    logging.debug('{0} - FINISHING.'.format(log_name))
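job_control above builds its queue entries with job_item_type(job_id, proc, wait_req, req_q) and later reads the id, request and queue attributes from them. A plausible definition is a namedtuple whose field order matches that constructor call; this is a sketch of the assumed type, not the project's actual definition.

from collections import namedtuple

# Sketch only: field order follows the call job_item_type(job_id, proc,
# wait_req, req_q); 'process' is a guessed name, the other fields match the
# attribute accesses in job_control
job_item_type = namedtuple('job_item_type', 'id process request queue')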
Example #3
def output(cohort, metric):
    """ View corresponding to a data request -
        All of the setup and execution for a request happens here. """

    # Get URL.  Check for refresh flag - drop from url
    url = request.url.split(request.url_root)[1]
    refresh = 'refresh' in request.args
    if refresh:
        url = sub(REFRESH_REGEX, '', url)

    # Get the refresh date of the cohort
    try:
        cid = query_mod.get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        logging.error(__name__ + ' :: Could not retrieve refresh '
                                 'time of cohort.')

    # Build a request and validate.
    #
    # 1. Populate with request parameters from query args.
    # 2. Filter the input discarding any url junk
    # 3. Process defaults for request parameters
    # 4. See if this maps to a single user request
    try:
        rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    filter_request_input(request, rm)
    try:
        format_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    if rm.is_user:
        project = rm.project if rm.project else 'enwiki'
        if not MediaWikiUser.is_user_name(cohort, project):
            logging.error(__name__ + ' :: "{0}" is not a valid username '
                                     'in "{1}"'.format(cohort, project))
            return redirect(url_for('all_cohorts') + '?error=3')
    else:
        # @TODO CALL COHORT VALIDATION HERE
        pass

    # Determine if the request maps to an existing response.
    #
    # 1. The response already exists in the hash, return.
    # 2. Otherwise, add the request to the queue.
    data = get_data(rm)
    key_sig = build_key_signature(rm, hash_result=True)

    # Is the request already running?
    is_running = req_cb_get_is_running(key_sig, VIEW_LOCK)

    # Determine if request is already hashed
    if data and not refresh:
        return make_response(jsonify(data))

    # Determine if the job is already running
    elif is_running:
        return render_template('processing.html',
                               error=error_codes[0],
                               url_str=str(rm))

    # Add the request to the queue
    else:
        api_request_queue.put(unpack_fields(rm), block=True)
        req_cb_add_req(key_sig, url, VIEW_LOCK)

    return render_template('processing.html', url_str=str(rm))
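The view reads cohort and metric from the URL and the refresh flag from the query string. A hedged sketch of how it might be registered follows; the actual route and application setup live elsewhere in the project, and the URL rule below is illustrative only.

from flask import Flask

app = Flask(__name__)

# Illustrative URL rule only -- the project's real routing may differ
app.add_url_rule('/cohorts/<cohort>/<metric>', 'output', output, methods=['GET'])

# e.g. GET /cohorts/<cohort>/<metric>?refresh=1 bypasses any cached result and
# re-enqueues the job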