def process_responses(response_queue, msg_in):
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_responses.__name__)
    logging.debug(log_name + ' - STARTING...')

    while True:
        stream = ''

        # Block on the response queue
        try:
            res = response_queue.get(True)
            request_meta = rebuild_unpacked_request(res)
        except Exception:
            logging.error(log_name + ' - Could not get request meta')
            continue

        data = response_queue.get(True)
        while data:
            stream += data
            try:
                data = response_queue.get(True, timeout=1)
            except Empty:
                break

        try:
            data = eval(stream)
        except Exception as e:
            # Report a fraction of the failed response data directly in the
            # logger
            if len(unicode(stream)) > 2000:
                excerpt = stream[:1000] + ' ... ' + stream[-1000:]
            else:
                excerpt = stream

            logging.error(log_name + ' - Request failed. {0}\n\n' \
                          'data excerpt: {1}'.format(e.message, excerpt))

            # Format a response that will report on the failed request
            stream = "OrderedDict([('status', 'Request failed.'), " \
                     "('exception', '" + escape(unicode(e.message)) + "'), " \
                     "('request', '" + escape(unicode(request_meta)) + "'), " \
                     "('data', '" + escape(unicode(stream)) + "')])"

        key_sig = build_key_signature(request_meta, hash_result=True)

        # Set request in list to "not alive"
        req_cb_flag_job_complete(key_sig, REQ_NCB_LOCK)

        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(stream, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
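
# Illustrative only (not part of the original module): a minimal sketch of
# how process_responses might be launched as a daemonised worker process.
# The helper name _start_response_worker and the msg_in=None argument are
# assumptions made for this example; process_responses never reads msg_in
# in the body above.
def _start_response_worker():
    from multiprocessing import Process, Queue

    response_queue = Queue()
    worker = Process(target=process_responses, args=(response_queue, None))
    worker.daemon = True
    worker.start()
    return response_queue, worker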
def job_control(request_queue, response_queue):
    """ Controls the execution of user metrics requests

        Parameters
        ~~~~~~~~~~
            request_queue : multiprocessing.Queue
               Queues incoming API requests.
            response_queue : multiprocessing.Queue
               Queues outgoing API responses.
    """

    # Store executed and pending jobs respectively
    job_queue = list()
    wait_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)
    logging.debug('{0} - STARTING...'.format(log_name))

    while True:

        # Request Queue Processing
        # ------------------------

        try:
            # Pull an item off of the queue
            req_item = request_queue.get(timeout=QUEUE_WAIT)
            logging.debug(log_name + ' :: PULLING item from request queue -> ' \
                          '\n\tCOHORT = {0} - METRIC = {1}'
                          .format(req_item['cohort_expr'], req_item['metric']))
        except Exception:
            req_item = None
            # logging.debug('{0} :: {1} - Listening ...'
            #               .format(__name__, job_control.__name__))

        # Process complete jobs
        # ---------------------

        # Iterate over a copy so completed jobs can be removed safely
        # while looping
        for job_item in job_queue[:]:

            # Look for completed jobs
            if not job_item.queue.empty():

                # Put request creds on res queue -- this goes to
                # response_handler asynchronously
                response_queue.put(unpack_fields(job_item.request),
                                   block=True)

                # Pull data off of the queue and add it to the response queue
                while not job_item.queue.empty():
                    data = job_item.queue.get(True)
                    if data:
                        response_queue.put(data, block=True)

                del job_queue[job_queue.index(job_item)]

                concurrent_jobs -= 1

                logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}' \
                              '\n\tConcurrent jobs = {1}'
                              .format(str(job_item.id), concurrent_jobs))

        # Process pending jobs
        # --------------------

        # Again, iterate over a copy since items are removed in the loop
        for wait_req in wait_queue[:]:

            # Strictly less-than, so no more than MAX_CONCURRENT_JOBS run
            if concurrent_jobs < MAX_CONCURRENT_JOBS:

                # Prepare a job from the queued item
                req_q = Queue()
                proc = Process(target=process_metrics,
                               args=(req_q, wait_req))
                proc.start()

                job_item = job_item_type(job_id, proc, wait_req, req_q)
                job_queue.append(job_item)

                del wait_queue[wait_queue.index(wait_req)]

                concurrent_jobs += 1
                job_id += 1

                logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}' \
                              '\n\tConcurrent jobs = {1}, ' \
                              'COHORT = {2} - METRIC = {3}' \
                              .format(str(job_item.id), concurrent_jobs,
                                      wait_req.cohort_expr, wait_req.metric))

        # Add newest job to the queue
        # ---------------------------

        if req_item:
            # Build the request item
            rm = rebuild_unpacked_request(req_item)

            logging.debug(log_name + ' :: REQUEST -> WAIT ' \
                          '\n\tCOHORT = {0} - METRIC = {1}'
                          .format(rm.cohort_expr, rm.metric))
            wait_queue.append(rm)

            # Communicate with request notification callback about new job
            key_sig = build_key_signature(rm, hash_result=True)
            url = get_url_from_keys(build_key_signature(rm), REQUEST_PATH)
            req_cb_add_req(key_sig, url, REQ_NCB_LOCK)

    logging.debug('{0} - FINISHING.'.format(log_name))
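
# Illustrative only (an assumption; the project's real start-up wiring is
# not shown in this section): job_control and process_responses are meant
# to run as separate processes sharing the request and response queues.
# The helper name _start_job_controller is hypothetical.
def _start_job_controller():
    from multiprocessing import Process, Queue

    request_queue = Queue()
    response_queue = Queue()

    controller = Process(target=job_control,
                         args=(request_queue, response_queue))
    responder = Process(target=process_responses,
                        args=(response_queue, None))
    controller.daemon = True
    responder.daemon = True
    controller.start()
    responder.start()

    return request_queue, response_queue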
def output(cohort, metric):
    """ View corresponding to a data request -
        All of the setup and execution for a request happens here. """

    # Get the URL.  Check for the refresh flag and drop it from the url.
    url = request.url.split(request.url_root)[1]
    refresh = 'refresh' in request.args
    if refresh:
        url = sub(REFRESH_REGEX, '', url)

    # Get the refresh date of the cohort
    try:
        cid = query_mod.get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        logging.error(__name__ + ' :: Could not retrieve refresh '
                                 'time of cohort.')

    # Build a request and validate.
    #
    # 1. Populate with request parameters from query args.
    # 2. Filter the input, discarding any url junk.
    # 3. Process defaults for request parameters.
    # 4. See if this maps to a single user request.
    # 5. Otherwise, validate the cohort.

    try:
        rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    filter_request_input(request, rm)
    try:
        format_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    if rm.is_user:
        project = rm.project if rm.project else 'enwiki'
        if not MediaWikiUser.is_user_name(cohort, project):
            logging.error(__name__ + ' :: "{0}" is not a valid username '
                                     'in "{1}"'.format(cohort, project))
            return redirect(url_for('all_cohorts') + '?error=3')
    else:
        # @TODO CALL COHORT VALIDATION HERE
        pass

    # Determine if the request maps to an existing response.
    #
    # 1. If the response already exists in the hash, return it.
    # 2. Otherwise, add the request to the queue.

    data = get_data(rm)
    key_sig = build_key_signature(rm, hash_result=True)

    # Is the request already running?
    is_running = req_cb_get_is_running(key_sig, VIEW_LOCK)

    # Determine if the request is already hashed
    if data and not refresh:
        return make_response(jsonify(data))

    # Determine if the job is already running
    elif is_running:
        return render_template('processing.html',
                               error=error_codes[0],
                               url_str=str(rm))

    # Add the request to the queue
    else:
        api_request_queue.put(unpack_fields(rm), block=True)
        req_cb_add_req(key_sig, url, VIEW_LOCK)

    return render_template('processing.html', url_str=str(rm))
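
# Illustrative only (an assumption; the real URL rules live elsewhere in
# the project): one plausible way to bind the output view to a Flask app.
# The /cohorts/<cohort>/<metric> rule and the helper name are hypothetical.
def _register_output_view(app):
    app.add_url_rule('/cohorts/<string:cohort>/<string:metric>',
                     view_func=output)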