def _fetch_list_locked(self, url, query_string, query_params, headers):
    url_full = "%s?%s" % (url, query_string)

    jobs = [(self._fetch, url, query_string, query_params, headers)]
    q = pool_apply(self.store.finder.worker_pool(), jobs)

    log.debug('RemoteReader:: Storing FetchInProgress for %s' % url_full)
    return FetchInProgress(_Results(q))
def fetch(self, startTime, endTime, now=None, requestContext=None):
    seriesList = self.fetch_list(startTime, endTime, now, requestContext)

    def _fetch(seriesList):
        if seriesList is None:
            return None

        for series in seriesList:
            if series['name'] == self.metric_path:
                time_info = (series['start'], series['end'], series['step'])
                return (time_info, series['values'])

        return None

    if isinstance(seriesList, FetchInProgress):
        return FetchInProgress(lambda: _fetch(seriesList.waitForResults()))

    return _fetch(seriesList)
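
# For context: every reader in this collection defers its work behind a
# FetchInProgress object. A minimal sketch of that wrapper, assuming it is
# simply a deferred callable (the real class ships with graphite-web's
# readers module):
class FetchInProgress(object):
    def __init__(self, wait_callback):
        # The callback performs (or waits for) the actual fetch.
        self.wait_callback = wait_callback

    def waitForResults(self):
        return self.wait_callback()

# Typical caller-side unwrapping (hypothetical usage):
#   result = reader.fetch(startTime, endTime)
#   if isinstance(result, FetchInProgress):
#       result = result.waitForResults()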
def fetch(self, startTime, endTime):
    query_params = [
        ('target', self.query),
        ('format', 'pickle'),
        ('local', '1'),
        ('noCache', '1'),
        ('from', str(int(startTime))),
        ('until', str(int(endTime))),
    ]
    query_string = urlencode(query_params)
    urlpath = '/render/?' + query_string
    url = "http://%s%s" % (self.store.host, urlpath)

    fetch_result = self.get_inflight_requests(url, urlpath)

    def extract_my_results():
        series = fetch_result.get().get(self.metric_path, None)
        if not series:
            return None
        time_info = (series['start'], series['end'], series['step'])
        return (time_info, series['values'])

    return FetchInProgress(extract_my_results)
def fetch(self, startTime, endTime):
    def get_data():
        if self.shared_reader.node_count > app_settings.OPENTSDB_METRIC_QUERY_LIMIT:
            # Large queries go through the shared reader's bulk path.
            data = self.shared_reader.get(
                self.opentsdb_uri,
                app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL,
                self.leaf_data,
                int(startTime),
                int(endTime),
            )
        else:
            # Otherwise query OpenTSDB directly for this series' TSUID.
            data = requests.get(
                "%s/query?tsuid=sum:%ds-avg:%s&start=%d&end=%d" % (
                    self.opentsdb_uri,
                    app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL,
                    self.leaf_data['tsuid'],
                    int(startTime),
                    int(endTime),
                )).json()

        time_info = (startTime, endTime, self.step)
        number_points = int((endTime - startTime) // self.step)
        datapoints = [None for i in range(number_points)]

        for series in data:
            for timestamp, value in series['dps'].items():
                timestamp = int(timestamp)
                # Snap the timestamp to the start of its aggregation interval,
                # then map it onto an index in the fixed-step output array.
                interval = timestamp - (
                    timestamp % app_settings.OPENTSDB_DEFAULT_AGGREGATION_INTERVAL)
                index = (interval - int(startTime)) // self.step
                datapoints[index] = value

        return (time_info, datapoints)

    job = app_settings.OPENTSDB_REQUEST_POOL.apply_async(get_data)
    return FetchInProgress(job.get)
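
# A small, self-contained sketch (made-up numbers) of the bucketing arithmetic
# used by the OpenTSDB reader above: each datapoint's timestamp is snapped to
# the start of its aggregation interval, then mapped onto a fixed-step array.
def _demo_opentsdb_bucketing():
    AGG = 60          # assumed aggregation interval, seconds
    step = 60         # assumed output step, seconds
    startTime, endTime = 1500000000, 1500000300
    dps = {"1500000007": 1.0, "1500000061": 2.5, "1500000245": 4.0}

    datapoints = [None] * int((endTime - startTime) // step)
    for ts, value in dps.items():
        ts = int(ts)
        interval = ts - (ts % AGG)              # snap to interval start
        datapoints[(interval - startTime) // step] = value

    return datapoints  # [1.0, 2.5, None, None, 4.0]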
def fetch(self, startTime, endTime):
    query_params = [
        ('target', self.query),
        ('format', 'pickle'),
        ('local', '1'),
        ('noCache', '1'),
        ('from', str(int(startTime))),
        ('until', str(int(endTime))),
    ]
    query_string = urlencode(query_params)
    urlpath = '/render/?' + query_string
    url = "http://%s%s" % (self.store.host, urlpath)

    # Quick cache check up front
    self.clean_cache()
    cached_results = self.request_cache.get(url)
    if cached_results:
        for series in cached_results:
            if series['name'] == self.metric_path:
                time_info = (series['start'], series['end'], series['step'])
                return (time_info, series['values'])

    # Synchronize with other RemoteReaders using the same bulk query.
    # Despite our use of thread synchronization primitives, the common
    # case is for synchronizing asynchronous fetch operations within
    # a single thread.
    (request_lock, completion_event) = self.get_request_locks(url)

    def request_series():
        # the FetchInProgress that gets waited on waits for the actual completion
        if request_lock.acquire(False):
            try:
                log.info("RemoteReader.request_data :: requesting %s" % url)
                connection = HTTPConnectionWithTimeout(self.store.host)
                connection.timeout = settings.REMOTE_FETCH_TIMEOUT
                connection.request('GET', urlpath)
                response = connection.getresponse()
                if response.status != 200:
                    raise Exception("Error response %d %s from %s" %
                                    (response.status, response.reason, url))

                pickled_response = response.read()
                results = unpickle.loads(pickled_response)
                self.cache_lock.acquire()
                self.request_cache[url] = results
                self.cache_lock.release()
                completion_event.set()
                return results
            except:
                completion_event.set()
                self.store.fail()
                log.exception("Error requesting %s" % url)
                raise
        else:
            # otherwise we just wait on the completion_event
            completion_event.wait(settings.REMOTE_FETCH_TIMEOUT)
            cached_results = self.request_cache.get(url)
            if cached_results is None:
                raise Exception("Passive remote fetch failed to find cached results")
            else:
                return cached_results

    def extract_my_results():
        for series in request_series():
            if series['name'] == self.metric_path:
                time_info = (series['start'], series['end'], series['step'])
                return (time_info, series['values'])

    return FetchInProgress(extract_my_results)
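
# get_request_locks() is not shown above. A plausible (hypothetical) sketch,
# consistent with how the reader uses the pair, is one shared (Lock, Event)
# per URL, so that exactly one reader performs the HTTP request while the
# others wait on the completion event:
import threading

_request_locks = {}
_request_locks_guard = threading.Lock()

def get_request_locks(url):
    # Hypothetical helper: return the same (request_lock, completion_event)
    # pair to every caller asking about the same URL.
    with _request_locks_guard:
        if url not in _request_locks:
            _request_locks[url] = (threading.Lock(), threading.Event())
        return _request_locks[url]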
def fetch_list(self, startTime, endTime, now=None, requestContext=None):
    t = time.time()
    query_params = [
        ('target', self.query),
        ('format', 'pickle'),
        ('local', '1'),
        ('from', str(int(startTime))),
        ('until', str(int(endTime))),
    ]
    if now is not None:
        query_params.append(('now', str(int(now))))

    query_string = urlencode(query_params)
    urlpath = '/render/'
    url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http',
                         self.store.host, urlpath)
    headers = requestContext.get('forwardHeaders') if requestContext else None

    cacheKey = "%s?%s" % (url, query_string)

    if (requestContext is not None
            and 'inflight_requests' in requestContext
            and cacheKey in requestContext['inflight_requests']):
        self.log_debug("RemoteReader:: Returning cached FetchInProgress %s?%s" %
                       (url, query_string))
        return requestContext['inflight_requests'][cacheKey]

    if (requestContext is None
            or 'inflight_locks' not in requestContext
            or cacheKey not in requestContext['inflight_locks']):
        with self.inflight_lock:
            self.log_debug("RemoteReader:: Got global lock %s?%s" % (url, query_string))
            if requestContext is None:
                requestContext = {}
            if 'inflight_locks' not in requestContext:
                requestContext['inflight_locks'] = {}
            if 'inflight_requests' not in requestContext:
                requestContext['inflight_requests'] = {}
            if cacheKey not in requestContext['inflight_locks']:
                self.log_debug("RemoteReader:: Creating lock %s?%s" % (url, query_string))
                requestContext['inflight_locks'][cacheKey] = Lock()
        self.log_debug("RemoteReader:: Released global lock %s?%s" % (url, query_string))

    cacheLock = requestContext['inflight_locks'][cacheKey]

    with cacheLock:
        self.log_debug("RemoteReader:: got url lock %s?%s" % (url, query_string))

        if cacheKey in requestContext['inflight_requests']:
            self.log_debug("RemoteReader:: Returning cached FetchInProgress %s?%s" %
                           (url, query_string))
            return requestContext['inflight_requests'][cacheKey]

        q = Queue()
        if settings.USE_WORKER_POOL:
            get_pool().apply_async(
                func=self._fetch,
                args=[url, query_string, query_params, headers],
                callback=lambda x: q.put(x),
            )
        else:
            q.put(self._fetch(url, query_string, query_params, headers))

        def retrieve():
            with retrieve.lock:
                # if the result is known we return it directly
                if hasattr(retrieve, '_result'):
                    results = getattr(retrieve, '_result')
                    self.log_debug(
                        'RemoteReader:: retrieve completed (cached) %s' %
                        (', '.join([result['path'] for result in results])),
                    )
                    return results

                # otherwise we get it from the queue and keep it for later
                results = q.get(block=True)

                for i in range(len(results)):
                    results[i]['path'] = results[i]['name']

                if not results:
                    self.log_debug('RemoteReader:: retrieve has received no results')

                setattr(retrieve, '_result', results)
                self.log_debug(
                    'RemoteReader:: retrieve completed %s' %
                    (', '.join([result['path'] for result in results])),
                )
                return results

        self.log_debug(
            'RemoteReader:: Storing FetchInProgress with cacheKey {cacheKey}'
            .format(cacheKey=cacheKey),
        )
        retrieve.lock = Lock()
        data = FetchInProgress(retrieve)
        requestContext['inflight_requests'][cacheKey] = data

    self.log_debug("RemoteReader:: Returning %s?%s in %fs" %
                   (url, query_string, time.time() - t))
    return data
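
# Usage sketch (hypothetical names): because the FetchInProgress is memoized
# in requestContext['inflight_requests'] under "url?query_string", readers
# that resolve to the same bulk /render query within one request share a
# single HTTP fetch:
#
#   ctx = {}
#   a = reader_a.fetch_list(startTime, endTime, requestContext=ctx)
#   b = reader_b.fetch_list(startTime, endTime, requestContext=ctx)
#   # a is b when both readers target the same host and query string
#   series = a.waitForResults()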
def fetch(self, startTime, endTime):
    def get_data(startTime, endTime):
        log.info("time range %d-%d" % (startTime, endTime))
        host, metric = self.metric_name.split("com.")
        host += "com"
        s = self.kudu_table.scanner()
        s.add_predicate(s.range_predicate(0, host, host))
        s.add_predicate(s.range_predicate(1, metric, metric))
        s.add_predicate(s.range_predicate(2, startTime, endTime))
        s.open()

        values = []
        while s.has_more_rows():
            t = s.next_batch().as_tuples()
            log.info("metric batch: %d" % len(t))
            # TODO: project just the time and value, not host/metric!
            values.extend([(time, value) for (_, _, time, value) in t])

        values.sort()
        values_length = len(values)

        if values_length == 0:
            time_info = (startTime, endTime, 1)
            datapoints = []
            return (time_info, datapoints)

        startTime = min(t[0] for t in values)
        endTime = max(t[0] for t in values)

        if values_length == 1:
            time_info = (startTime, endTime, 1)
            datapoints = [values[0][1]]
            return (time_info, datapoints)

        log.info("data: %s" % repr(values))

        # 1. Calculate step (in seconds)
        # Step will be the lowest time delta between consecutive values,
        # or 1 in case that delta is smaller.
        step = 1
        minDelta = None

        for i in range(0, values_length - 1):
            (timeI, valueI) = values[i]
            (timeIplus1, valueIplus1) = values[i + 1]
            delta = timeIplus1 - timeI
            if minDelta is None or delta < minDelta:
                minDelta = delta

        if minDelta > step:
            step = minDelta

        # 2. Fill time info table
        time_info = (startTime, endTime, step)

        # 3. Create array of output points
        number_points = int(math.ceil((endTime - startTime) / step))
        datapoints = [None for i in range(number_points)]

        # 4. Fill array of output points
        cur_index = 0
        cur_value = None
        cur_time_stamp = None
        cur_value_used = None

        for i in range(0, number_points):
            data_point_time_stamp = startTime + i * step

            (cur_time_stamp, cur_value) = values[cur_index]
            # Advance to the latest raw value that is not newer than this
            # output timestamp.
            while cur_index + 1 < values_length:
                (next_time_stamp, next_value) = values[cur_index + 1]
                if next_time_stamp > data_point_time_stamp:
                    break
                (cur_value, cur_time_stamp, cur_value_used) = (next_value, next_time_stamp, False)
                cur_index = cur_index + 1

            data_point_value = None
            if not cur_value_used and cur_time_stamp <= data_point_time_stamp:
                cur_value_used = True
                data_point_value = cur_value

            datapoints[i] = data_point_value

        log.info("data: %s" % repr(datapoints))
        return (time_info, datapoints)

    job = KUDU_REQUEST_POOL.apply_async(get_data, [startTime, endTime])
    return FetchInProgress(job.get)
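
# A tiny, self-contained sketch (made-up numbers) of the step inference used
# by the Kudu reader above: the output step is the smallest gap between
# consecutive timestamps, floored at 1 second.
def _demo_step_inference():
    values = [(100, 1.0), (110, 2.0), (140, 3.0)]   # (timestamp, value) pairs
    step = 1
    minDelta = None
    for (t0, _), (t1, _) in zip(values, values[1:]):
        delta = t1 - t0
        if minDelta is None or delta < minDelta:
            minDelta = delta
    if minDelta is not None and minDelta > step:
        step = minDelta
    return step  # 10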