def __init__(self, *args, **kwargs):
    """Initialize the node, accepting optional ``name``/``label`` overrides.

    ``name`` and ``label`` are stripped out of ``kwargs`` before delegating
    to ``BranchNode.__init__`` so the parent never sees them; they are then
    applied on top of whatever the parent set (only if truthy, matching the
    original behavior).
    """
    # dict.has_key() was removed in Python 3; pop() performs the membership
    # test, the read and the deletion in a single call and works on both.
    name = kwargs.pop('name', None)
    label = kwargs.pop('label', None)

    BranchNode.__init__(self, *args, **kwargs)

    if name:
        self.name = name
    if label:
        self.label = label

    # XXX workaround for renaming in graphite
    self.metric_path = self.path
def find_nodes(self, query):
    """Yield hard-coded test nodes: a branch for 'foo', ten leaves for 'bar.*'."""
    pattern = query.pattern
    if pattern == 'foo':
        yield BranchNode('foo')
    elif pattern == 'bar.*':
        for index in range(10):
            metric = 'bar.{0}'.format(index)
            yield LeafNode(metric, DummyReader(metric))
def find_nodes(self, query):
    """Yield Node objects under ``self.directories`` matching the dotted query.

    Directories become BranchNodes; files become LeafNodes backed by a
    Whisper, gzipped-Whisper or RRD reader, chosen by file extension.
    """
    # Strip escape backslashes before matching; escaped fields are restored
    # into the resulting metric path further below.
    clean_pattern = query.pattern.replace('\\', '')
    pattern_parts = clean_pattern.split('.')

    for root_dir in self.directories:
        for absolute_path in self._find_paths(root_dir, pattern_parts):
            # Skip hidden files/directories.
            if basename(absolute_path).startswith('.'):
                continue

            # RRD paths may carry a "path<DELIM>datasource" suffix selecting
            # a datasource pattern inside the RRD file.
            if self.DATASOURCE_DELIMITER in basename(absolute_path):
                (absolute_path, datasource_pattern) = absolute_path.rsplit(
                    self.DATASOURCE_DELIMITER, 1)
            else:
                datasource_pattern = None

            relative_path = absolute_path[ len(root_dir): ].lstrip('/')
            metric_path = fs_to_metric(relative_path)
            real_metric_path = get_real_metric_path(absolute_path, relative_path)

            # Re-insert the literal text of any escaped fields from the
            # original (unstripped) pattern into the metric path.
            metric_path_parts = metric_path.split('.')
            for field_index in find_escaped_pattern_fields(query.pattern):
                metric_path_parts[field_index] = pattern_parts[field_index].replace('\\', '')
            metric_path = '.'.join(metric_path_parts)

            # Now we construct and yield an appropriate Node object
            if isdir(absolute_path):
                yield BranchNode(metric_path)

            elif isfile(absolute_path):
                if absolute_path.endswith('.wsp') and WhisperReader.supported:
                    reader = WhisperReader(absolute_path, real_metric_path)
                    yield LeafNode(metric_path, reader)

                elif absolute_path.endswith('.wsp.gz') and GzippedWhisperReader.supported:
                    reader = GzippedWhisperReader(absolute_path, real_metric_path)
                    yield LeafNode(metric_path, reader)

                elif absolute_path.endswith('.rrd') and RRDReader.supported:
                    if datasource_pattern is None:
                        # No datasource selector: expose the RRD file itself
                        # as a branch containing its datasources.
                        yield BranchNode(metric_path)
                    else:
                        for datasource_name in RRDReader.get_datasources(absolute_path):
                            if match_entries([datasource_name], datasource_pattern):
                                reader = RRDReader(absolute_path, datasource_name)
                                yield LeafNode(metric_path + "." + datasource_name, reader)
def get_results(self):
    """Yield Node objects parsed from the remote find response.

    Yields nothing if this store has already failed.  Results are taken
    from the cached response when present; otherwise the pending HTTP
    response is read, unpickled, and cached for FIND_CACHE_DURATION.
    """
    if self.failed:
        return

    if self.cachedResult is not None:
        results = self.cachedResult
    else:
        if self.connection is None:
            self.send()

        try:
            try:
                # Python 2.7+, use buffering of HTTP responses
                response = self.connection.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                response = self.connection.getresponse()

            assert response.status == 200, "received error response %s - %s" % (
                response.status, response.reason)
            result_data = response.read()
            results = unpickle.loads(result_data)
        # Narrowed from a bare ``except:`` which also swallowed SystemExit
        # and KeyboardInterrupt; any processing error still marks the store
        # failed and yields no nodes.
        except Exception:
            log.exception(
                "FindRequest.get_results(host=%s, query=%s) exception processing response" %
                (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self.store, node_info,
                                  bulk_query=self.query.pattern)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as a remote (non-local) node.
        node.local = False
        yield node
def find_nodes(self, query):
    """Yield leaf and branch nodes for the query, preserving DB order."""
    child_paths = self.tree.selfAndChildPaths(query.pattern)
    metric_paths = [path for path, is_metric in child_paths if is_metric]
    metric_nodes = self.tree.getNode(metric_paths)

    # make sure we yield in the DB order
    for path, is_metric in child_paths:
        if is_metric:
            yield LeafNode(path, CassandraReader(metric_nodes[path], path))
        else:
            yield BranchNode(path)
def find_nodes(self, query):
    """Query graphouse for metrics; a trailing '.' marks a branch entry."""
    response = requests.post('%s/search' % graphouse_url,
                             data={'query': query.pattern})
    response.raise_for_status()

    for metric in response.text.split('\n'):
        if not metric:
            continue
        if metric.endswith('.'):
            # Trailing dot means a directory: strip it and yield a branch.
            yield BranchNode(metric[:-1])
        else:
            yield LeafNode(metric, GraphouseReader(metric))
def _find_paths(self, currNodeRowKey, patterns):
    """Recursively generates absolute paths whose components
    underneath current_node match the corresponding pattern in patterns"""
    from graphite.node import BranchNode, LeafNode
    from graphite.intervals import Interval, IntervalSet

    # Consume the first pattern component; the rest recurse below.
    pattern = patterns[0]
    patterns = patterns[1:]

    nodeRow = self.client.getRow(self.metaTable, currNodeRowKey, None)
    if len(nodeRow) == 0:
        return

    # Collect this node's children: column name -> child row key.
    subnodes = {}
    for k, v in nodeRow[0].columns.items():
        if k.startswith("cf:c_"):  # branches start with c_
            key = k.split("_", 2)[1]  # pop off cf:c_ prefix
            subnodes[key] = v.value

    matching_subnodes = match_entries(subnodes.keys(), pattern)

    if patterns:  # we've still got more directories to traverse
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            subNodeContents = self.client.getRow(self.metaTable, rowKey, None)

            # leafs have a cf:INFO column describing their data
            # we can't possibly match on a leaf here because we have more
            # components in the pattern, so only recurse on branches
            if "cf:INFO" not in subNodeContents[0].columns:
                for m in self._find_paths(rowKey, patterns):
                    yield m

    else:  # at the end of the pattern
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            nodeRow = self.client.getRow(self.metaTable, rowKey, None)
            if len(nodeRow) == 0:
                continue
            metric = rowKey.split("_", 2)[1]  # pop off "m_" in key
            if "cf:INFO" in nodeRow[0].columns:
                # Leaf: expose an interval covering the retention window
                # ending now.
                info = json.loads(nodeRow[0].columns["cf:INFO"].value)
                start = time.time() - info['maxRetention']
                end = time.time()
                intervals = IntervalSet([Interval(start, end)])
                reader = HbaseReader(metric, intervals, info, self)
                yield LeafNode(metric, reader)
            else:
                yield BranchNode(metric)
def find_nodes(self, query):
    """Yield Ceres leaves (with data in the query window) and branch dirs."""
    for fs_path in glob(self.tree.getFilesystemPath(query.pattern)):
        metric_path = self.tree.getNodePath(fs_path)

        if CeresNode.isNodeDir(fs_path):
            node = self.tree.getNode(metric_path)
            # Only surface leaves that actually cover the requested window.
            if node.hasDataForInterval(query.startTime, query.endTime):
                real_path = get_real_metric_path(fs_path, metric_path)
                yield LeafNode(metric_path, CeresReader(node, real_path))
        elif isdir(fs_path):
            yield BranchNode(metric_path)
def find_nodes(self, query):
    """Yield Ceres nodes for every variant expansion of the query pattern."""
    for variant in extract_variants(query.pattern):
        for fs_path in glob(self.tree.getFilesystemPath(variant)):
            metric_path = self.tree.getNodePath(fs_path)

            if CeresNode.isNodeDir(fs_path):
                ceres_node = self.tree.getNode(metric_path)
                # Skip leaves with no data in the requested window.
                if not ceres_node.hasDataForInterval(query.startTime,
                                                     query.endTime):
                    continue
                relative = fs_path[len(self.directory):].lstrip('/')
                real_path = get_real_metric_path(fs_path, relative)
                yield LeafNode(metric_path, CeresReader(ceres_node, real_path))
            elif os.path.isdir(fs_path):
                yield BranchNode(metric_path)
def find_nodes(self, query):
    """Yield leaf/branch nodes for the parts of the tree matching the query.

    Only handles queries addressed to this finder (first path component is
    '*' or one of this finder's tree roots); anything else yields nothing.
    """
    log.info('find_nodes: %s' % (query.pattern))

    # Parse the query.  A list comprehension is used instead of
    # ``filter(None, ...)`` because ``filter`` returns a lazy iterator on
    # Python 3, which would break the ``path_items[0]`` index access below.
    path_items = [part for part in query.pattern.split('.') if part]

    records = []
    # Take request addressed only for this finder
    if path_items[0] == '*' or path_items[0] in self.tree:
        # Get the part of tree described by the query
        records = self.get_records(path_items)

    # Build node
    for record in records:
        if record['leaf']:
            yield LeafNode(record['id'], RandomReader(record['id']))
        else:
            yield BranchNode(record['id'])
def find_nodes(self, query):
    """Search graphouse for the pattern (graphite 1.0 and older entry point).

    Leaves share one GraphouseMultiFetcher; when it overflows, a fresh
    fetcher is started for the remaining metrics.
    """
    result = self._search_request(query.pattern)

    fetcher = GraphouseMultiFetcher()
    for metric in result[1]:
        if not metric:
            continue
        if metric.endswith('.'):
            # Trailing dot marks a directory entry.
            yield BranchNode(metric[:-1])
            continue
        try:
            yield LeafNode(metric, GraphouseReader(metric, fetcher=fetcher))
        except OverflowError:
            # Current batch is full: roll over to a new fetcher and retry.
            fetcher = GraphouseMultiFetcher()
            yield LeafNode(metric, GraphouseReader(metric, fetcher=fetcher))
def get_results(self):
    """Yield Node objects parsed from the remote find response (0.9.x format).

    Yields nothing if this store has already failed.  Results come from the
    cached response when present; otherwise the pending HTTP response is
    read, unpickled, and cached for FIND_CACHE_DURATION.
    """
    if self.failed:
        return

    if self.cachedResult is not None:
        results = self.cachedResult
    else:
        if self.connection is None:
            self.send()

        try:
            try:
                # Python 2.7+, use buffering of HTTP responses
                response = self.connection.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                response = self.connection.getresponse()

            assert response.status == 200, "received error response %s - %s" % (
                response.status, response.reason)
            result_data = response.read()
            results = unpickle.loads(result_data)
        # Narrowed from a bare ``except:`` which also swallowed SystemExit
        # and KeyboardInterrupt; any processing error still marks the store
        # failed and yields no nodes.
        except Exception:
            log.exception(
                "FindRequest.get_results(host=%s, query=%s) exception processing response" %
                (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        if node_info.get('is_leaf'):
            reader = RemoteReader(self.store, node_info,
                                  bulk_query=self.query.pattern)
            node = LeafNode(node_info['path'], reader)
        else:
            node = BranchNode(node_info['path'])

        # Mark as a remote (non-local) node.
        node.local = False
        yield node
def find_nodes(self, query):
    """Yield a fixed dummy hierarchy: branch 'a.b.c' plus two leaves."""
    yield BranchNode('a.b.c')
    for path in ('a.b.c.d', 'a.b.c.e'):
        yield LeafNode(path, DummyReader(path))
def send(self, headers=None, msg_setter=None):
    """Issue /metrics/find/ against the remote store and yield Node objects.

    Serves from the find cache when possible; on any request, HTTP-status
    or unpickling failure the store is marked failed and nothing is yielded.
    """
    log.debug("FindRequest.send(host=%s, query=%s) called" %
              (self.store.host, self.query))

    if headers is None:
        headers = {}

    results = cache.get(self.cacheKey)
    if results is not None:
        log.debug("FindRequest.send(host=%s, query=%s) using cached result" %
                  (self.store.host, self.query))
    else:
        url = "%s://%s/metrics/find/" % (
            'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

        query_params = [
            ('local', '1'),
            ('format', 'pickle'),
            ('query', self.query.pattern),
        ]
        if self.query.startTime:
            query_params.append(('from', self.query.startTime))
        if self.query.endTime:
            query_params.append(('until', self.query.endTime))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FIND_TIMEOUT)
        except BaseException:
            log.exception("FindRequest.send(host=%s, query=%s) exception during request" %
                          (self.store.host, self.query))
            self.store.fail()
            return

        if result.status != 200:
            log.exception(
                "FindRequest.send(host=%s, query=%s) error response %d from %s?%s" %
                (self.store.host, self.query, result.status, url,
                 urlencode(query_params)))
            self.store.fail()
            return

        try:
            results = unpickle.loads(result.data)
        except BaseException:
            log.exception("FindRequest.send(host=%s, query=%s) exception processing response" %
                          (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    msg_setter('host: {host}, query: {query}'.format(host=self.store.host,
                                                     query=self.query))

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        # Normalize to the 1.x node_info shape before building readers.
        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self.store, node_info,
                                  bulk_query=[self.query.pattern])
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as a remote (non-local) node.
        node.local = False
        yield node
def find_nodes(self, query, timer=None):
    """Query the remote host's /metrics/find/ and yield Node objects.

    Results are cached under a key bucketed by FIND_CACHE_DURATION so
    nearby time ranges share a cache entry.  Decoding failures mark this
    finder failed and re-raise.
    """
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" %
              (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    # Round start/end down to the cache TTL bucket so the cache key is
    # stable across a TTL window.
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern),
                                     start, end)
    results = cache.get(cacheKey)
    if results is not None:
        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                  (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))
        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(url,
                              fields=query_params,
                              headers=query.headers,
                              timeout=settings.REMOTE_FIND_TIMEOUT)

        try:
            # Response may be msgpack or pickle, keyed by content-type.
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(
                    BufferedHTTPReader(result,
                                       buffer_size=settings.REMOTE_BUFFER_SIZE),
                    encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(result,
                                       buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception("RemoteFinder[%s] Error decoding find response from %s: %s" %
                          (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" %
                            (result.url_full, err))
        finally:
            # Always return the connection to the pool.
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        # Normalize to the 1.x node_info shape before building readers.
        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as a remote (non-local) node.
        node.local = False
        yield node
def find_nodes(self, query):
    """Resolve the query against IRONdb and yield Leaf/Branch nodes.

    Retries the name lookup up to ``self.max_retries`` times on transient
    network/decoding errors; HTTP status errors abort immediately.  A
    trailing '.**' pattern triggers recursive expansion of each branch.
    """
    log.debug("IRONdbFinder.find_nodes, query: %s, max_retries: %d" %
              (query.pattern, self.max_retries))

    # '.**' means "this level and everything below": strip one '*' and
    # remember to recurse into branches at the end.
    metrics_expand = False
    if query.pattern.endswith('.**'):
        query.pattern = query.pattern[:-1]
        metrics_expand = True

    names = {}
    name_headers = copy.deepcopy(self.headers)
    name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
    for i in range(0, self.max_retries):
        try:
            if self.zipkin_enabled == True:
                # Use one random id for both trace and span headers.
                traceheader = binascii.hexlify(os.urandom(8))
                name_headers['X-B3-TraceId'] = traceheader
                name_headers['X-B3-SpanId'] = traceheader
                if self.zipkin_event_trace_level == 1:
                    name_headers['X-Mtev-Trace-Event'] = '1'
                elif self.zipkin_event_trace_level == 2:
                    name_headers['X-Mtev-Trace-Event'] = '2'
            r = requests.get(urls.names,
                             params={'query': query.pattern},
                             headers=name_headers,
                             timeout=((self.connection_timeout / 1000.0),
                                      (self.timeout / 1000.0)))
            r.raise_for_status()
            if r.headers['content-type'] == 'application/json':
                names = r.json()
            elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list':
                names = irondb_flatbuf.metric_find_results(r.content)
            else:
                pass
            break
        except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ConnectionError %s" % ex)
        except requests.exceptions.ConnectTimeout as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ConnectTimeout %s" % ex)
        except irondb_flatbuf.FlatBufferError as ex:
            # flatbuffer error, try again
            log.exception("IRONdbFinder.find_nodes FlatBufferError %s" % ex)
        except JSONDecodeError as ex:
            # json error, try again
            log.exception("IRONdbFinder.find_nodes JSONDecodeError %s" % ex)
        except requests.exceptions.ReadTimeout as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ReadTimeout %s" % ex)
        except requests.exceptions.HTTPError as ex:
            # http status code errors are failures, stop immediately
            log.exception("IRONdbFinder.find_nodes HTTPError %s %s" %
                          (ex, r.content))
            break

    if settings.DEBUG:
        log.debug("IRONdbFinder.find_nodes, result: %s" % json.dumps(names))

    # for each set of self.batch_size leafnodes, execute an IRONdbMeasurementFetcher
    # so we can do these in batches.
    measurement_headers = copy.deepcopy(self.headers)
    measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list'
    fetcher = IRONdbMeasurementFetcher(measurement_headers, self.timeout,
                                       self.connection_timeout,
                                       self.database_rollups,
                                       self.rollup_window, self.max_retries,
                                       self.zipkin_enabled,
                                       self.zipkin_event_trace_level)

    for name in names:
        if 'leaf' in name and 'leaf_data' in name:
            fetcher.add_leaf(name['name'], name['leaf_data'])
            reader = IRONdbReader(name['name'], fetcher)
            yield LeafNode(name['name'], reader)
        else:
            yield BranchNode(name['name'])
            if metrics_expand:
                # Recurse into this branch to emulate '.**' semantics.
                query = FindQuery(name['name'] + '.**', None, None)
                for node in self.find_nodes(query):
                    yield node
def find_nodes(self, query, timer=None):
    """Query the remote host's /metrics/find/ and return a list of Nodes.

    Same protocol as the generator variant, but materializes the nodes so
    the call is safe to run as a thread-pool job (see comment below).
    """
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" %
              (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    # Round start/end down to the cache TTL bucket so the cache key is
    # stable across a TTL window.
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern),
                                     start, end)
    results = cache.get(cacheKey)
    if results is not None:
        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                  (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))
        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(url,
                              fields=query_params,
                              headers=query.headers,
                              timeout=settings.FIND_TIMEOUT)

        try:
            # Response may be msgpack or pickle, keyed by content-type.
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(
                    BufferedHTTPReader(result,
                                       buffer_size=settings.REMOTE_BUFFER_SIZE),
                    encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(result,
                                       buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception("RemoteFinder[%s] Error decoding find response from %s: %s" %
                          (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" %
                            (result.url_full, err))
        finally:
            # Always return the connection to the pool.
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    # We don't use generator here, this function may be run as a job in a
    # thread pool, using a generator has the following risks:
    # 1. Generators are lazy, if we don't iterator the returned generator in
    #    the job, the real execution(network operations, time-consuming) are
    #    very likely be triggered in the calling thread, losing the effect
    #    of thread pool;
    # 2. As function execution is delayed, the job manager can not catch job
    #    runtime exception as expected/designed;
    nodes = []
    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        # Normalize to the 1.x node_info shape before building readers.
        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as a remote (non-local) node.
        node.local = False
        nodes.append(node)

    return nodes
def find_nodes(self, query):
    """Walk the metric index tree and yield nodes for matching entries."""
    for is_leaf, raw_path in self.metric_index.find_tree(query.pattern):
        path = raw_path.decode()
        if is_leaf:
            yield LeafNode(path, None)
        else:
            yield BranchNode(path)
def find_nodes(self, query):
    """Yield Node objects matching the query, with tagged-series support.

    Tagged queries (containing ';') are translated into carbon's encoded
    on-disk names before lookup; plain dotted patterns are globbed via
    ``_find_paths``.  Files map to Whisper/gzip-Whisper/RRD leaf readers.
    """
    # Strip escape backslashes before matching; escaped fields are restored
    # into the resulting metric path further below.
    clean_pattern = query.pattern.replace('\\', '')

    # translate query pattern if it is tagged
    tagged = not query.pattern.startswith(
        '_tagged.') and ';' in query.pattern
    if tagged:
        # tagged series are stored in whisper using encoded names, so to retrieve them we need to
        # encode the query pattern using the same scheme used in carbon when they are written.
        encoded_paths = [
            TaggedSeries.encode(query.pattern, sep=os.sep, hash_only=True),
            TaggedSeries.encode(query.pattern, sep=os.sep, hash_only=False),
        ]

    pattern_parts = clean_pattern.split('.')

    for root_dir in self.directories:
        if tagged:
            # Probe each encoded candidate against the supported on-disk
            # file extensions.
            relative_paths = []
            for pattern in encoded_paths:
                entries = [
                    pattern + '.wsp',
                    pattern + '.wsp.gz',
                    pattern + '.rrd',
                ]
                for entry in entries:
                    if isfile(join(root_dir, entry)):
                        relative_paths.append(entry)
        else:
            relative_paths = self._find_paths(root_dir, pattern_parts)

        for relative_path in relative_paths:
            # Skip hidden files/directories.
            if basename(relative_path).startswith('.'):
                continue

            # RRD paths may carry a "path<DELIM>datasource" suffix selecting
            # a datasource pattern inside the RRD file.
            if self.DATASOURCE_DELIMITER in basename(relative_path):
                (relative_path, datasource_pattern) = relative_path.rsplit(
                    self.DATASOURCE_DELIMITER, 1)
            else:
                datasource_pattern = None

            absolute_path = join(root_dir, relative_path)
            metric_path = fs_to_metric(relative_path)
            real_metric_path = get_real_metric_path(
                absolute_path, metric_path)

            # if we're finding by tag, return the proper metric path
            if tagged:
                metric_path = query.pattern
            else:
                # Re-insert the literal text of any escaped fields from the
                # original (unstripped) pattern into the metric path.
                metric_path_parts = metric_path.split('.')
                for field_index in find_escaped_pattern_fields(
                        query.pattern):
                    metric_path_parts[field_index] = pattern_parts[
                        field_index].replace('\\', '')
                metric_path = '.'.join(metric_path_parts)

            # Now we construct and yield an appropriate Node object
            if isdir(absolute_path):
                yield BranchNode(metric_path)
            elif absolute_path.endswith(
                    '.wsp') and WhisperReader.supported:
                reader = WhisperReader(absolute_path, real_metric_path)
                yield LeafNode(metric_path, reader)
            elif absolute_path.endswith(
                    '.wsp.gz') and GzippedWhisperReader.supported:
                reader = GzippedWhisperReader(absolute_path, real_metric_path)
                yield LeafNode(metric_path, reader)
            elif absolute_path.endswith('.rrd') and RRDReader.supported:
                if datasource_pattern is None:
                    # No datasource selector: expose the RRD file itself as
                    # a branch containing its datasources.
                    yield BranchNode(metric_path)
                else:
                    for datasource_name in RRDReader.get_datasources(
                            absolute_path):
                        if match_entries([datasource_name],
                                         datasource_pattern):
                            reader = RRDReader(absolute_path,
                                               datasource_name)
                            yield LeafNode(
                                metric_path + "." + datasource_name,
                                reader)