Пример #1
0
  def get_results(self):
    """Yield a node for every entry of the remote find response.

    Nothing is yielded when the request is flagged as failed, or when the
    response cannot be fetched and decoded; in the latter case the error is
    logged and ``self.store.fail()`` is called.  Freshly fetched results are
    stored in the cache under ``self.cacheKey``.
    """
    if self.failed:
      return

    if self.cachedResult is not None:
      results = self.cachedResult
    else:
      if self.connection is None:
        self.send()

      try:
        response = self.connection.getresponse()
        # Raise explicitly instead of asserting: asserts are stripped under
        # ``python -O``, which would let an error body reach the unpickler.
        if response.status != 200:
          raise Exception("received error response %s - %s" % (response.status, response.reason))
        result_data = response.read()
        results = unpickle.loads(result_data)

      except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        log.exception("FindRequest.get_results(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
        self.store.fail()
        return

      cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
      if node_info.get('is_leaf'):
        reader = RemoteReader(self.store, node_info, bulk_query=self.query.pattern)
        node = LeafNode(node_info['path'], reader)
      else:
        node = BranchNode(node_info['path'])

      # Mark the node as coming from a remote store.
      node.local = False
      yield node
Пример #2
0
  def get_results(self):
    """Yield a node for every entry of the remote find response.

    Nothing is yielded when the request is flagged as failed, or when the
    response cannot be fetched and decoded; in the latter case the error is
    logged and ``self.store.fail()`` is called.  Freshly fetched results are
    stored in the cache under ``self.cacheKey``.
    """
    if self.failed:
      return

    if self.cachedResult is not None:
      results = self.cachedResult
    else:
      if self.connection is None:
        self.send()

      try:
        response = self.connection.getresponse()
        # Raise explicitly instead of asserting: asserts are stripped under
        # ``python -O``, which would let an error body reach the unpickler.
        if response.status != 200:
          raise Exception("received error response %s - %s" % (response.status, response.reason))
        result_data = response.read()
        results = unpickle.loads(result_data)

      except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        log.exception("FindRequest.get_results(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
        self.store.fail()
        return

      cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
      if node_info.get('is_leaf'):
        reader = RemoteReader(self.store, node_info, bulk_query=self.query.pattern)
        node = LeafNode(node_info['path'], reader)
      else:
        node = BranchNode(node_info['path'])

      # Mark the node as coming from a remote store.
      node.local = False
      yield node
Пример #3
0
    def find_nodes(self, query, reqkey):
        """Yield nodes under ``self.directories`` that match ``query.pattern``.

        Directories are yielded as ``BranchNode``s.  ``.wsp`` and ``.wsp.gz``
        files are yielded as ``LeafNode``s when the corresponding reader class
        reports ``supported``.  An ``.rrd`` file is yielded as a ``BranchNode``
        when no datasource pattern was split off its path, otherwise as one
        ``LeafNode`` per datasource matching that pattern.  Entries whose
        basename starts with ``.`` are skipped.

        ``reqkey`` is accepted but not used by this method.
        """
        log.info("running blablabla RRd")
        clean_pattern = query.pattern.replace('\\', '')
        pattern_parts = clean_pattern.split('.')

        # The escaped fields depend only on the query, so resolve them once
        # here rather than once per matched path.
        escaped_fields = [
            (field_index, pattern_parts[field_index].replace('\\', ''))
            for field_index in find_escaped_pattern_fields(query.pattern)
        ]

        for root_dir in self.directories:
            for absolute_path in self._find_paths(root_dir, pattern_parts):
                if basename(absolute_path).startswith('.'):
                    continue

                if self.DATASOURCE_DELIMETER in basename(absolute_path):
                    (absolute_path, datasource_pattern) = absolute_path.rsplit(
                        self.DATASOURCE_DELIMETER, 1)
                else:
                    datasource_pattern = None

                relative_path = absolute_path[len(root_dir):].lstrip('/')
                metric_path = fs_to_metric(relative_path)
                real_metric_path = get_real_metric_path(
                    absolute_path, metric_path)

                # Put the literal (escaped) query fields back into the path.
                metric_path_parts = metric_path.split('.')
                for field_index, literal in escaped_fields:
                    metric_path_parts[field_index] = literal
                metric_path = '.'.join(metric_path_parts)

                # Now we construct and yield an appropriate Node object
                if isdir(absolute_path):
                    yield BranchNode(metric_path)
                    continue

                if not isfile(absolute_path):
                    continue

                if absolute_path.endswith('.wsp') and WhisperReader.supported:
                    reader = WhisperReader(absolute_path, real_metric_path)
                    yield LeafNode(metric_path, reader)

                elif (absolute_path.endswith('.wsp.gz')
                      and GzippedWhisperReader.supported):
                    reader = GzippedWhisperReader(absolute_path,
                                                  real_metric_path)
                    yield LeafNode(metric_path, reader)

                elif absolute_path.endswith('.rrd') and RRDReader.supported:
                    if datasource_pattern is None:
                        # No datasource requested: the rrd file acts as a branch.
                        yield BranchNode(metric_path)
                        continue

                    for datasource_name in RRDReader.get_datasources(
                            absolute_path):
                        if match_entries([datasource_name],
                                         datasource_pattern):
                            reader = RRDReader(absolute_path, datasource_name)
                            yield LeafNode(
                                metric_path + "." + datasource_name, reader)
Пример #4
0
    def get_results(self):
        """Yield a node for every entry of the remote find response.

        Nothing is yielded when the request is flagged as failed, or when the
        response cannot be fetched and decoded; in the latter case the error
        is logged and ``self.store.fail()`` is called.  Freshly fetched
        results are stored in the cache under ``self.cacheKey``.

        Each entry is normalised so that both the ``path``/``is_leaf`` and the
        ``metric_path``/``isLeaf`` key spellings are accepted, and intervals
        are converted to an ``IntervalSet`` when they are not one already.
        """
        if self.failed:
            return

        if self.cachedResult is not None:
            results = self.cachedResult
        else:
            if self.connection is None:
                self.send()

            try:
                try:  # Python 2.7+, use buffering of HTTP responses
                    response = self.connection.getresponse(buffering=True)
                except TypeError:  # Python 2.6 and older
                    response = self.connection.getresponse()
                # Raise explicitly instead of asserting: asserts are stripped
                # under ``python -O``, which would let an error body reach
                # the unpickler.
                if response.status != 200:
                    raise Exception("received error response %s - %s" % (
                        response.status, response.reason))
                result_data = response.read()
                results = unpickle.loads(result_data)

            except Exception:
                # ``Exception`` rather than a bare except, so
                # KeyboardInterrupt and SystemExit still propagate.
                log.exception(
                    "FindRequest.get_results(host=%s, query=%s) exception processing response"
                    % (self.store.host, self.query))
                self.store.fail()
                return

            cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet([
                    Interval(interval[0], interval[1])
                    for interval in intervals
                ])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self.store,
                                      node_info,
                                      bulk_query=self.query.pattern)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            # Mark the node as coming from a remote store.
            node.local = False
            yield node
Пример #5
0
    def test_MultiReader_init(self):
        """A MultiReader can be constructed from two whisper-backed leaf nodes."""
        self.create_whisper_hosts()
        self.addCleanup(self.wipe_whisper_hosts)

        hosts = (
            (self.worker1, 'hosts.worker1.cpu'),
            (self.worker2, 'hosts.worker2.cpu'),
        )
        nodes = [
            LeafNode(metric, WhisperReader(wsp_file, metric))
            for wsp_file, metric in hosts
        ]

        self.assertIsNotNone(MultiReader(nodes))
Пример #6
0
    def test_MultiReader_get_intervals(self):
        """Every interval of a two-node MultiReader runs from start_ts - 60 to start_ts."""
        self.create_whisper_hosts()
        self.addCleanup(self.wipe_whisper_hosts)

        hosts = (
            (self.worker1, 'hosts.worker1.cpu'),
            (self.worker2, 'hosts.worker2.cpu'),
        )
        nodes = [
            LeafNode(metric, WhisperReader(wsp_file, metric))
            for wsp_file, metric in hosts
        ]

        multi_reader = MultiReader(nodes)
        for span in multi_reader.get_intervals():
            self.assertEqual(int(span.start), self.start_ts - 60)
            self.assertEqual(int(span.end), self.start_ts)
Пример #7
0
    def find_nodes(self, query, cache_incomplete_nodes=None):
        """Yield nodes for the metrics CarbonLink expands ``query.pattern`` to.

        Nothing happens when ``CarbonLink.hosts`` is empty.  Otherwise the
        pattern (with backslashes removed) is expanded through
        ``CarbonLink.expand_query`` and every leaf metric is prechecked
        against ``query.startTime``:

        * if every precheck reports full existence, a ``LeafNode`` /
          ``BranchNode`` is yielded per metric;
        * otherwise, if every precheck reports partial existence, the nodes
          are stored in ``cache_incomplete_nodes`` (keyed by metric name)
          instead of being yielded;
        * otherwise nothing is produced.

        ``cache_incomplete_nodes`` is a dict filled in place; when it is
        omitted a private dict is used and the caller cannot see the entries.
        """
        clean_patterns = query.pattern.replace('\\', '')

        if cache_incomplete_nodes is None:
            cache_incomplete_nodes = {}

        # Without CarbonLink hosts there is nothing to query.
        if not CarbonLink.hosts:
            return

        # Wildcard and single-metric queries are handled the same way: expand
        # the query in CarbonLink.  Per the original notes the expansion is a
        # list of (metric_name, is_leaf) tuples, e.g.
        # [(metric1, False), (metric2, True)].  It is de-duplicated because of
        # BranchNodes.
        metrics = list(set(CarbonLink.expand_query(clean_patterns)))

        # Check that all metrics fall in the same valid query range; branch
        # nodes always count as existing.
        prechecks = [
            CarbonLink.precheck(name, query.startTime) if is_leaf else (True, True)
            for name, is_leaf in metrics
        ]
        exists = all(exist for exist, _partial in prechecks)
        partial_exists = all(partial for _exist, partial in prechecks)

        if exists:
            for name, is_leaf in metrics:
                if is_leaf:
                    yield LeafNode(name, CarbonCacheReader(name))
                else:
                    yield BranchNode(name)
        elif partial_exists:
            for name, is_leaf in metrics:
                if is_leaf:
                    cache_incomplete_nodes[name] = LeafNode(
                        name, CarbonCacheReader(name))
                else:
                    cache_incomplete_nodes[name] = BranchNode(name)
Пример #8
0
    def find_nodes(self, query):
        """Yield nodes for the names the service returns for ``query.pattern``.

        The name lookup is attempted up to ``max(urls.host_count, 3)`` times,
        moving on to the next attempt whenever ``requests`` raises a
        ``RequestException``; if every attempt fails no nodes are yielded.
        Leaf names are registered with an ``IronDBMeasurementFetcher`` that is
        replaced after every ``self.batch_size`` leaves, so leaves are grouped
        into batches.
        """
        def new_fetcher():
            # One fetcher serves one batch of leaves.
            return IronDBMeasurementFetcher(self.headers, self.timeout, self.connection_timeout, self.database_rollups)

        names = {}
        at_least_tries = 3
        for _attempt in range(max(urls.host_count, at_least_tries)):
            try:
                response = requests.get(
                    urls.names,
                    params={'query': query.pattern},
                    headers=self.headers,
                    timeout=((self.connection_timeout / 1000), (self.timeout / 1000)))
                names = response.json()
                break
            except requests.exceptions.RequestException:
                # on down nodes, try again on another node until we try them all
                continue

        leaves_in_batch = 0
        fetcher = new_fetcher()

        for entry in names:
            if not entry['leaf']:
                yield BranchNode(entry['name'])
                continue

            fetcher.add_leaf(entry['name'], entry['leaf_data'])
            reader = IronDBReader(entry['name'], fetcher)
            leaves_in_batch += 1
            if leaves_in_batch % self.batch_size == 0:
                # Batch is full: later leaves go to a fresh fetcher.
                fetcher = new_fetcher()
                leaves_in_batch = 0
            yield LeafNode(entry['name'], reader)
Пример #9
0
    def _get_branch_nodes(self, kudu_table, input_branch, path):
        results = input_branch.getChildren()
        if results:
            if path:
                path += '.'

            branches = []
            leaves = []

            for item in results:
                if item.isLeaf():
                    leaves.append(item)
                else:
                    branches.append(item)

            if (len(branches) != 0):
                for branch in branches:
                    node_name = branch.getName()
                    node_path = path + node_name
                    yield BranchNode(node_path), branch, node_name, node_path
            if (len(leaves) != 0):
                for leaf in leaves:
                    node_name = leaf.getName()
                    node_path = path + node_name
                    reader = KuduReader(self.kudu_table, node_path)
                    yield LeafNode(node_path,
                                   reader), leaf, node_name, node_path
Пример #10
0
    def find_nodes(self, query):
        """Yield Ceres nodes whose filesystem paths match ``query.pattern``.

        A pattern containing ``;`` that does not start with ``_tagged.`` is
        treated as a tagged series: it is encoded with
        ``TaggedSeries.encode`` before lookup and the yielded leaf carries the
        original pattern as its path.  Any other pattern is expanded with
        ``extract_variants``.  Node directories are yielded as ``LeafNode``s
        only when they have data for the queried interval; other directories
        are yielded as ``BranchNode``s.
        """
        pattern = query.pattern
        tagged = ';' in pattern and not pattern.startswith('_tagged.')

        if tagged:
            # Tagged series are stored in ceres under encoded names, so the
            # query pattern has to be encoded with the same scheme carbon
            # uses when writing them.
            variants = [TaggedSeries.encode(query.pattern)]
        else:
            variants = extract_variants(query.pattern)

        for variant in variants:
            for fs_path in glob(self.tree.getFilesystemPath(variant)):
                metric_path = self.tree.getNodePath(fs_path)

                if not CeresNode.isNodeDir(fs_path):
                    if os.path.isdir(fs_path):
                        yield BranchNode(metric_path)
                    continue

                ceres_node = self.tree.getNode(metric_path)
                if not ceres_node.hasDataForInterval(query.startTime,
                                                     query.endTime):
                    continue

                real_metric_path = get_real_metric_path(fs_path, metric_path)
                reader = CeresReader(ceres_node, real_metric_path)
                # When finding by tag, report the pattern as the metric path.
                yield LeafNode(query.pattern if tagged else metric_path,
                               reader)
Пример #11
0
    def test_MultiReader_fetch(self):
        """Fetching the last 5 seconds through a MultiReader returns the expected values."""
        self.create_whisper_hosts()
        self.addCleanup(self.wipe_whisper_hosts)

        hosts = (
            (self.worker1, 'hosts.worker1.cpu'),
            (self.worker2, 'hosts.worker2.cpu'),
        )
        multi_reader = MultiReader([
            LeafNode(metric, WhisperReader(wsp_file, metric))
            for wsp_file, metric in hosts
        ])

        _time_info, values = multi_reader.fetch(self.start_ts - 5,
                                                self.start_ts)
        self.assertEqual(values, [None, None, None, None, 1.0])
Пример #12
0
    def __init__(self, *args, **kwargs):
        """Initialise the node, accepting optional ``name`` and ``label`` keywords.

        ``name`` and ``label`` are removed from ``kwargs`` before the
        remaining arguments are forwarded to ``LeafNode.__init__``.  Each is
        set as an attribute afterwards only when it is truthy.
        ``metric_path`` is set to ``self.path``.
        """
        # dict.pop replaces the has_key/del pairs: has_key does not exist on
        # Python 3, and pop behaves the same on Python 2.
        name = kwargs.pop('name', None)
        label = kwargs.pop('label', None)

        LeafNode.__init__(self, *args, **kwargs)
        if name:
            self.name = name
        if label:
            self.label = label
        self.metric_path = self.path
Пример #13
0
    def __init__(self, *args, **kwargs):
        """Initialise the node, accepting optional ``name`` and ``label`` keywords.

        ``name`` and ``label`` are removed from ``kwargs`` before the
        remaining arguments are forwarded to ``LeafNode.__init__``.  Each is
        set as an attribute afterwards only when it is truthy.
        ``metric_path`` is set to ``self.path``.
        """
        # dict.pop replaces the has_key/del pairs: has_key does not exist on
        # Python 3, and pop behaves the same on Python 2.
        name = kwargs.pop('name', None)
        label = kwargs.pop('label', None)

        LeafNode.__init__(self, *args, **kwargs)
        if name:
            self.name = name
        if label:
            self.label = label
        self.metric_path = self.path
Пример #14
0
 def test_MultiReader_merge_normal(self):
     """Merging a sparse series with a complete one yields the complete values."""
     time_info = (1496252939, 1496252944, 1)
     sparse = (time_info, [None, None, None, None, 1.0])
     complete = (time_info, [1.0, 1.0, 1.0, 1.0, 1.0])

     leaf = LeafNode('hosts.worker1.cpu',
                     WhisperReader(self.worker1, 'hosts.worker1.cpu'))
     multi_reader = MultiReader([leaf])

     _merged_info, values = multi_reader.merge(sparse, complete)
     self.assertEqual(values, [1.0, 1.0, 1.0, 1.0, 1.0])
Пример #15
0
    def find_nodes(self, query):
        """Yield canned nodes for the two patterns this finder recognises.

        ``'foo'`` yields a single ``BranchNode('foo')``; ``'bar.*'`` yields
        ten ``LeafNode``s named ``bar.0`` to ``bar.9``, each with a
        ``DummyReader``.  Any other pattern yields nothing.
        """
        if query.pattern == 'foo':
            yield BranchNode('foo')

        elif query.pattern == 'bar.*':
            # range() instead of xrange(): xrange does not exist on Python 3.
            for i in range(10):
                path = 'bar.{0}'.format(i)
                yield LeafNode(path, DummyReader(path))
Пример #16
0
    def find_nodes(self, query):
        # Skeleton of a finder: look up the paths matching the query and
        # yield one node per path, along the lines of:
        #
        #   for path in matches:
        #       if is_branch(path):
        #           yield BranchNode(path)
        #       if is_leaf(path):
        #           yield LeafNode(path, Reader(path))

        # NOTE(review): `path` is not bound anywhere in this method as
        # written; it has to come from the lookup sketched above.
        reader = Reader(path)
        yield LeafNode(path, reader)
Пример #17
0
    def find_nodes(self, query):
        '''
        This method processed in graphite 1.0 and older

        Yields a node per non-empty metric name found in element 1 of the
        ``self._search_request`` result.  Names ending in ``.`` become
        ``BranchNode``s (without the trailing dot); all others become
        ``LeafNode``s whose readers share a ``GraphouseMultiFetcher``.  When
        an ``OverflowError`` is raised the fetcher is replaced by a new one
        and the leaf is built again with it.
        '''
        search_result = self._search_request(query.pattern)

        shared_fetcher = GraphouseMultiFetcher()
        for metric_name in search_result[1]:
            if not metric_name:
                continue

            if metric_name.endswith('.'):
                yield BranchNode(metric_name[:-1])
                continue

            try:
                yield LeafNode(
                    metric_name,
                    GraphouseReader(metric_name, fetcher=shared_fetcher))
            except OverflowError:
                # Start a new fetcher and retry this metric with it.
                shared_fetcher = GraphouseMultiFetcher()
                yield LeafNode(
                    metric_name,
                    GraphouseReader(metric_name, fetcher=shared_fetcher))
Пример #18
0
    def find_nodes(self, query):
        """Yield a node per path returned by ``self.tree.selfAndChildPaths``.

        The nodes for all metric paths are fetched with a single
        ``self.tree.getNode`` call.  Metric paths are yielded as ``LeafNode``s
        with a ``CassandraReader``; the rest as ``BranchNode``s.
        """
        entries = self.tree.selfAndChildPaths(query.pattern)
        metric_paths = [path for path, is_metric in entries if is_metric]
        nodes_by_path = self.tree.getNode(metric_paths)

        # make sure we yield in the DB order
        for path, is_metric in entries:
            if not is_metric:
                yield BranchNode(path)
                continue

            reader = CassandraReader(nodes_by_path[path], path)
            yield LeafNode(path, reader)
Пример #19
0
    def find_nodes(self, query):
        """Yield nodes for the metrics returned by the graphouse search endpoint.

        The pattern is POSTed to ``<graphouse_url>/search``; an HTTP error
        status raises through ``raise_for_status()``.  The response text is
        split into lines: empty lines are skipped, names ending in ``.``
        become ``BranchNode``s (without the trailing dot) and all others
        become ``LeafNode``s with a ``GraphouseReader``.
        """
        response = requests.post('%s/search' % graphouse_url,
                                 data={'query': query.pattern})
        response.raise_for_status()

        for metric_name in response.text.split('\n'):
            if not metric_name:
                continue

            if metric_name.endswith('.'):
                node = BranchNode(metric_name[:-1])
            else:
                node = LeafNode(metric_name, GraphouseReader(metric_name))
            yield node
Пример #20
0
    def _find_paths(self, currNodeRowKey, patterns):
        """Recursively yield the nodes below ``currNodeRowKey`` that match ``patterns``.

        ``patterns`` is a non-empty sequence of per-level patterns: the first
        one is matched against the children of the current row and the
        remainder is passed down the recursion.  At the last level each
        matching child is yielded as a ``LeafNode`` (when its row has a
        ``cf:INFO`` column) or a ``BranchNode``.  Rows that cannot be loaded
        are skipped.
        """

        from graphite.node import BranchNode, LeafNode
        from graphite.intervals import Interval, IntervalSet

        pattern = patterns[0]
        patterns = patterns[1:]

        nodeRow = self.client.getRow(self.metaTable, currNodeRowKey, None)
        if len(nodeRow) == 0:
            return

        subnodes = {}
        for k, v in nodeRow[0].columns.items():
            if k.startswith("cf:c_"):  # branches start with c_
                # pop off cf:c_ prefix
                # NOTE(review): with maxsplit=2 a child name that itself
                # contains "_" is cut at its first underscore - confirm names
                # never contain one.
                key = k.split("_", 2)[1]
                subnodes[key] = v.value

        matching_subnodes = match_entries(subnodes.keys(), pattern)

        if patterns:  # we've still got more directories to traverse
            for subnode in matching_subnodes:
                rowKey = subnodes[subnode]
                subNodeContents = self.client.getRow(self.metaTable, rowKey,
                                                     None)
                # The referenced row may be missing; skip it the same way the
                # end-of-pattern branch does, instead of raising IndexError.
                if len(subNodeContents) == 0:
                    continue

                # leafs have a cf:INFO column describing their data
                # we can't possibly match on a leaf here because we have more components in the pattern,
                # so only recurse on branches
                if "cf:INFO" not in subNodeContents[0].columns:
                    for m in self._find_paths(rowKey, patterns):
                        yield m

        else:  # at the end of the pattern
            for subnode in matching_subnodes:
                rowKey = subnodes[subnode]
                nodeRow = self.client.getRow(self.metaTable, rowKey, None)
                if len(nodeRow) == 0:
                    continue
                metric = rowKey.split("_", 2)[1]  # pop off "m_" in key
                if "cf:INFO" in nodeRow[0].columns:
                    info = json.loads(nodeRow[0].columns["cf:INFO"].value)
                    # Sample the clock once so the interval is exactly
                    # maxRetention wide.
                    now = time.time()
                    intervals = IntervalSet(
                        [Interval(now - info['maxRetention'], now)])
                    reader = HbaseReader(metric, intervals, info, self)
                    yield LeafNode(metric, reader)
                else:
                    yield BranchNode(metric)
Пример #21
0
  def find_nodes(self, query):
    """Yield Ceres nodes whose filesystem paths match ``query.pattern``.

    Node directories are yielded as ``LeafNode``s, but only when they have
    data for the queried interval; other directories are yielded as
    ``BranchNode``s.
    """
    fs_pattern = self.tree.getFilesystemPath(query.pattern)

    for fs_path in glob(fs_pattern):
      metric_path = self.tree.getNodePath(fs_path)

      if not CeresNode.isNodeDir(fs_path):
        if isdir(fs_path):
          yield BranchNode(metric_path)
        continue

      ceres_node = self.tree.getNode(metric_path)
      if ceres_node.hasDataForInterval(query.startTime, query.endTime):
        reader = CeresReader(ceres_node, get_real_metric_path(fs_path, metric_path))
        yield LeafNode(metric_path, reader)
Пример #22
0
    def find_nodes(self, query):
        """Yield nodes for the records of ``self.tree`` addressed by ``query.pattern``.

        The pattern is split on ``.`` and empty components are dropped.
        Records are looked up with ``self.get_records`` only when the first
        component is ``*`` or is contained in ``self.tree``; otherwise (and
        for a pattern with no components) nothing is yielded.  Records with a
        truthy ``leaf`` become ``LeafNode``s with a ``RandomReader``; the rest
        become ``BranchNode``s.
        """
        log.info('find_nodes: %s' % (query.pattern))

        # Parse the query.  A list is built explicitly because filter()
        # returns a non-subscriptable iterator on Python 3.
        path_items = [item for item in query.pattern.split('.') if item]

        records = []

        # Take request addressed only for this finder; an empty pattern
        # addresses nothing.
        if path_items and (path_items[0] == '*'
                           or path_items[0] in self.tree):
            # Get the part of tree described by the query
            records = self.get_records(path_items)

        # Build node
        for record in records:
            if record['leaf']:
                yield LeafNode(record['id'], RandomReader(record['id']))
            else:
                yield BranchNode(record['id'])
Пример #23
0
  def find_nodes(self, query):
    """Yield Ceres nodes matching any variant of ``query.pattern``.

    The pattern is expanded with ``extract_variants`` and each variant is
    globbed on the filesystem.  Node directories are yielded as
    ``LeafNode``s only when they have data for the queried interval; other
    directories are yielded as ``BranchNode``s.
    """
    for variant in extract_variants(query.pattern):
      for fs_path in glob(self.tree.getFilesystemPath(variant)):
        metric_path = self.tree.getNodePath(fs_path)

        if not CeresNode.isNodeDir(fs_path):
          if os.path.isdir(fs_path):
            yield BranchNode(metric_path)
          continue

        ceres_node = self.tree.getNode(metric_path)
        if not ceres_node.hasDataForInterval(query.startTime, query.endTime):
          continue

        # The real metric path is resolved from the path relative to
        # self.directory.
        relative_path = fs_path[len(self.directory):].lstrip('/')
        reader = CeresReader(ceres_node, get_real_metric_path(fs_path, relative_path))
        yield LeafNode(metric_path, reader)
Пример #24
0
    def find_multi(self, patterns, reqkey=None):
        '''
        This method processed in graphite 1.1 and newer from self.fetch

        One search job is created per pattern and all jobs are awaited with
        ``settings.FIND_TIMEOUT``.  Only leaf metrics are reported: empty
        names and names ending in ``.`` are dropped.  A random UUID is used
        as the request key when ``reqkey`` is falsy.

        Returns:
            Generator of (pattern, [nodes])
        '''
        if not reqkey:
            reqkey = uuid.uuid4()

        jobs = []
        for pattern in patterns:
            description = 'Query graphouse for {}'.format(pattern)
            jobs.append(Job(self._search_request, description, pattern))

        results = self.wait_jobs(jobs, settings.FIND_TIMEOUT,
                                 'Find nodes for {} request'.format(reqkey))

        for pattern, metric_names in results:
            leaf_nodes = [
                LeafNode(metric_name, None)
                for metric_name in metric_names
                if metric_name and not metric_name.endswith('.')
            ]
            yield (pattern, leaf_nodes)
Пример #25
0
    def test_RemoteFinder_fetch(self, http_request):
        """RemoteFinder.fetch returns the series decoded from the pickled HTTP response."""
        finder = test_finder = RemoteFinder()
        store = test_finder.remote_stores[0]
        reader = RemoteReader(store,
                              {'intervals': [], 'path': 'a.b.c.d'},
                              bulk_query='a.b.c.d')
        # NOTE(review): `node` is built but not referenced again; kept in
        # case its construction matters to the test - confirm.
        node = LeafNode('a.b.c.d', reader)
        startTime = 1496262000
        endTime = 1496262060

        data = [
            {
                'start': startTime,
                'step': 60,
                'end': endTime,
                'values': [1.0, 0.0, 1.0, 0.0, 1.0],
                'name': 'a.b.c.d',
            },
        ]
        # The patched HTTP layer answers every request with the pickled data.
        responseObject = HTTPResponse(body=StringIO(pickle.dumps(data)), status=200)
        http_request.return_value = responseObject

        ret = finder.fetch(['a.b.c.d'], startTime, endTime)

        # The same series is expected twice.
        # NOTE(review): presumably one entry per configured remote store -
        # confirm against the test settings.
        expected_series = {
            'name': 'a.b.c.d',
            'values': [1.0, 0.0, 1.0, 0.0, 1.0],
            'pathExpression': 'a.b.c.d',
            'time_info': (1496262000, 1496262060, 60),
        }
        expected_response = [dict(expected_series), dict(expected_series)]

        result = list(ret.waitForResults())
        self.assertEqual(result, expected_response)
Пример #26
0
 def find_nodes(self, query):
     """Yield a fixed tree: branch ``a.b.c`` followed by leaves ``a.b.c.d`` and ``a.b.c.e``."""
     yield BranchNode('a.b.c')
     for leaf_path in ('a.b.c.d', 'a.b.c.e'):
         yield LeafNode(leaf_path, DummyReader(leaf_path))
Пример #27
0
    def send(self, headers=None, msg_setter=None):
        """Run the remote find request and yield a node per result entry.

        Results are taken from the cache when present under
        ``self.cacheKey``; otherwise ``/metrics/find/`` on
        ``self.store.host`` is queried and the unpickled response is cached.
        On a request error, a non-200 status or an undecodable body the
        problem is logged, ``self.store.fail()`` is called and nothing is
        yielded.

        ``headers`` defaults to an empty dict.  ``msg_setter``, when given,
        is called once with a ``'host: ..., query: ...'`` description before
        the nodes are produced.
        """
        log.debug("FindRequest.send(host=%s, query=%s) called" %
                  (self.store.host, self.query))

        if headers is None:
            headers = {}

        results = cache.get(self.cacheKey)
        if results is not None:
            log.debug(
                "FindRequest.send(host=%s, query=%s) using cached result" %
                (self.store.host, self.query))
        else:
            url = "%s://%s/metrics/find/" % (
                'https' if settings.INTRACLUSTER_HTTPS else 'http',
                self.store.host)

            query_params = [
                ('local', '1'),
                ('format', 'pickle'),
                ('query', self.query.pattern),
            ]
            if self.query.startTime:
                query_params.append(('from', self.query.startTime))

            if self.query.endTime:
                query_params.append(('until', self.query.endTime))

            try:
                result = http.request(
                    'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                    url,
                    fields=query_params,
                    headers=headers,
                    timeout=settings.REMOTE_FIND_TIMEOUT)
            except Exception:
                # ``Exception`` rather than ``BaseException`` so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception during request"
                    % (self.store.host, self.query))
                self.store.fail()
                return

            if result.status != 200:
                # NOTE(review): log.exception is called here outside of an
                # exception handler, so there is no active traceback to log.
                log.exception(
                    "FindRequest.send(host=%s, query=%s) error response %d from %s?%s"
                    % (self.store.host, self.query, result.status, url,
                       urlencode(query_params)))
                self.store.fail()
                return

            try:
                results = unpickle.loads(result.data)
            except Exception:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception processing response"
                    % (self.store.host, self.query))
                self.store.fail()
                return

            cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

        # msg_setter is optional (it defaults to None), so only call it when
        # the caller supplied one.
        if msg_setter is not None:
            msg_setter('host: {host}, query: {query}'.format(
                host=self.store.host, query=self.query))

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet([
                    Interval(interval[0], interval[1])
                    for interval in intervals
                ])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self.store,
                                      node_info,
                                      bulk_query=[self.query.pattern])
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            # Mark the node as coming from a remote store.
            node.local = False
            yield node
Пример #28
0
    def find_nodes(self, query, timer=None):
        """Yield the nodes the remote host reports for ``query``.

        Results are cached under a key built from the host, a hash of the
        pattern, and the start/end times rounded down to a multiple of
        ``settings.FIND_CACHE_DURATION``.  On a cache miss ``/metrics/find/``
        is requested and the body is decoded as msgpack when the response
        content type is ``application/x-msgpack``, otherwise with
        ``unpickle``.

        ``timer`` is optional; when given, its ``set_msg`` is called with a
        ``'host: ..., query: ...'`` description.

        Raises ``Exception`` when the response cannot be decoded; in that
        case ``self.fail()`` is called and the error is logged first.
        """
        # timer defaults to None, so only report to it when one was supplied.
        if timer is not None:
            timer.set_msg('host: {host}, query: {query}'.format(
                host=self.host, query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" %
                  (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(
            query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result"
                % (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(url,
                                  fields=query_params,
                                  headers=query.headers,
                                  timeout=settings.REMOTE_FIND_TIMEOUT)

            try:
                if result.getheader('content-type') == 'application/x-msgpack':
                    results = msgpack.load(BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE),
                                           encoding='utf-8')
                else:
                    results = unpickle.load(
                        BufferedHTTPReader(
                            result, buffer_size=settings.REMOTE_BUFFER_SIZE))
            except Exception as err:
                self.fail()
                log.exception(
                    "RemoteFinder[%s] Error decoding find response from %s: %s"
                    % (self.host, result.url_full, err))
                raise Exception("Error decoding find response from %s: %s" %
                                (result.url_full, err))
            finally:
                # Release the connection whether or not decoding succeeded.
                result.release_conn()

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet([
                    Interval(interval[0], interval[1])
                    for interval in intervals
                ])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            # Mark the node as coming from a remote store.
            node.local = False
            yield node
Пример #29
0
    def find_nodes(self, query, timer=None):
        """Yield the nodes on this remote host that match ``query``.

        The decoded find response is looked up in ``cache`` first, under a key
        built from the host, a hash of the pattern, and the start/end times
        rounded down to a multiple of ``settings.FIND_CACHE_DURATION``. On a
        cache miss ``/metrics/find/`` is requested through ``self.request``,
        the body is decoded (msgpack or pickle, chosen by the response
        content type) and the decoded result is stored in the cache.

        This is a generator: nothing above runs until the first node is
        requested by the consumer.

        Args:
            query: find query; ``pattern``, ``startTime``, ``endTime`` and
                ``headers`` are read from it.
            timer: optional object whose ``set_msg`` method receives a short
                description of this request. May be ``None``.

        Yields:
            ``LeafNode`` (backed by a ``RemoteReader``) or ``BranchNode``
            objects, each with ``local`` set to ``False``.

        Raises:
            Exception: when the response body cannot be decoded;
                ``self.fail()`` is called before raising.
        """
        # ``timer`` is optional, so only report to it when one was supplied.
        if timer is not None:
            timer.set_msg(
                'host: {host}, query: {query}'.format(
                    host=self.host,
                    query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1

        # Round the time bounds down to a multiple of the cache TTL so that
        # queries issued within the same TTL bucket share one cache entry.
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(
                url,
                fields=query_params,
                headers=query.headers,
                timeout=settings.FIND_TIMEOUT)

            try:
                reader = BufferedHTTPReader(
                    result, buffer_size=settings.REMOTE_BUFFER_SIZE)
                if result.getheader('content-type') == 'application/x-msgpack':
                    results = msgpack.load(reader, encoding='utf-8')
                else:
                    # NOTE(review): this decodes a pickle payload received
                    # over the network; presumably ``unpickle`` is a
                    # restricted unpickler -- confirm before trusting peers.
                    results = unpickle.load(reader)
            except Exception as err:
                self.fail()
                log.exception(
                    "RemoteFinder[%s] Error decoding find response from %s: %s" %
                    (self.host, result.url_full, err))
                raise Exception("Error decoding find response from %s: %s" % (result.url_full, err))
            finally:
                # Always hand the connection back, whether decoding worked
                # or not.
                result.release_conn()

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                # Plain (start, end) pairs are promoted to an IntervalSet.
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
Пример #30
0
  def find(self, pattern, startTime=None, endTime=None, local=False):
    """Yield the nodes matching ``pattern`` from local finders and remote stores.

    Matching nodes are grouped by path. For every path each branch node path
    is yielded once, and the leaf nodes are reduced to a minimal set that
    covers the requested interval: a single leaf is yielded as-is, several
    leaves are wrapped in one ``LeafNode`` backed by a ``MultiReader``.

    Args:
      pattern: metric pattern handed to ``FindQuery``.
      startTime: optional start of the requested interval.
      endTime: optional end of the requested interval.
      local: when true, remote stores are not queried.

    Yields:
      Branch and leaf node objects.
    """
    query = FindQuery(pattern, startTime, endTime)

    # Start remote searches
    if not local:
      remote_requests = [ r.find(query) for r in self.remote_stores if r.available ]

    matching_nodes = set()

    # Search locally
    for finder in self.finders:
      for node in finder.find_nodes(query):
        matching_nodes.add(node)

    # Gather remote search results
    if not local:
      for request in remote_requests:
        for node in request.get_results():
          matching_nodes.add(node)

    # Group matching nodes by their path
    nodes_by_path = {}
    for node in matching_nodes:
      nodes_by_path.setdefault(node.path, []).append(node)

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    for path, nodes in nodes_by_path.iteritems():
      leaf_nodes = []

      # First we dispense with the BranchNodes
      for node in nodes:
        if node.is_leaf:
          leaf_nodes.append(node)
        elif node.path not in found_branch_nodes: #TODO need to filter branch nodes based on requested interval... how?!?!?
          yield node
          found_branch_nodes.add(node.path)

      if not leaf_nodes:
        continue

      # Calculate best minimal node set
      minimal_node_set = set()
      covered_intervals = IntervalSet([])

      # If the query doesn't fall entirely within the FIND_TOLERANCE window
      # we disregard the window. This prevents unnecessary remote fetches
      # caused when carbon's cache skews node.intervals, giving the appearance
      # remote systems have data we don't have locally, which we probably do.
      now = int( time.time() )
      tolerance_window = now - settings.FIND_TOLERANCE
      disregard_tolerance_window = query.interval.start < tolerance_window
      prior_to_window = Interval( float('-inf'), tolerance_window )

      # Reads ``covered_intervals`` from the enclosing scope at call time, so
      # it always measures against the coverage accumulated so far.
      def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
        relevant_intervals = node.intervals.intersect_interval(query.interval)
        if drop_window:
          relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
        return covered_intervals.union(relevant_intervals).size - covered_intervals.size

      nodes_remaining = list(leaf_nodes)

      # Prefer local nodes first (and do *not* drop the tolerance window)
      for node in leaf_nodes:
        if node.local and measure_of_added_coverage(node, False) > 0:
          nodes_remaining.remove(node)
          minimal_node_set.add(node)
          covered_intervals = covered_intervals.union(node.intervals)

      if settings.REMOTE_STORE_MERGE_RESULTS:
        # Merge mode: every remaining remote node is included.
        remote_nodes = [n for n in nodes_remaining if not n.local]
        for node in remote_nodes:
          nodes_remaining.remove(node)
          minimal_node_set.add(node)
          covered_intervals = covered_intervals.union(node.intervals)
      else:
        # Greedy selection: repeatedly take the node that adds the most
        # coverage until no remaining node adds any.
        while nodes_remaining:
          node_coverages = [ (measure_of_added_coverage(n), n) for n in nodes_remaining ]
          best_coverage, best_node = max(node_coverages)

          if best_coverage == 0:
            break

          nodes_remaining.remove(best_node)
          minimal_node_set.add(best_node)
          covered_intervals = covered_intervals.union(best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
          def distance_to_requested_interval(node):
            # A node without any interval can never be the closest one;
            # without this guard the [-1] lookup below raises IndexError.
            if not node.intervals:
              return float('inf')
            latest = sorted(node.intervals, key=lambda i: i.end)[-1]
            distance = query.interval.start - latest.end
            return distance if distance >= 0 else float('inf')

          best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
          if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
            minimal_node_set.add(best_candidate)

      if len(minimal_node_set) == 1:
        yield minimal_node_set.pop()
      elif len(minimal_node_set) > 1:
        reader = MultiReader(minimal_node_set)
        yield LeafNode(path, reader)
Пример #31
0
    def find_nodes(self, query):
        """Yield leaf and branch nodes that match ``query.pattern``.

        The names endpoint (``urls.names``) is queried up to
        ``self.max_retries`` times. Connection errors, timeouts and decode
        errors are logged and retried; an HTTP status error stops the retries
        immediately. Whatever names were obtained (possibly none) are then
        turned into nodes.

        A pattern ending in ``.**`` is sent to the server as ``.*`` and every
        branch that comes back is expanded recursively. The caller's ``query``
        object is not modified.

        Args:
            query: find query; only ``pattern`` is read.

        Yields:
            ``LeafNode`` objects (backed by an ``IRONdbReader`` sharing one
            ``IRONdbMeasurementFetcher``) and ``BranchNode`` objects.
        """
        log.debug("IRONdbFinder.find_nodes, query: %s, max_retries: %d" % (query.pattern, self.max_retries))

        # Work on a local copy of the pattern: the query object belongs to
        # the caller and must not be rewritten as a side effect.
        pattern = query.pattern
        metrics_expand = pattern.endswith('.**')
        if metrics_expand:
            pattern = pattern[:-1]

        names = {}
        name_headers = copy.deepcopy(self.headers)
        name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
        for _ in range(self.max_retries):
            try:
                if self.zipkin_enabled == True:
                    # The same random 8-byte id is used as trace and span id.
                    traceheader = binascii.hexlify(os.urandom(8))
                    name_headers['X-B3-TraceId'] = traceheader
                    name_headers['X-B3-SpanId'] = traceheader
                    if self.zipkin_event_trace_level == 1:
                        name_headers['X-Mtev-Trace-Event'] = '1'
                    elif self.zipkin_event_trace_level == 2:
                        name_headers['X-Mtev-Trace-Event'] = '2'
                # Timeouts are configured in milliseconds; requests expects
                # a (connect, read) tuple in seconds.
                r = requests.get(urls.names, params={'query': pattern}, headers=name_headers,
                                 timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0)))
                r.raise_for_status()
                if r.headers['content-type'] == 'application/json':
                    names = r.json()
                elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list':
                    names = irondb_flatbuf.metric_find_results(r.content)
                # Any other content type leaves ``names`` as it was.
                break
            except requests.exceptions.ConnectTimeout as ex:
                # ConnectTimeout is a subclass of ConnectionError, so it has
                # to be handled first or this branch is never reached.
                # on down nodes, try again until "tries"
                log.exception("IRONdbFinder.find_nodes ConnectTimeout %s" % ex)
            except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
                # on down nodes, try again until "tries"
                log.exception("IRONdbFinder.find_nodes ConnectionError %s" % ex)
            except irondb_flatbuf.FlatBufferError as ex:
                # flatbuffer error, try again
                log.exception("IRONdbFinder.find_nodes FlatBufferError %s" % ex)
            except JSONDecodeError as ex:
                # json error, try again
                log.exception("IRONdbFinder.find_nodes JSONDecodeError %s" % ex)
            except requests.exceptions.ReadTimeout as ex:
                # on down nodes, try again until "tries"
                log.exception("IRONdbFinder.find_nodes ReadTimeout %s" % ex)
            except requests.exceptions.HTTPError as ex:
                # http status code errors are failures, stop immediately
                log.exception("IRONdbFinder.find_nodes HTTPError %s %s" % (ex, r.content))
                break
        if settings.DEBUG:
            log.debug("IRONdbFinder.find_nodes, result: %s" % json.dumps(names))

        # for each set of self.batch_size leafnodes, execute an IRONdbMeasurementFetcher
        # so we can do these in batches.
        measurement_headers = copy.deepcopy(self.headers)
        measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list'
        fetcher = IRONdbMeasurementFetcher(measurement_headers, self.timeout, self.connection_timeout, self.database_rollups, self.rollup_window, self.max_retries,
                                           self.zipkin_enabled, self.zipkin_event_trace_level)

        for name in names:
            if 'leaf' in name and 'leaf_data' in name:
                fetcher.add_leaf(name['name'], name['leaf_data'])
                reader = IRONdbReader(name['name'], fetcher)
                yield LeafNode(name['name'], reader)
            else:
                yield BranchNode(name['name'])
                if metrics_expand:
                    # Recurse into the branch with a fresh '.**' query.
                    sub_query = FindQuery(name['name'] + '.**', None, None)
                    for node in self.find_nodes(sub_query):
                        yield node
Пример #32
0
    def find_all(self, query, headers=None):
        """Find nodes matching ``query`` on remote stores and local finders.

        One job is built per available remote store (skipped when
        ``query.local`` is true) and one per local finder. Jobs run either on
        the worker pool or inline, and each job's result is put on a queue.
        Results are collected until every job has reported or
        ``settings.REMOTE_FIND_TIMEOUT`` seconds have passed since the call
        started, then grouped by node path.

        For each path, every branch node path is yielded once. Leaf nodes are
        reduced to a minimal set covering the requested interval: a single
        leaf is yielded unchanged, several are wrapped in one ``LeafNode``
        backed by a ``MultiReader``.

        Args:
            query: find query; ``local`` and ``interval`` are read here and
                the object is passed on to every job.
            headers: passed as-is to each remote store's ``find``.

        Yields:
            Branch and leaf node objects, in shuffled path order.
        """
        start = time.time()
        result_queue = Queue.Queue()
        jobs = []

        # Start remote searches
        if not query.local:
            # Shuffles self.remote_stores in place.
            random.shuffle(self.remote_stores)
            jobs.extend([(store.find, query, headers)
                         for store in self.remote_stores if store.available])

        # Start local searches
        for finder in self.finders:
            jobs.append((finder.find_nodes, query))

        # Each job is a tuple of (callable, arg, ...).
        if settings.USE_WORKER_POOL:
            return_result = lambda x: result_queue.put(x)
            for job in jobs:
                get_pool().apply_async(func=job[0],
                                       args=job[1:],
                                       callback=return_result)
        else:
            # No pool: call every job inline and queue its return value.
            for job in jobs:
                result_queue.put(job[0](*job[1:]))

        # Group matching nodes by their path
        nodes_by_path = defaultdict(list)

        deadline = start + settings.REMOTE_FIND_TIMEOUT
        result_cnt = 0

        # Drain the queue until every job has reported or the deadline passes.
        while result_cnt < len(jobs):
            wait_time = deadline - time.time()

            try:
                nodes = result_queue.get(True, wait_time)

            # ValueError could happen if due to really unlucky timing wait_time is negative
            except (Queue.Empty, ValueError):
                if time.time() > deadline:
                    log.info("Timed out in find_all after %fs" %
                             (settings.REMOTE_FIND_TIMEOUT))
                    break
                else:
                    continue

            log.info("Got a find result after %fs" % (time.time() - start))
            result_cnt += 1
            if nodes:
                for node in nodes:
                    nodes_by_path[node.path].append(node)

        # NOTE: this line is also reached after the timeout `break` above.
        log.info("Got all find results in %fs" % (time.time() - start))

        # Reduce matching nodes for each path to a minimal set
        found_branch_nodes = set()

        # Paths are processed in random order.
        items = list(nodes_by_path.iteritems())
        random.shuffle(items)

        for path, nodes in items:
            leaf_nodes = []

            # First we dispense with the BranchNodes
            for node in nodes:
                if node.is_leaf:
                    leaf_nodes.append(node)
                elif node.path not in found_branch_nodes:  #TODO need to filter branch nodes based on requested interval... how?!?!?
                    yield node
                    found_branch_nodes.add(node.path)

            if not leaf_nodes:
                continue

            # Fast-path when there is a single node.
            if len(leaf_nodes) == 1:
                yield leaf_nodes[0]
                continue

            # Calculate best minimal node set
            minimal_node_set = set()
            covered_intervals = IntervalSet([])

            # If the query doesn't fall entirely within the FIND_TOLERANCE window
            # we disregard the window. This prevents unnecessary remote fetches
            # caused when carbon's cache skews node.intervals, giving the appearance
            # remote systems have data we don't have locally, which we probably do.
            now = int(time.time())
            tolerance_window = now - settings.FIND_TOLERANCE
            disregard_tolerance_window = query.interval.start < tolerance_window
            prior_to_window = Interval(float('-inf'), tolerance_window)

            # Reads `covered_intervals` from the enclosing scope at call time,
            # so it measures against the coverage accumulated so far.
            def measure_of_added_coverage(
                    node, drop_window=disregard_tolerance_window):
                relevant_intervals = node.intervals.intersect_interval(
                    query.interval)
                if drop_window:
                    relevant_intervals = relevant_intervals.intersect_interval(
                        prior_to_window)
                return covered_intervals.union(
                    relevant_intervals).size - covered_intervals.size

            nodes_remaining = list(leaf_nodes)

            # Prefer local nodes first (and do *not* drop the tolerance window)
            for node in leaf_nodes:
                if node.local and measure_of_added_coverage(node, False) > 0:
                    nodes_remaining.remove(node)
                    minimal_node_set.add(node)
                    covered_intervals = covered_intervals.union(node.intervals)

            if settings.REMOTE_STORE_MERGE_RESULTS:
                # Merge mode: every remaining remote node is included.
                remote_nodes = [n for n in nodes_remaining if not n.local]
                for node in remote_nodes:
                    nodes_remaining.remove(node)
                    minimal_node_set.add(node)
                    covered_intervals = covered_intervals.union(node.intervals)
            else:
                # Greedy selection: keep taking the node that adds the most
                # coverage until no remaining node adds any.
                while nodes_remaining:
                    node_coverages = [(measure_of_added_coverage(n), n)
                                      for n in nodes_remaining]
                    best_coverage, best_node = max(node_coverages)

                    if best_coverage == 0:
                        break

                    nodes_remaining.remove(best_node)
                    minimal_node_set.add(best_node)
                    covered_intervals = covered_intervals.union(
                        best_node.intervals)

                # Sometimes the requested interval falls within the caching window.
                # We include the most likely node if the gap is within tolerance.
                if not minimal_node_set:

                    # Gap between the node's latest interval end and the
                    # query start; infinite when the node has no intervals
                    # or its latest interval ends after the query start.
                    def distance_to_requested_interval(node):
                        if not node.intervals:
                            return float('inf')
                        latest = sorted(node.intervals,
                                        key=lambda i: i.end)[-1]
                        distance = query.interval.start - latest.end
                        return distance if distance >= 0 else float('inf')

                    best_candidate = min(leaf_nodes,
                                         key=distance_to_requested_interval)
                    if distance_to_requested_interval(
                            best_candidate) <= settings.FIND_TOLERANCE:
                        minimal_node_set.add(best_candidate)

            # One node is yielded directly; several are combined behind a
            # MultiReader; an empty set yields nothing for this path.
            if len(minimal_node_set) == 1:
                yield minimal_node_set.pop()
            elif len(minimal_node_set) > 1:
                reader = MultiReader(minimal_node_set)
                yield LeafNode(path, reader)
Пример #33
0
  def send(self, headers=None, msg_setter=None):
    """Run this find request against the remote store and yield its nodes.

    The decoded response is taken from ``cache`` under ``self.cacheKey`` when
    present. Otherwise ``/metrics/find/`` is requested on ``self.store.host``
    (POST or GET depending on ``settings.REMOTE_STORE_USE_POST``), the pickle
    body is decoded and the result is cached. On a request error, a non-200
    status or a decode error the failure is logged, ``self.store.fail()`` is
    called and nothing is yielded.

    This is a generator: nothing runs until the first node is requested.

    Args:
      headers: optional HTTP headers for the request; defaults to ``{}``.
      msg_setter: optional callable receiving a short description of the
        request. May be ``None``.

    Yields:
      ``LeafNode`` (backed by a ``RemoteReader``) or ``BranchNode`` objects,
      each with ``local`` set to ``False``.
    """
    log.info("FindRequest.send(host=%s, query=%s) called" % (self.store.host, self.query))

    if headers is None:
      headers = {}

    results = cache.get(self.cacheKey)
    if results is not None:
      log.info("FindRequest.send(host=%s, query=%s) using cached result" % (self.store.host, self.query))
    else:
      url = "%s://%s/metrics/find/" % ('https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

      query_params = [
        ('local', '1'),
        ('format', 'pickle'),
        ('query', self.query.pattern),
      ]
      if self.query.startTime:
        query_params.append( ('from', self.query.startTime) )

      if self.query.endTime:
        query_params.append( ('until', self.query.endTime) )

      try:
        result = http.request('POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                              url, fields=query_params, headers=headers, timeout=settings.REMOTE_FIND_TIMEOUT)
      except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        log.exception("FindRequest.send(host=%s, query=%s) exception during request" % (self.store.host, self.query))
        self.store.fail()
        return

      if result.status != 200:
        # NOTE(review): there is no active exception at this point, so
        # log.exception has no traceback to attach.
        log.exception("FindRequest.send(host=%s, query=%s) error response %d from %s?%s" % (self.store.host, self.query, result.status, url, urlencode(query_params)))
        self.store.fail()
        return

      try:
        # NOTE(review): this decodes a pickle payload received over the
        # network; presumably ``unpickle`` is a restricted unpickler --
        # confirm before trusting peers.
        results = unpickle.loads(result.data)
      except Exception:
        log.exception("FindRequest.send(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
        self.store.fail()
        return

      cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    # ``msg_setter`` is optional, so only call it when one was supplied.
    if msg_setter is not None:
      msg_setter('host: {host}, query: {query}'.format(host=self.store.host, query=self.query))

    for node_info in results:
      # handle both 1.x and 0.9.x output
      path = node_info.get('path') or node_info.get('metric_path')
      is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
      intervals = node_info.get('intervals') or []
      if not isinstance(intervals, IntervalSet):
        # Plain (start, end) pairs are promoted to an IntervalSet.
        intervals = IntervalSet([Interval(interval[0], interval[1]) for interval in intervals])

      node_info = {
        'is_leaf': is_leaf,
        'path': path,
        'intervals': intervals,
      }

      if is_leaf:
        reader = RemoteReader(self.store, node_info, bulk_query=[self.query.pattern])
        node = LeafNode(path, reader)
      else:
        node = BranchNode(path)

      node.local = False
      yield node
Пример #34
0
    def _merge_leaf_nodes(self, query, path, leaf_nodes):
        """Get a single node from a list of leaf nodes."""
        if not leaf_nodes:
            return None

        # Fast-path when there is a single node.
        if len(leaf_nodes) == 1:
            return leaf_nodes[0]

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably
        # do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(node,
                                      drop_window=disregard_tolerance_window):
            relevant_intervals = node.intervals.intersect_interval(
                query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(
                    prior_to_window)
            return covered_intervals.union(
                relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(
                    best_node.intervals)

            # Sometimes the requested interval falls within the caching window.
            # We include the most likely node if the gap is within
            # tolerance.
            if not minimal_node_set:

                def distance_to_requested_interval(node):
                    if not node.intervals:
                        return float('inf')
                    latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                    distance = query.interval.start - latest.end
                    return distance if distance >= 0 else float('inf')

                best_candidate = min(leaf_nodes,
                                     key=distance_to_requested_interval)
                if distance_to_requested_interval(
                        best_candidate) <= settings.FIND_TOLERANCE:
                    minimal_node_set.add(best_candidate)

        if not minimal_node_set:
            return None
        elif len(minimal_node_set) == 1:
            return minimal_node_set.pop()
        else:
            reader = MultiReader(minimal_node_set)
            return LeafNode(path, reader)
Пример #35
0
    def find_nodes(self, query, timer=None):
        """Return the nodes on this remote host that match ``query``.

        The decoded find response is looked up in ``cache`` first, under a key
        built from the host, a hash of the pattern, and the start/end times
        rounded down to a multiple of ``settings.FIND_CACHE_DURATION``. On a
        cache miss ``/metrics/find/`` is requested through ``self.request``,
        the response is decoded with ``self.deserialize`` and the result is
        stored in the cache.

        Args:
            query: find query; ``pattern``, ``startTime``, ``endTime`` and
                ``headers`` are read from it.
            timer: optional object whose ``set_msg`` method receives a short
                description of this request. May be ``None``.

        Returns:
            A list of ``LeafNode`` (backed by a ``RemoteReader``) and
            ``BranchNode`` objects, each with ``local`` set to ``False``.
        """
        # ``timer`` is optional, so only report to it when one was supplied.
        if timer is not None:
            timer.set_msg(
                'host: {host}, query: {query}'.format(
                    host=self.host,
                    query=query))

        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

        # prevent divide by 0
        cacheTTL = settings.FIND_CACHE_DURATION or 1

        # Round the time bounds down to a multiple of the cache TTL so that
        # queries issued within the same TTL bucket share one cache entry.
        if query.startTime:
            start = query.startTime - (query.startTime % cacheTTL)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime % cacheTTL)
        else:
            end = ""

        cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

        results = cache.get(cacheKey)
        if results is not None:
            log.debug(
                "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
                (self.host, query))
        else:
            url = '/metrics/find/'

            query_params = [
                ('local', self.params.get('local', '1')),
                ('format', self.params.get('format', 'pickle')),
                ('query', query.pattern),
            ]
            if query.startTime:
                query_params.append(('from', int(query.startTime)))

            if query.endTime:
                query_params.append(('until', int(query.endTime)))

            result = self.request(
                url,
                fields=query_params,
                headers=query.headers,
                timeout=settings.FIND_TIMEOUT)

            results = self.deserialize(result)

            cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

        # A list is built instead of using a generator because this function
        # may run as a job in a thread pool, where a generator has two risks:
        # 1. Generators are lazy: if the job does not iterate the result, the
        #    slow network work is likely to run in the calling thread instead,
        #    defeating the purpose of the pool.
        # 2. Because execution is deferred, the job manager cannot catch
        #    runtime exceptions from the job as designed.
        nodes = []
        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                # Plain (start, end) pairs are promoted to an IntervalSet.
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(self, node_info)
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            nodes.append(node)

        return nodes