示例#1
0
    def get_intervals(self):
        """Return an IntervalSet holding one Interval per ceres slice.

        Every entry of ``self.ceres_node.slice_info`` is unpacked as a
        ``(start, end, step)`` triple; only the start and end are used.
        """
        spans = [
            Interval(slice_start, slice_end)
            for (slice_start, slice_end, _step) in self.ceres_node.slice_info
        ]
        return IntervalSet(spans)
示例#2
0
    def get_intervals(self):
        """Return the single interval this gzipped whisper file may cover.

        The whisper header is read straight from the gzip stream.  The
        interval starts at "now minus the header's ``maxRetention``" and ends
        at the file's modification time, clamped so that it is never earlier
        than the start.

        Returns:
            An IntervalSet containing exactly one Interval.
        """
        fh = gzip.GzipFile(self.fs_path, 'rb')
        try:
            # Evil, but necessary: whisper only exposes a header parser that
            # accepts an open file object under a double-underscore name.
            # The attribute is fetched with getattr() and a string because a
            # literal ``whisper.__readHeader`` inside a class body is subject
            # to private name mangling (it becomes
            # ``whisper._<ClassName>__readHeader``) and raises AttributeError.
            read_header = getattr(whisper, '__readHeader')
            info = read_header(fh)
        finally:
            fh.close()

        start = time.time() - info['maxRetention']
        # A file not modified within the retention period would otherwise
        # produce an interval that ends before it starts.
        end = max( os.stat(self.fs_path).st_mtime, start )
        return IntervalSet( [Interval(start, end)] )
示例#3
0
    def find(self, pattern, startTime=None, endTime=None, local=False):
        """Yield the nodes that match ``pattern``, one result per path.

        A FindQuery is built from ``pattern``, ``startTime`` and ``endTime``
        and handed to every finder in ``self.finders`` and, unless ``local``
        is true, to every available store in ``self.remote_stores``.  The
        combined results are grouped by ``node.path`` and reduced as follows:

        * non-leaf nodes are yielded as-is, at most once per path;
        * leaf nodes sharing a path are reduced to a minimal set that covers
          the query interval, taking local nodes first and then greedily
          adding whichever remaining node contributes the most new coverage.

        For a leaf path this generator yields the single chosen node, or a
        ``LeafNode(path, MultiReader(...))`` wrapping several chosen nodes, or
        nothing at all when no node qualifies.
        """
        query = FindQuery(pattern, startTime, endTime)
        # Start remote searches
        # These are issued before the local search below. remote_requests is
        # only bound when ``local`` is false and is only read under the same
        # guard further down.
        if not local:
            remote_requests = [ r.find(query) for r in self.remote_stores if r.available ]
        matching_nodes = set()

        # Search locally
        for finder in self.finders:
            for node in finder.find_nodes(query):
                #log.info("find() :: local :: %s" % node)
                matching_nodes.add(node)
        # Gather remote search results
        if not local:
            for request in remote_requests:
                for node in request.get_results():
                    #log.info("find() :: remote :: %s from %s" % (node,request.store.host))
                    matching_nodes.add(node)
        # Group matching nodes by their path
        nodes_by_path = {}
        for node in matching_nodes:
            if node.path not in nodes_by_path:
                nodes_by_path[node.path] = []

            nodes_by_path[node.path].append(node)
        # Reduce matching nodes for each path to a minimal set
        # Paths of the non-leaf nodes already yielded, so duplicates are skipped.
        found_branch_nodes = set()

        for path, nodes in nodes_by_path.iteritems():
            leaf_nodes = []

            # First we dispense with the BranchNodes
            for node in nodes:
                if node.is_leaf:
                    leaf_nodes.append(node)
                elif node.path not in found_branch_nodes: #TODO need to filter branch nodes based on requested interval... how?!?!?
                    yield node
                    found_branch_nodes.add(node.path)

            if not leaf_nodes:
                continue

            # Calculate best minimal node set
            minimal_node_set = set()
            covered_intervals = IntervalSet([])

            # If the query doesn't fall entirely within the FIND_TOLERANCE window
            # we disregard the window. This prevents unnecessary remote fetches
            # caused when carbon's cache skews node.intervals, giving the appearance
            # remote systems have data we don't have locally, which we probably do.
            now = int( time.time() )
            tolerance_window = now - settings.FIND_TOLERANCE
            disregard_tolerance_window = query.interval.start < tolerance_window
            prior_to_window = Interval( float('-inf'), tolerance_window )

            # Size of the coverage that ``node`` would add on top of
            # covered_intervals, restricted to the query interval (and, when
            # drop_window is true, to the time before the tolerance window).
            # The drop_window default is bound when this ``def`` executes, i.e.
            # once per path; covered_intervals is looked up at call time, so
            # the reassignments made in the loops below are seen here.
            def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
                relevant_intervals = node.intervals.intersect_interval(query.interval)
                if drop_window:
                    relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
                return covered_intervals.union(relevant_intervals).size - covered_intervals.size

            # Copy, so that removals do not disturb the iteration over leaf_nodes.
            nodes_remaining = list(leaf_nodes)

            # Prefer local nodes first (and do *not* drop the tolerance window)
            for node in leaf_nodes:
                if node.local and measure_of_added_coverage(node, False) > 0:
                    nodes_remaining.remove(node)
                    minimal_node_set.add(node)
                    # The node's full interval set is merged in, not just the
                    # part that overlaps the query.
                    covered_intervals = covered_intervals.union(node.intervals)

            # Greedy pass: keep taking the node that adds the most coverage
            # until nothing left adds any.
            while nodes_remaining:
                node_coverages = [ (measure_of_added_coverage(n), n) for n in nodes_remaining ]
                # Tuples compare on coverage first; when coverages are equal
                # the comparison falls through to the node objects themselves.
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(best_node.intervals)

            # Sometimes the requested interval falls within the caching window.
            # We include the most likely node if the gap is within tolerance.
            if not minimal_node_set:
                # Gap between the end of the node's latest interval and the
                # start of the query; a node whose latest interval ends after
                # the query start is ranked last (infinite distance).
                # NOTE(review): assumes node.intervals is non-empty, otherwise
                # the [-1] lookup fails -- confirm against the node types.
                def distance_to_requested_interval(node):
                    latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                    distance = query.interval.start - latest.end
                    return distance if distance >= 0 else float('inf')

                best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
                if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
                    minimal_node_set.add(best_candidate)

            # One node is yielded directly; several are wrapped in a single
            # LeafNode backed by a MultiReader; an empty set yields nothing.
            if len(minimal_node_set) == 1:
                yield minimal_node_set.pop()
            elif len(minimal_node_set) > 1:
                reader = MultiReader(minimal_node_set)
                yield LeafNode(path, reader)
示例#4
0
    def find(self, pattern, startTime=None, endTime=None, local=False):
        """Yield the nodes that match ``pattern``, one result per path.

        A FindQuery is built from ``pattern``, ``startTime`` and ``endTime``
        and handed to every finder in ``self.finders`` and, unless ``local``
        is true, to every available store in ``self.remote_stores``.  The
        combined results are grouped by ``node.path`` and reduced as follows:

        * non-leaf nodes are yielded as-is, at most once per path;
        * leaf nodes sharing a path are reduced to a minimal set that covers
          the query interval, taking local nodes first and then greedily
          adding whichever remaining node contributes the most new coverage.

        For a leaf path this generator yields the single chosen node, or a
        ``LeafNode(path, MultiReader(...))`` wrapping several chosen nodes, or
        nothing at all when no node qualifies.
        """
        query = FindQuery(pattern, startTime, endTime)
        # Start remote searches
        # These are issued before the local search below. remote_requests is
        # only bound when ``local`` is false and is only read under the same
        # guard further down.
        if not local:
            remote_requests = [
                r.find(query) for r in self.remote_stores if r.available
            ]
        matching_nodes = set()

        # Search locally
        for finder in self.finders:
            for node in finder.find_nodes(query):
                #log.info("find() :: local :: %s" % node)
                matching_nodes.add(node)
        # Gather remote search results
        if not local:
            for request in remote_requests:
                for node in request.get_results():
                    #log.info("find() :: remote :: %s from %s" % (node,request.store.host))
                    matching_nodes.add(node)
        # Group matching nodes by their path
        nodes_by_path = {}
        for node in matching_nodes:
            if node.path not in nodes_by_path:
                nodes_by_path[node.path] = []

            nodes_by_path[node.path].append(node)
        # Reduce matching nodes for each path to a minimal set
        # Paths of the non-leaf nodes already yielded, so duplicates are skipped.
        found_branch_nodes = set()

        for path, nodes in nodes_by_path.iteritems():
            leaf_nodes = []

            # First we dispense with the BranchNodes
            for node in nodes:
                if node.is_leaf:
                    leaf_nodes.append(node)
                elif node.path not in found_branch_nodes:  #TODO need to filter branch nodes based on requested interval... how?!?!?
                    yield node
                    found_branch_nodes.add(node.path)

            if not leaf_nodes:
                continue

            # Calculate best minimal node set
            minimal_node_set = set()
            covered_intervals = IntervalSet([])

            # If the query doesn't fall entirely within the FIND_TOLERANCE window
            # we disregard the window. This prevents unnecessary remote fetches
            # caused when carbon's cache skews node.intervals, giving the appearance
            # remote systems have data we don't have locally, which we probably do.
            now = int(time.time())
            tolerance_window = now - settings.FIND_TOLERANCE
            disregard_tolerance_window = query.interval.start < tolerance_window
            prior_to_window = Interval(float('-inf'), tolerance_window)

            # Size of the coverage that ``node`` would add on top of
            # covered_intervals, restricted to the query interval (and, when
            # drop_window is true, to the time before the tolerance window).
            # The drop_window default is bound when this ``def`` executes, i.e.
            # once per path; covered_intervals is looked up at call time, so
            # the reassignments made in the loops below are seen here.
            def measure_of_added_coverage(
                    node, drop_window=disregard_tolerance_window):
                relevant_intervals = node.intervals.intersect_interval(
                    query.interval)
                if drop_window:
                    relevant_intervals = relevant_intervals.intersect_interval(
                        prior_to_window)
                return covered_intervals.union(
                    relevant_intervals).size - covered_intervals.size

            # Copy, so that removals do not disturb the iteration over leaf_nodes.
            nodes_remaining = list(leaf_nodes)

            # Prefer local nodes first (and do *not* drop the tolerance window)
            for node in leaf_nodes:
                if node.local and measure_of_added_coverage(node, False) > 0:
                    nodes_remaining.remove(node)
                    minimal_node_set.add(node)
                    # The node's full interval set is merged in, not just the
                    # part that overlaps the query.
                    covered_intervals = covered_intervals.union(node.intervals)

            # Greedy pass: keep taking the node that adds the most coverage
            # until nothing left adds any.
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                # Tuples compare on coverage first; when coverages are equal
                # the comparison falls through to the node objects themselves.
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(
                    best_node.intervals)

            # Sometimes the requested interval falls within the caching window.
            # We include the most likely node if the gap is within tolerance.
            if not minimal_node_set:

                # Gap between the end of the node's latest interval and the
                # start of the query; a node whose latest interval ends after
                # the query start is ranked last (infinite distance).
                # NOTE(review): assumes node.intervals is non-empty, otherwise
                # the [-1] lookup fails -- confirm against the node types.
                def distance_to_requested_interval(node):
                    latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                    distance = query.interval.start - latest.end
                    return distance if distance >= 0 else float('inf')

                best_candidate = min(leaf_nodes,
                                     key=distance_to_requested_interval)
                if distance_to_requested_interval(
                        best_candidate) <= settings.FIND_TOLERANCE:
                    minimal_node_set.add(best_candidate)

            # One node is yielded directly; several are wrapped in a single
            # LeafNode backed by a MultiReader; an empty set yields nothing.
            if len(minimal_node_set) == 1:
                yield minimal_node_set.pop()
            elif len(minimal_node_set) > 1:
                reader = MultiReader(minimal_node_set)
                yield LeafNode(path, reader)
示例#5
0
 def get_intervals(self):
     """Return one IntervalSet built from the intervals of every node.

     The ``intervals.intervals`` sequences of all entries in ``self.nodes``
     are concatenated and sorted before being wrapped in an IntervalSet.
     """
     merged = [
         interval
         for member in self.nodes
         for interval in member.intervals.intervals
     ]
     merged.sort()
     return IntervalSet(merged)
示例#6
0
 def get_intervals(self):
     """Return the single interval this reader's file may cover.

     The interval starts at the current time minus the value returned by
     ``self.get_retention(self.fs_path)`` and ends at the file's
     modification time, or at the start itself when the file is older.
     """
     now = time.time()
     oldest = now - self.get_retention(self.fs_path)
     modified = os.stat(self.fs_path).st_mtime
     # Never let the interval end before it begins.
     newest = oldest if oldest > modified else modified
     return IntervalSet([Interval(oldest, newest)])
示例#7
0
 def get_intervals(self):
     """Return the single interval this whisper file may cover.

     The interval starts at the current time minus the ``maxRetention``
     entry of ``whisper.info(self.fs_path)`` and ends at the file's
     modification time, or at the start itself when the file is older.
     """
     now = time.time()
     header = whisper.info(self.fs_path)
     oldest = now - header['maxRetention']
     modified = os.stat(self.fs_path).st_mtime
     # Never let the interval end before it begins.
     newest = oldest if oldest > modified else modified
     return IntervalSet([Interval(oldest, newest)])
示例#8
0
 def get_intervals(self):
     """Return an IntervalSet spanning the 3600 seconds up to now."""
     window_start = time.time() - 3600
     window_end = time.time()
     return IntervalSet([Interval(window_start, window_end)])