def get_intervals(self):
    """Return an IntervalSet with one Interval per entry of the node's slice_info.

    Each ``slice_info`` entry is unpacked as ``(start, end, step)``; only the
    start and end are used to build the Interval, the step is ignored.
    """
    slice_spans = [
        Interval(start, end)
        for (start, end, _step) in self.ceres_node.slice_info
    ]
    return IntervalSet(slice_spans)
def get_intervals(self):
    """Return the single interval covered by the gzipped file at ``self.fs_path``.

    The header is read straight from the gzip stream to obtain
    ``maxRetention``. The interval starts ``maxRetention`` seconds before the
    current time and ends at the file's modification time, clamped so that
    the end is never earlier than the start.

    Returns:
        An IntervalSet holding exactly one Interval.
    """
    # Evil, but necessary: the header reader is a double-underscore function
    # of the whisper module. Writing ``whisper.__readHeader`` inside a class
    # body gets name-mangled to ``whisper._<ClassName>__readHeader`` and fails
    # with AttributeError, so the attribute is looked up by string instead
    # (string literals are never mangled).
    read_header = getattr(whisper, '__readHeader')

    fh = gzip.GzipFile(self.fs_path, 'rb')
    try:
        info = read_header(fh)
    finally:
        # Always release the file handle, even if the header is unreadable.
        fh.close()

    start = time.time() - info['maxRetention']
    end = max(os.stat(self.fs_path).st_mtime, start)
    return IntervalSet([Interval(start, end)])
def find(self, pattern, startTime=None, endTime=None, local=False):
    """Yield the nodes matching ``pattern``, reduced to a minimal set per path.

    Args:
        pattern: Passed to FindQuery together with the time bounds.
        startTime: Optional start of the requested interval.
        endTime: Optional end of the requested interval.
        local: When true, the remote stores are not queried at all.

    Yields:
        Non-leaf nodes, once per distinct path. For every path that has leaf
        nodes: either the single selected node, or a LeafNode wrapping a
        MultiReader over the selected nodes when more than one is needed.
        Nothing is yielded for a leaf path when no node is selected.
    """
    query = FindQuery(pattern, startTime, endTime)

    # Start remote searches
    if not local:
        remote_requests = [
            r.find(query) for r in self.remote_stores if r.available
        ]

    matching_nodes = set()

    # Search locally
    for finder in self.finders:
        for node in finder.find_nodes(query):
            matching_nodes.add(node)

    # Gather remote search results
    if not local:
        for request in remote_requests:
            for node in request.get_results():
                matching_nodes.add(node)

    # Group matching nodes by their path
    nodes_by_path = {}
    for node in matching_nodes:
        nodes_by_path.setdefault(node.path, []).append(node)

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    # .items() rather than .iteritems() so the loop runs on Python 2 and 3;
    # the dict is not modified while iterating.
    for path, nodes in nodes_by_path.items():
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                # TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
            # Size by which covered_intervals would grow if this node's
            # intervals (restricted to the query) were added to it.
            relevant_intervals = node.intervals.intersect_interval(query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
            return covered_intervals.union(relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        # Greedily add whichever remaining node contributes the most coverage.
        while nodes_remaining:
            node_coverages = [
                (measure_of_added_coverage(n), n) for n in nodes_remaining
            ]
            # Compare on the coverage only: without the key, a tie would fall
            # through to comparing the node objects themselves, which is
            # arbitrary on Python 2 and a TypeError on Python 3.
            best_coverage, best_node = max(node_coverages, key=lambda pair: pair[0])

            if best_coverage == 0:
                break

            nodes_remaining.remove(best_node)
            minimal_node_set.add(best_node)
            covered_intervals = covered_intervals.union(best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                intervals = list(node.intervals)
                # A node with no intervals can never be the closest one;
                # indexing an empty list here used to raise IndexError.
                if not intervals:
                    return float('inf')
                latest = max(intervals, key=lambda i: i.end)
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def find(self, pattern, startTime=None, endTime=None, local=False):
    """Yield the nodes matching ``pattern``, reduced to a minimal set per path.

    Args:
        pattern: Passed to FindQuery together with the time bounds.
        startTime: Optional start of the requested interval.
        endTime: Optional end of the requested interval.
        local: When true, the remote stores are not queried at all.

    Yields:
        Non-leaf nodes, once per distinct path. For every path that has leaf
        nodes: either the single selected node, or a LeafNode wrapping a
        MultiReader over the selected nodes when more than one is needed.
        Nothing is yielded for a leaf path when no node is selected.
    """
    query = FindQuery(pattern, startTime, endTime)

    # Start remote searches
    if not local:
        remote_requests = [
            r.find(query) for r in self.remote_stores if r.available
        ]

    matching_nodes = set()

    # Search locally
    for finder in self.finders:
        for node in finder.find_nodes(query):
            matching_nodes.add(node)

    # Gather remote search results
    if not local:
        for request in remote_requests:
            for node in request.get_results():
                matching_nodes.add(node)

    # Group matching nodes by their path
    nodes_by_path = {}
    for node in matching_nodes:
        nodes_by_path.setdefault(node.path, []).append(node)

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    # .items() rather than .iteritems() so the loop runs on Python 2 and 3;
    # the dict is not modified while iterating.
    for path, nodes in nodes_by_path.items():
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                # TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(
                node, drop_window=disregard_tolerance_window):
            # Size by which covered_intervals would grow if this node's
            # intervals (restricted to the query) were added to it.
            relevant_intervals = node.intervals.intersect_interval(
                query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(
                    prior_to_window)
            return covered_intervals.union(
                relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        # Greedily add whichever remaining node contributes the most coverage.
        while nodes_remaining:
            node_coverages = [(measure_of_added_coverage(n), n)
                              for n in nodes_remaining]
            # Compare on the coverage only: without the key, a tie would fall
            # through to comparing the node objects themselves, which is
            # arbitrary on Python 2 and a TypeError on Python 3.
            best_coverage, best_node = max(
                node_coverages, key=lambda pair: pair[0])

            if best_coverage == 0:
                break

            nodes_remaining.remove(best_node)
            minimal_node_set.add(best_node)
            covered_intervals = covered_intervals.union(
                best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                intervals = list(node.intervals)
                # A node with no intervals can never be the closest one;
                # indexing an empty list here used to raise IndexError.
                if not intervals:
                    return float('inf')
                latest = max(intervals, key=lambda i: i.end)
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(
                leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(
                    best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def get_intervals(self):
    """Return one IntervalSet built from the intervals of every node in ``self.nodes``.

    The individual intervals of all nodes are pooled and sorted before being
    handed to IntervalSet.
    """
    pooled = sorted(
        interval
        for node in self.nodes
        for interval in node.intervals.intervals
    )
    return IntervalSet(pooled)
def get_intervals(self):
    """Return the single interval covered by the file at ``self.fs_path``.

    The interval starts ``self.get_retention(self.fs_path)`` seconds before
    the current time and ends at the file's modification time, clamped so
    that the end is never earlier than the start.
    """
    now = time.time()
    retention = self.get_retention(self.fs_path)
    earliest = now - retention

    modified_at = os.stat(self.fs_path).st_mtime
    latest = max(modified_at, earliest)

    return IntervalSet([Interval(earliest, latest)])
def get_intervals(self):
    """Return the single interval covered by the whisper file at ``self.fs_path``.

    The interval starts ``maxRetention`` seconds (as reported by
    ``whisper.info``) before the current time and ends at the file's
    modification time, clamped so that the end is never earlier than the start.
    """
    now = time.time()
    max_retention = whisper.info(self.fs_path)['maxRetention']
    earliest = now - max_retention

    modified_at = os.stat(self.fs_path).st_mtime
    latest = max(modified_at, earliest)

    return IntervalSet([Interval(earliest, latest)])
def get_intervals(self):
    """Return an IntervalSet holding one Interval that spans the last 3600 seconds."""
    # The clock is read once for each bound, start first and then end.
    window_start = time.time() - 3600
    window_end = time.time()
    return IntervalSet([Interval(window_start, window_end)])