def test_summary(self):
    # Verifies the str() rendering of a Distribution, empty and populated.
    dist = metrics.Distribution("foo")
    expected_empty = (
        "foo: total=0.0, count=0, min=None, max=None, mean=None, stdev=None")
    self.assertEqual(expected_empty, str(dist))

    # Comparing the text form of floating point results is fragile, so the
    # samples are hand-picked: their count is a power of two (which keeps the
    # division exact) and their variance is a natural square (which keeps
    # the sqrt() exact).
    for sample in (1, 5):
        dist.add(sample)
    expected_filled = (
        "foo: total=6.0, count=2, min=1, max=5, mean=3.0, stdev=2.0")
    self.assertEqual(expected_filled, str(dist))
def test_accumulation(self):
    # Verifies the accumulated statistics before and after adding samples.
    dist = metrics.Distribution("foo")

    # Nothing has been added yet: counters are zero, statistics are unset.
    self.assertEqual(0, dist._count)
    self.assertEqual(0, dist._total)
    self.assertIsNone(dist._min)
    self.assertIsNone(dist._max)
    self.assertIsNone(dist._mean())
    self.assertIsNone(dist._stdev())

    # Feed in a few samples.
    for sample in (3, 2, 5):
        dist.add(sample)

    # Inspect the resulting contents.
    self.assertEqual(3, dist._count)
    self.assertEqual(10, dist._total)
    self.assertEqual(2, dist._min)
    self.assertEqual(5, dist._max)
    expected_mean = 10.0 / 3
    self.assertAlmostEqual(expected_mean, dist._mean())
    # The standard deviation is expected to be sqrt(14/9).
    expected_stdev = math.sqrt(14.0 / 9)
    self.assertAlmostEqual(expected_stdev, dist._stdev())
def test_merge(self):
    # Exercises Distribution._merge() with empty and populated operands and
    # checks count, total, sum of squares, min and max after every merge.
    d = metrics.Distribution("foo")

    # Merge two empty metrics together.
    other = metrics.Distribution("d_empty")
    d._merge(other)
    self.assertEqual(0, d._count)
    self.assertEqual(0, d._total)
    self.assertEqual(0, d._squared)
    # assertIsNone checks identity with None, matching test_accumulation.
    self.assertIsNone(d._min)
    self.assertIsNone(d._max)

    # Merge into an empty metric (verifies the case where min/max must be
    # copied directly from the merged metric).
    other = metrics.Distribution("d2")
    other.add(10)
    other.add(20)
    d._merge(other)
    self.assertEqual(2, d._count)
    self.assertEqual(30, d._total)
    self.assertEqual(500, d._squared)
    self.assertEqual(10, d._min)
    self.assertEqual(20, d._max)

    # Merge into an existing metric resulting in a new min.
    other = metrics.Distribution("d3")
    other.add(5)
    d._merge(other)
    self.assertEqual(3, d._count)
    self.assertEqual(35, d._total)
    self.assertEqual(525, d._squared)
    self.assertEqual(5, d._min)
    self.assertEqual(20, d._max)

    # Merge into an existing metric resulting in a new max.
    other = metrics.Distribution("d4")
    other.add(30)
    d._merge(other)
    self.assertEqual(4, d._count)
    self.assertEqual(65, d._total)
    self.assertEqual(1425, d._squared)
    self.assertEqual(5, d._min)
    self.assertEqual(30, d._max)

    # Merge an empty metric (sloppy min/max code would fail).
    other = metrics.Distribution("d5")
    d._merge(other)
    self.assertEqual(4, d._count)
    self.assertEqual(65, d._total)
    self.assertEqual(1425, d._squared)
    self.assertEqual(5, d._min)
    self.assertEqual(30, d._max)
def test_disabled(self):
    # After _prepare_for_test(enabled=False), add() must leave the sample
    # count untouched.
    metrics._prepare_for_test(enabled=False)
    dist = metrics.Distribution("foo")
    dist.add(123)
    recorded = dist._count
    self.assertEqual(0, recorded)
class Solver(object):
    """Solver bound to a single "problem" instance.

    Each solver keeps a cache of subproblem solutions, which lets it recall
    an answer whenever the same subproblem reoccurs during solving.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Create a solver; the solution cache is private to this instance.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        self._solved_states = {}
        self._path_finder = _PathFinder()

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Attempts to prove one or more bindings, starting at a given node and
        walking backwards all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start
            node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attr" its binding at the "start_node" program position. For
          larger programs, this might only look for a partial path (i.e., a
          path that doesn't go back all the way to the entry point of the
          program).
        """
        initial_state = State(start_node, start_attrs)
        return self._RecallOrFindSolution(initial_state)

    def _RecallOrFindSolution(self, state):
        """Return the memoized answer for "state", solving it on a miss."""
        cache = self._solved_states
        if state in cache:
            Solver._cache_metric.inc("hit")
            return cache[state]

        # Guard against infinite loops: the state is recorded as solvable
        # before it has actually been solved. The rationale is that a state
        # which can be solved at this level of the tree can also be solved in
        # any of the children.
        cache[state] = True
        Solver._cache_metric.inc("miss")
        answer = self._FindSolution(state)
        cache[state] = answer
        return answer

    def _FindSolution(self, state):
        """Find a sequence of assignments that would solve the given state."""
        if state.pos.condition:
            state.goals.add(state.pos.condition)
        Solver._goals_per_find_metric.add(len(state.goals))

        for removed_goals, new_goals in state.RemoveFinishedGoals():
            assert not state.pos.bindings & new_goals
            if _GoalsConflict(removed_goals):
                # The goals that were bulk-removed contradict each other.
                continue
            if not new_goals:
                return True

            blocked = frozenset().union(
                *(goal.variable.nodes for goal in new_goals))
            candidates = set()
            # Every entry of new_goals is an assignment we still have to find.
            for goal in new_goals:
                for origin in goal.origins:
                    found, path = self._path_finder.FindNodeBackwards(
                        state.pos, origin.where, blocked)
                    if not found:
                        continue
                    # If the path holds a node other than the current
                    # position (a condition met on the way), continue from
                    # the first such node; otherwise continue from the origin.
                    candidates.add(next(
                        (node for node in path if node is not state.pos),
                        origin.where))

            for candidate in candidates:
                if self._RecallOrFindSolution(State(candidate, new_goals)):
                    return True
        return False
"""Points-to / dataflow / cfg graph engine. It can be used to run reaching-definition queries on a nested CFG graph and to model path-specific visibility of nested data structures. """ import collections import logging from pytype import metrics log = logging.getLogger(__name__) _variable_size_metric = metrics.Distribution("variable_size") # Across a sample of 19352 modules, for files which took more than 25 seconds, # the largest variable was, on average, 157. For files below 25 seconds, it was # 7. Additionally, for 99% of files, the largest variable was below 64, so we # use that as the cutoff. MAX_VAR_SIZE = 64 class Program(object): """Program instances describe program entities. This class ties together the CFG, the data flow graph (variables + bindings) as well as methods. We use this for issuing IDs: We need every CFG node to have a unique ID, and this class does the corresponding counting. Attributes: entrypoint: Entrypoint of the program, if it has one. (None otherwise)
class Solver(object):
    """Solver bound to a single "problem" instance.

    Each solver keeps a cache of subproblem solutions, which lets it recall
    an answer whenever the same subproblem reoccurs during solving.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Create a solver; the solution cache is private to this instance.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        self._solved_states = {}
        self._path_finder = _PathFinder()

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Attempts to prove one or more bindings, starting at a given node and
        walking backwards all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start
            node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attr" its binding at the "start_node" program position. For
          larger programs, this might only look for a partial path (i.e., a
          path that doesn't go back all the way to the entry point of the
          program).
        """
        initial_state = State(start_node, start_attrs)
        # The starting assignments form the initial set of seen goals.
        return self._RecallOrFindSolution(
            initial_state, frozenset(start_attrs))

    def _RecallOrFindSolution(self, state, seen_goals):
        """Return the memoized answer for "state", solving it on a miss.

        Arguments:
          state: The state to look up or solve.
          seen_goals: Passed through to _FindSolution() on a cache miss.

        Returns:
          The cached or freshly computed result of _FindSolution().
        """
        cache = self._solved_states
        if state in cache:
            Solver._cache_metric.inc("hit")
            return cache[state]

        # Guard against infinite loops: the state is recorded as solvable
        # before it has actually been solved. The rationale is that a state
        # which can be solved at this level of the tree can also be solved in
        # any of the children.
        cache[state] = True
        Solver._cache_metric.inc("miss")
        answer = self._FindSolution(state, seen_goals)
        cache[state] = answer
        return answer

    def _FindSolution(self, state, seen_goals):
        """Find a sequence of assignments that would solve the given state.

        Arguments:
          state: The state (position and goals) to solve.
          seen_goals: Goals collected so far along this search; a condition
            already in this set is not added as a new goal.

        Returns:
          True if a solution was found, False otherwise.
        """
        if state.Done():
            return True
        if _GoalsConflict(state.goals):
            return False
        Solver._goals_per_find_metric.add(len(state.goals))

        # Be aware that the current CFG node may be part of this set.
        blocked = frozenset(state.NodesWithAssignments())

        # Look for the goal CFG node that was assigned last. Because CFGs are
        # treated as DAGs, there's typically one unique cfg node with this
        # property.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                reachable, path = self._path_finder.FindNodeBackwards(
                    state.pos, origin.where, blocked)
                if not reachable:
                    continue
                # Iterating over the different combinations of origins is the
                # reason states need to be memoized.
                for source_set in origin.source_sets:
                    pending = set(state.goals)
                    target = origin.where
                    # Conditions found on the way may have to become goals.
                    for node in path:
                        if node.condition in seen_goals:
                            continue
                        # node may be state.pos, typically when the node
                        # HasCombination is called on has a condition. It is
                        # handled like any other condition and becomes a goal.
                        pending.add(node.condition)
                        target = node
                        break

                    candidate = State(target, pending)
                    if origin.where is candidate.pos:
                        # Replacing the goal is only valid when origin.where
                        # was actually reached.
                        candidate.Replace(goal, source_set)

                    # Drop the goals that are trivially fulfilled at the new
                    # CFG node as well.
                    removed = candidate.RemoveFinishedGoals()
                    removed.add(goal)
                    if _GoalsConflict(removed | candidate.goals):
                        # The goals removed in bulk are internally
                        # conflicting; move on to the next source set.
                        continue
                    if self._RecallOrFindSolution(
                            candidate, seen_goals | pending):
                        return True
        return False
class Solver(object):
    """Solver bound to a single "problem" instance.

    Each solver keeps a cache of subproblem solutions, which lets it recall
    an answer whenever the same subproblem reoccurs during solving.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Create a solver; the solution cache is private to this instance.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        self._solved_states = {}

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Attempts to prove one or more bindings, starting at a given node and
        walking backwards all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start
            node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attr" its binding at the "start_node" program position. For
          larger programs, this might only look for a partial path (i.e., a
          path that doesn't go back all the way to the entry point of the
          program).
        """
        initial_state = State(start_node, start_attrs)
        return self._RecallOrFindSolution(initial_state)

    def _RecallOrFindSolution(self, state):
        """Return the memoized answer for "state", solving it on a miss."""
        cache = self._solved_states
        if state in cache:
            Solver._cache_metric.inc("hit")
            return cache[state]

        # Guard against infinite loops: the state is recorded as solvable
        # before it has actually been solved. The rationale is that a state
        # which can be solved at this level of the tree can also be solved in
        # any of the children.
        cache[state] = True
        Solver._cache_metric.inc("miss")
        answer = self._FindSolution(state)
        cache[state] = answer
        return answer

    def _FindSolution(self, state):
        """Find a sequence of assignments that would solve the given state."""
        if state.Done():
            return True
        if state.HasConflictingGoals():
            return False
        Solver._goals_per_find_metric.add(len(state.goals))

        assigned_nodes = state.NodesWithAssignments()
        # The current CFG node is not considered blocked: if an assignment at
        # the current position overwrites one of the goal variables, that
        # assignment is assumed to still see the previous bindings.
        assigned_nodes.discard(state.pos)
        blocked = frozenset(assigned_nodes)

        # Look for the goal CFG node that was assigned last. Because CFGs are
        # treated as DAGs, there's typically one unique cfg node with this
        # property.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                if not _FindNodeBackwards(state.pos, origin.where, blocked):
                    continue
                # Iterating over the different combinations of origins is the
                # reason states need to be memoized.
                for source_set in origin.source_sets:
                    candidate = State(origin.where, state.goals)
                    candidate.Replace(goal, source_set)
                    # Drop the goals that are trivially fulfilled at the new
                    # CFG node as well.
                    candidate.RemoveFinishedGoals()
                    if self._RecallOrFindSolution(candidate):
                        return True
        return False
class Solver(object):
    """The solver class is instantiated for a given "problem" instance.

    It maintains a cache of solutions for subproblems to be able to recall
    them if they reoccur in the solving process.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Initialize a solver instance. Every instance has their own cache.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        self._solved_states = {}
        self._path_finder = _PathFinder()

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Try to prove one or more bindings starting (and going backwards from)
        a given node, all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start
            node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attr" its binding at the "start_node" program position. For
          larger programs, this might only look for a partial path (i.e., a
          path that doesn't go back all the way to the entry point of the
          program).
        """
        state = State(start_node, start_attrs)
        return self._RecallOrFindSolution(state)

    def _RecallOrFindSolution(self, state):
        """Memoized version of FindSolution()."""
        if state in self._solved_states:
            Solver._cache_metric.inc("hit")
            return self._solved_states[state]

        # To prevent infinite loops, we insert this state into the hashmap as
        # a solvable state, even though we have not solved it yet. The
        # reasoning is that if it's possible to solve this state at this level
        # of the tree, it can also be solved in any of the children.
        self._solved_states[state] = True

        Solver._cache_metric.inc("miss")
        result = self._solved_states[state] = self._FindSolution(state)
        return result

    def _IsSolvedBefore(self, where, goal, entrypoint, blocked):
        """Determine if a goal is possibly solved in subsection of the CFG.

        If a condition introduces a new goal, but we can solve that goal
        *before* the goal we were trying to solve originally, assume that goal
        doesn't have anything to do with us.

        This currently does a quick CFG check as an approximation. An
        alternative implementation would be to call _FindSolution while
        blocking the new entrypoint.

        Args:
          where: Current CFG node. We search backwards from this node.
          goal: The goal to find a solution for.
          entrypoint: The "new" entry point of the graph. This typically
            reduces the CFG to a subgraph.
          blocked: A set (or frozenset) of nodes; it is combined with
            {entrypoint} through the "|" operator, so a list does not work.

        Returns:
          True if we think this goal can be solved without traversing beyond
          "entrypoint", False if it can't.
        """
        blocked = frozenset(blocked | {entrypoint})
        # TODO(kramm): We don't cache this. Should we?
        return any(
            origin.where not in blocked and self._path_finder.FindPathToNode(
                where, origin.where, blocked)
            for origin in goal.origins)

    def _FindSolution(self, state):
        """Find a sequence of assignments that would solve the given state.

        Arguments:
          state: The state (position and goals) to solve.

        Returns:
          True if any combination of goal, origin and source set leads to a
          solvable state, False if none does.
        """
        if state.Done():
            return True
        if _GoalsConflict(state.goals):
            return False
        Solver._goals_per_find_metric.add(len(state.goals))
        blocked = state.NodesWithAssignments()
        # We don't treat our current CFG node as blocked: If one of the goal
        # variables is overwritten by an assignment at our current pos, we
        # assume that assignment can still see the previous bindings.
        blocked.discard(state.pos)
        blocked = frozenset(blocked)
        # Find the goal cfg node that was assigned last. Due to the fact that
        # we treat CFGs as DAGs, there's typically one unique cfg node with
        # this property.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                path_exist, path = self._path_finder.FindNodeBackwards(
                    state.pos, origin.where, blocked)
                if not path_exist:
                    continue
                # This loop over multiple different combinations of origins is
                # why we need memoization of states.
                for source_set in origin.source_sets:
                    new_goals = set(state.goals)
                    where = origin.where
                    # If we found conditions on the way, see whether we need
                    # to add any of them to our goals.
                    for node in path:
                        if (node.condition not in state.goals and
                                not self._IsSolvedBefore(
                                    node, node.condition, origin.where,
                                    blocked)):
                            # TODO(kramm): what if node == state.pos?
                            new_goals.add(node.condition)
                            where = node
                            break
                    new_state = State(where, new_goals)
                    if origin.where is new_state.pos:
                        # The goal can only be replaced if origin.where was
                        # actually reached.
                        new_state.Replace(goal, source_set)

                    # Also remove all goals that are trivially fulfilled at
                    # the new CFG node.
                    removed = new_state.RemoveFinishedGoals()
                    removed.add(goal)
                    if _GoalsConflict(removed):
                        # Sometimes, we bulk-remove goals that are internally
                        # conflicting. Only this candidate is ruled out:
                        # returning False here would skip the remaining
                        # source sets, origins and goals and make the answer
                        # depend on iteration order.
                        continue
                    if self._RecallOrFindSolution(new_state):
                        return True
        return False