def aggregate_drcov_batch(self, drcov_list):
    """
    Fold every DrcovData item of drcov_list into one coverage mapping.

    See create_coverage_from_drcov_list(...) for more verbose comments.

    Returns a tuple of (coverage, errors), where coverage is the newly
    built DatabaseCoverage and errors is a list of
    (self.ERROR_COVERAGE_ABSENT, filepath) tuples, one for each item that
    could not be normalized.
    """
    failures = []

    # the (initially blank) coverage that the batch is accumulated into
    aggregate = DatabaseCoverage(self._palette)

    for index, drcov_data in enumerate(drcov_list, 1):

        # surface our progress through the batch to the user
        disassembler.replace_wait_box(
            "Aggregating batch data %u/%u" % (index, len(drcov_list))
        )

        # translate the coverage data to the open database
        try:
            addresses = self._normalize_drcov_data(drcov_data)

        # normalization failed: record it, log it, move on to the next item
        except Exception as exc:
            failures.append((self.ERROR_COVERAGE_ABSENT, drcov_data.filepath))
            lmsg("Failed to normalize coverage %s" % drcov_data.filepath)
            lmsg("- %s" % exc)

        # normalization succeeded, merge the addresses into the aggregate
        # (the False flag is forwarded to add_addresses as-is)
        else:
            aggregate.add_addresses(addresses, False)

    # hand back the aggregate along with anything that failed to load
    return (aggregate, failures)
def _evaluate_coverage_range(self, range_token):
    """
    Evaluate a TokenCoverageRange AST token.

    The token's symbol_start / symbol_end shorthand symbols are treated as
    an inclusive range; the data of every coverage aliased within that
    range is added to a fresh DatabaseCoverage, which is returned.
    """
    assert isinstance(range_token, TokenCoverageRange)

    # start from a null coverage set
    aggregate = DatabaseCoverage(None, self._palette)

    # expand 'A,Z' to ['A', 'B', 'C', ... , 'Z']
    first = ord(range_token.symbol_start)
    last = ord(range_token.symbol_end)
    shorthand_symbols = list(map(chr, range(first, last + 1)))

    # resolve each shorthand symbol to its coverage and fold in its data
    for shorthand in shorthand_symbols:
        coverage_name = self._alias2name[shorthand]
        aggregate.add_data(self.get_coverage(coverage_name).data)

    return aggregate
def _new_coverage(self, coverage_data):
    """
    Construct a DatabaseCoverage from coverage_data and return it.

    The new object is given the director's metadata and refreshed before
    it is handed back to the caller.
    """
    database_coverage = DatabaseCoverage(coverage_data, self._palette)

    # bind the coverage to the database metadata, then (re)compute it
    database_coverage.update_metadata(self.metadata)
    database_coverage.refresh()

    return database_coverage
def _aggregate_batch(self, loaded_files):
    """
    Collapse the given loaded_files data into a single coverage object.

    Each entry is normalized against the director's metadata and the
    resulting addresses are merged into one DatabaseCoverage, which is
    returned. Entries that fail to normalize are logged and skipped.
    """
    idaapi.replace_wait_box("Aggregating coverage batch...")

    # the (initially empty) coverage that the batch is accumulated into
    aggregate = DatabaseCoverage({}, self.palette)

    #
    # walk the coverage data that was loaded from disk, normalizing each
    # entry (translate / filter / flatten) into a list of instruction
    # addresses suitable for insertion into the director
    #

    for index, data in enumerate(loaded_files, 1):

        # surface our progress through the batch to the user
        idaapi.replace_wait_box(
            "Aggregating batch data %u/%u" % (index, len(loaded_files))
        )

        # translate the coverage data to the open database
        try:
            addresses = self._normalize_coverage(data, self.director.metadata)

        # normalization failed, print & log it, then skip this entry
        except Exception as exc:
            lmsg("Failed to map coverage %s" % data.filepath)
            lmsg("- %s" % exc)
            logger.exception("Error details:")

        # normalization succeeded, merge the addresses into the aggregate
        # (the False flag is forwarded to add_addresses as-is)
        else:
            aggregate.add_addresses(addresses, False)

    # hand back the aggregated coverage object
    return aggregate
def _delete_aggregate_coverage(self):
    """
    Delete the aggregate set, effectively clearing all loaded coverage.

    Every loaded coverage set has its shorthand alias released and is
    removed from the coverage table, after which a new blank aggregate is
    installed and refreshed.
    """

    #
    # iterate over a snapshot of the names: the loop body removes entries
    # from self._database_coverage, and if coverage_names is backed by a
    # live view of that dict, mutating it mid-iteration raises a
    # RuntimeError (dictionary changed size during iteration)
    #

    for coverage_name in list(self.coverage_names):
        self._release_shorthand_alias(coverage_name)
        self._database_coverage.pop(coverage_name)

    # TODO/FUTURE: check if there's any references to the coverage aggregate?

    # assign a new, blank aggregate set
    self._special_coverage[AGGREGATE] = DatabaseCoverage(self._palette, AGGREGATE)
    self._refresh_aggregate() # probably not needed
def _build_coverage(self, coverage_base, coverage_data):
    """
    Construct a DatabaseCoverage from the given base and data, and return it.

    The new object is given the director's metadata and refreshed before
    it is handed back to the caller.
    """

    # a new database-wide coverage object wrapping the given data
    database_coverage = DatabaseCoverage(
        coverage_base,
        coverage_data,
        self._palette
    )

    # bind the coverage to the database metadata, then (re)compute it
    database_coverage.update_metadata(self.metadata)
    database_coverage.refresh()

    return database_coverage
def _evaluate_composition(self, ast):
    """
    Evaluate the coverage composition described by the AST.

    Returns a blank DatabaseCoverage for a TokenNull AST. Otherwise the
    AST is evaluated recursively under self._composition_lock and the
    resulting coverage is mapped to the director's metadata, refreshed,
    and returned.

    Any exception raised during evaluation propagates to the caller, but
    the composition lock is always released first.
    """

    # if the AST is effectively 'null', return a blank coverage set
    if isinstance(ast, TokenNull):
        return DatabaseCoverage(self._palette)

    #
    # the director's composition evaluation code (this function) is most
    # generally called via the background caching evaluation thread known
    # as self._composition_worker. But this function can also be called
    # inline via the 'add_composition' function from a different thread
    # (namely, the main thread)
    #
    # because of this, we must gate the resources that AST evaluation code
    # operates on behind a lock, restricting the code to one thread.
    #
    # should we call _evaluate_composition from the context of the main
    # thread, it is important that we do so in a pseudo non-blocking way
    # such that we don't hang the UI. await_lock(...) will allow the Qt
    # main thread to yield to other threads while waiting for the lock.
    #

    await_lock(self._composition_lock)

    #
    # everything below operates on shared data (coverage). the release is
    # placed in a finally clause so that a failure anywhere in evaluation
    # (eg, an invalid AST token) cannot leave the lock held forever and
    # stall every subsequent evaluation on either thread
    #

    try:

        # recursively evaluate the AST
        composite_coverage = self._evaluate_composition_recursive(ast)

        # map the composited coverage data to the database metadata
        composite_coverage.update_metadata(self.metadata)
        composite_coverage.refresh() # TODO/FUTURE: hash refresh?

    # done operating on shared data (coverage), release the lock
    finally:
        self._composition_lock.release()

    # return the evaluated composition
    return composite_coverage
def update_coverage(self, coverage_name, coverage_data, coverage_filepath=None):
    """
    Create or update a database coverage mapping.

    A new DatabaseCoverage is built from the given data, mapped to the
    director's metadata, committed under coverage_name (replacing any
    mapping already stored under that name), and returned. Newly added
    names also request a shorthand alias. Listeners are notified of the
    creation or modification.

    coverage_name must not be one of RESERVED_NAMES.
    """
    assert coverage_name not in RESERVED_NAMES

    # an already-known name means this is an update, not an addition
    is_update = coverage_name in self.coverage_names
    log_template = "Updating coverage %s" if is_update else "Adding coverage %s"
    logger.debug(log_template % coverage_name)

    # build a fresh database coverage mapping from the given coverage data
    mapping = DatabaseCoverage(
        self._palette,
        coverage_name,
        coverage_filepath,
        coverage_data
    )
    mapping.update_metadata(self.metadata)
    mapping.refresh()

    #
    # mapping complete. commit it to the director's coverage table so that
    # it is surfaced for use. note that this overwrites any coverage
    # mapping already present under the same name
    #

    self._commit_coverage(coverage_name, mapping)

    # an update only needs to announce that coverage was modified
    if is_update:
        self._notify_coverage_modified()

    # a new addition gets a shorthand alias (if available) & is announced
    else:
        self._request_shorthand_alias(coverage_name)
        self._notify_coverage_created()

    # return the created/updated coverage
    return mapping
def _evaluate_composition_recursive(self, node):
    """
    The internal (recursive) AST evaluation routine.

    Returns the DatabaseCoverage that the given AST node evaluates to, and
    raises ValueError if the node is not a recognized token type.
    """

    #
    # leaf nodes first: anything that is not a logic operator is either a
    # coverage range (eg, 'A,D') which evaluates to an aggregate coverage
    # set, or a single coverage token which evaluates to its associated
    # DatabaseCoverage. any other token should never happen.
    #

    if not isinstance(node, TokenLogicOperator):
        if isinstance(node, TokenCoverageRange):
            return self._evaluate_coverage_range(node)
        if isinstance(node, TokenCoverageSingle):
            return self._evaluate_coverage(node)
        raise ValueError("Invalid AST Token in Composition Tree")

    #
    # the node is a logic operator. its operands must be fully reduced
    # before we can operate on them
    #
    # eg:
    #    lhs = DatabaseCoverage for 'A'
    #    rhs = DatabaseCoverage for 'B'
    #

    lhs = self._evaluate_composition_recursive(node.op1)
    rhs = self._evaluate_composition_recursive(node.op2)

    #
    # before computing a new composition, compute a cheap 'hash' of the
    # operation by applying the node's operator to the coverage hashes of
    # its operands, eg:
    #
    #    cache_key = hash(A) | hash(B)
    #
    # this mirrors the properties of the operator itself: for the logical
    # operators (|, &, ^) operand order does not matter,
    #
    #    (A | B) == (B | A)
    #
    # while for arithmetic operations (-) it does
    #
    #    (A - B) != (B - A)
    #
    # collisions are more likely with this form of 'hash' than with a
    # conventional one, but are still expected to be extremely rare.
    #

    cache_key = node.operator(lhs.coverage_hash, rhs.coverage_hash)

    #
    # evaluating an AST produces lots of 'transient' compositions, so a
    # small cache of them (LRU, per the original notes) is maintained to
    # avoid unnecessary re-computation.
    #
    # eg, evaluating (A | B) - (C | D) produces
    #
    #    COMP_1 = (A | B)
    #    COMP_2 = (C | D)
    #    COMP_3 = COMP_1 - COMP_2
    #
    # all three are cached. if the user later evaluates (A | B) - (Z | D),
    # the (A | B) composition can be served from the cache, assuming it
    # has not been evicted.
    #

    # a truthy cache result means this composition was recently computed
    cached = self._composition_cache[cache_key]
    if cached:
        return cached

    # apply the operator to the operands' coverage to get the coverage mask
    mask = node.operator(lhs.coverage, rhs.coverage)

    # wrap the computed coverage mask in a new DatabaseCoverage
    composition = DatabaseCoverage(mask, self._palette)

    # cache & return the newly computed composition
    self._composition_cache[cache_key] = composition
    return composition
def __init__(self, palette):
    """
    Initialize the director's coverage tables, aliases, callback lists
    and background composition worker.

    palette is stored and also handed to every DatabaseCoverage created
    here. The composition worker thread is started as the very last step.
    """

    # color palette
    self._palette = palette

    # database metadata cache
    self._database_metadata = DatabaseMetadata()

    # flag to suspend/resume the automatic coverage aggregation
    self._aggregation_suspended = False

    #----------------------------------------------------------------------
    # Coverage
    #----------------------------------------------------------------------

    # the name of the active coverage (eg filename)
    self.coverage_name = NEW_COMPOSITION

    # loaded or composed database coverage mappings
    self._database_coverage = collections.OrderedDict()

    # a NULL / empty coverage set
    self._NULL_COVERAGE = DatabaseCoverage(None, palette)

    #
    # the director automatically maintains or generates a few coverage
    # sets of its own. these are not directly modifiable by the user,
    # but may be influenced by user actions, or loaded coverage data.
    #
    # NOTE: The ordering of the dict below is the order that its items
    # will be shown in lists such as UI dropdowns, etc.
    #

    self._special_coverage = collections.OrderedDict(
    [
        (HOT_SHELL,       DatabaseCoverage(None, palette)), # hot shell composition
        (NEW_COMPOSITION, DatabaseCoverage(None, palette)), # slow shell composition
        (AGGREGATE,       DatabaseCoverage(None, palette)), # aggregate composition
    ])

    #----------------------------------------------------------------------
    # Aliases
    #----------------------------------------------------------------------

    #
    # Within the director, one is allowed to alias the names of the
    # loaded coverage data it maintains. right now this is only used
    # to assign shorthand names to coverage data.
    #
    # in the future, this can be used for more fun/interesting user
    # mappings and aliases :-)
    #

    #
    # mapping of alias --> coverage_name
    #   eg: 'A' --> 'my_loaded_coverage.log'
    #

    self._alias2name = {}

    #
    # mapping of coverage_name --> set(aliases)
    #   eg: 'my_loaded_coverage.log' --> set('A', 'log1', 'foo')
    #

    self._name2alias = collections.defaultdict(set)

    #
    # shorthand 'symbols' are aliases that the director automatically
    # assigns to database coverage objects. these special aliases
    # consist of a single capital letter, eg 'A'
    #
    # these auto-aliased shorthand symbols were intended to be a less
    # cumbersome way to reference specific coverage sets while composing.
    #
    # Example -
    #
    #  given these shorthand aliases:
    #
    #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
    #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
    #   'C' --> 'drcov.boombox.exe.08438.0000.proc.log'
    #   'D' --> 'drcov.boombox.exe.02349.0000.proc.log'
    #   ...
    #   'Z' --> 'drcov.boombox.exe.50946.0000.proc.log'
    #   <eof>
    #
    #  one can more naturally compose interesting equations
    #
    #   ((A & B) | (D & (E - F))) | Z
    #
    # the existing limitation of shorthand symbols is that there are
    # only 26 (A-Z) aliases that can be assigned to coverage sets. There
    # are no immediate plans to further expand this range.
    #
    # the primary justification for this limitation is that I don't
    # expect users to be building complex compositions with 26+ coverage
    # sets loaded at once. At that point, shorthand aliases really
    # aren't going to make things any less cumbersome.
    #

    self._shorthand = collections.deque(ASCII_SHORTHAND)

    #
    # assign default aliases
    #

    # alias the aggregate set to '*'
    self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

    #----------------------------------------------------------------------
    # Callbacks
    #----------------------------------------------------------------------

    #
    # As the director is the data source for much of Lighthouse, it
    # is important that anything built on top of it can act on key
    # events or changes to the underlying data they consume.
    #
    # Callbacks provide a way for us to notify any interested parties
    # of these key events.
    #

    # lists of registered notification callbacks, see 'Callbacks' below
    self._coverage_switched_callbacks = []
    self._coverage_modified_callbacks = []
    self._coverage_created_callbacks  = []
    self._coverage_deleted_callbacks  = []

    #----------------------------------------------------------------------
    # Async
    #----------------------------------------------------------------------

    self._ast_queue = Queue.Queue()
    self._composition_cache = CompositionCache()

    self._composition_worker = threading.Thread(
        target=self._async_evaluate_ast,
        name="EvaluateAST"
    )

    #
    # the worker is deliberately started last. its target is a bound
    # method of this object, so starting it any earlier would expose a
    # partially constructed director (eg, one without its callback lists)
    # to the background thread
    #

    self._composition_worker.start()
def _evaluate_composition_recursive(self, node):
    """
    The internal (recursive) AST evaluation routine.

    Returns the DatabaseCoverage that the given AST node evaluates to, and
    raises ValueError if the node is not a recognized token type.
    """

    #
    # leaf nodes first: any AST node that is not a logic operator is either
    # a coverage range (eg, 'A,D') which evaluates to an aggregate coverage
    # set, or a single coverage token which evaluates to its associated
    # DatabaseCoverage. any other token should never happen.
    #

    if not isinstance(node, TokenLogicOperator):
        if isinstance(node, TokenCoverageRange):
            return self._evaluate_coverage_range(node)
        if isinstance(node, TokenCoverageSingle):
            return self._evaluate_coverage(node)
        raise ValueError("Invalid AST Token in Composition Tree")

    #
    # the AST node is a logic operator. its operands must be concretized
    # before it is appropriate for us to operate on them
    #
    # eg:
    #    lhs = DatabaseCoverage for 'A'
    #    rhs = DatabaseCoverage for 'B'
    #

    lhs = self._evaluate_composition_recursive(node.op1)
    rhs = self._evaluate_composition_recursive(node.op2)

    #
    # before computing a new composition, compute a low-cost 'hash' of the
    # desired operation by applying the node's operator to the coverage
    # hashes of its operands, eg:
    #
    #    cache_key = hash(A) | hash(B)
    #
    # the key identifies an existing (eg, previously computed) result in
    # the composition cache. it mirrors the properties of the operator
    # itself: for the logical operators (|, &, ^) operand order does not
    # matter,
    #
    #    (A | B) == (B | A)
    #
    # while for arithmetic operations (-) it does
    #
    #    (A - B) != (B - A)
    #
    # collisions are more likely with this form of 'hash' than with a
    # conventional one, but are still expected to be extremely rare...
    #

    cache_key = node.operator(lhs.coverage_hash, rhs.coverage_hash)

    #
    # evaluating an AST produces lots of 'transient' compositions, so a
    # small cache of them (LRU, per the original notes) is maintained to
    # avoid unnecessary re-computation in subsequent evaluations.
    #
    # eg, evaluating (A | B) - (C | D) produces
    #
    #    COMP_1 = (A | B)
    #    COMP_2 = (C | D)
    #    COMP_3 = COMP_1 - COMP_2
    #
    # COMP_3 is the result returned to the user, but all three are cached.
    # if the user later evaluates (A | B) - (Z | D), the (A | B)
    # composition can be served from the cache, assuming it has not been
    # evicted. this makes repeated operations far cheaper.
    #

    # a truthy cache result means this composition was recently computed
    cached = self._composition_cache[cache_key]
    if cached:
        return cached

    # apply the operator to the operands' coverage to get the coverage mask
    mask = node.operator(lhs.coverage, rhs.coverage)

    # use the computed coverage mask to generate a new DatabaseCoverage
    composition = DatabaseCoverage(self._palette, data=mask)

    # cache & return the newly computed composition
    self._composition_cache[cache_key] = composition
    return composition
def __init__(self, palette):
    """
    Initialize the director's metadata cache, coverage tables, aliases,
    callback lists and background composition worker.

    palette is stored and also handed to every DatabaseCoverage created
    here. The composition worker thread is started as the very last step.
    """

    # the plugin color palette
    self._palette = palette

    # the central database metadata cache
    self.metadata = DatabaseMetadata()

    #----------------------------------------------------------------------
    # Coverage
    #----------------------------------------------------------------------

    # the name of the active coverage
    self.coverage_name = NEW_COMPOSITION

    # a map of loaded or composed database coverages
    self._database_coverage = collections.OrderedDict()

    #
    # the director automatically maintains / generates a few coverage sets
    # of its own. these are not directly modifiable by the user, but may
    # be influenced by user actions (say, loading new coverage data)
    #
    # Note that the ordering of the dict below is the order that its items
    # will be shown in lists such as the CoverageComboBox dropdown, etc.
    #

    self._special_coverage = collections.OrderedDict(
    [
        (HOT_SHELL,       DatabaseCoverage(palette, HOT_SHELL)),
        (NEW_COMPOSITION, DatabaseCoverage(palette, NEW_COMPOSITION)),
        (AGGREGATE,       DatabaseCoverage(palette, AGGREGATE)),
    ])

    # a flag to suspend/resume the automatic coverage aggregation
    self._aggregation_suspended = False

    #----------------------------------------------------------------------
    # Aliases
    #----------------------------------------------------------------------

    #
    # Within the director, one is allowed to alias the names of the loaded
    # coverage data that it maintains. right now this is only used to
    # assign shorthand names to coverage data.
    #
    # mapping of {alias: coverage_name}
    #   eg: 'A' --> 'my_loaded_coverage.log'
    #

    self._alias2name = {}

    #
    # mapping of {coverage_name: set(aliases)}
    #   eg: 'my_loaded_coverage.log' --> set(['A', 'log1', 'foo'])
    #

    self._name2alias = collections.defaultdict(set)

    #
    # shorthand 'symbols' are aliases that the director automatically
    # assigns to loaded database coverage mappings. these special aliases
    # consist of a single capital letter, eg 'A'
    #
    # these auto-aliased shorthand symbols were intended to be a less
    # cumbersome way to reference specific coverage sets while composing.
    #
    # Example -
    #
    #  given these shorthand aliases:
    #
    #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
    #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
    #   'C' --> 'drcov.boombox.exe.08438.0000.proc.log'
    #   'D' --> 'drcov.boombox.exe.02349.0000.proc.log'
    #   ...
    #   'Z' --> 'drcov.boombox.exe.50946.0000.proc.log'
    #   <eof>
    #
    #  one can more naturally compose interesting coverage equations
    #
    #   ((A & B) | (D & (E - F))) | Z
    #
    # the existing limitation of shorthand symbols is that there are
    # only 26 (A-Z) aliases that can be assigned to coverage sets. There
    # are no immediate plans to further expand this range.
    #
    # the primary justification for this limitation is that I don't
    # expect users to be building complex compositions with 26+ coverage
    # sets loaded at once. At that point, shorthand aliases really
    # aren't going to make things any less cumbersome.
    #

    self._shorthand = collections.deque(ASCII_SHORTHAND)

    #
    # assign default aliases
    #

    # alias the aggregate set to '*'
    self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

    #----------------------------------------------------------------------
    # Callbacks
    #----------------------------------------------------------------------

    #
    # as the director is the data source for much of Lighthouse, it is
    # important that anything built on top of it can act on key events or
    # changes to the underlying data they consume.
    #
    # callbacks provide a way for us to notify any interested parties of
    # these key events. Below are lists of registered notification
    # callbacks. see 'Callbacks' section below for more info.
    #

    # coverage callbacks
    self._coverage_switched_callbacks = []
    self._coverage_modified_callbacks = []
    self._coverage_created_callbacks  = []
    self._coverage_deleted_callbacks  = []

    # metadata callbacks
    self._metadata_modified_callbacks = []

    #----------------------------------------------------------------------
    # Async Composition Computation
    #----------------------------------------------------------------------

    #
    # the director is responsible for computing the logical/arithmetic
    # results of coverage set operations (composing). thanks to our lifted
    # metadata, we can do these set computations completely asynchronously.
    #
    # we use locks, queues, and a background 'composition worker' thread
    # to handle these computation requests.
    #

    self._ast_queue = Queue.Queue()
    self._composition_lock = threading.Lock()
    self._composition_cache = CompositionCache()

    self._composition_worker = threading.Thread(
        target=self._async_evaluate_ast,
        name="EvaluateAST"
    )

    #
    # the worker is deliberately started last. its target is a bound
    # method of this object, so starting it any earlier would expose a
    # partially constructed director (eg, one without its callback lists)
    # to the background thread
    #

    self._composition_worker.start()