Пример #1
0
    def __init__(self, base, indexed_data, palette):
        """
        Initialize the coverage mapping from a base and address-indexed data.

        A falsy `indexed_data` is replaced with an empty defaultdict(int).
        """

        # palette consulted when this coverage gets painted
        self.palette = palette

        # no data supplied? start from an empty hit counter instead
        indexed_data = indexed_data or collections.defaultdict(int)

        self._base = base
        self.coverage_data = indexed_data

        # every address we were given starts out unmapped; BADADDR is kept
        # in the set alongside them
        pending = set(indexed_data.keys())
        pending.add(idaapi.BADADDR)
        self.unmapped_coverage = pending

        # metadata that this coverage will eventually be mapped against
        self._metadata = DatabaseMetadata(False)

        # child coverage objects created by the mapping process
        self.nodes = {}
        self.functions = {}

        #
        # profiling showed that having each child (eg, FunctionCoverage or
        # NodeCoverage) build its own weakref back to the parent database
        # coverage added a noticeable and unnecessary overhead.
        #
        # instead, a single weak proxy of ourself (the DatabaseCoverage
        # object) is created once, here, and handed out to every child we
        # create later on.
        #

        self._weak_self = weakref.proxy(self)
Пример #2
0
    def _refresh_database_metadata(self):
        """
        Rebuild the database metadata cache utilized by the director.

        A new DatabaseMetadata is built, diffed against `self.metadata`, and
        stored in `self._database_metadata`. The resulting MetadataDelta is
        returned to the caller.
        """
        logger.debug("Refreshing database metadata")

        # snapshot the database as it exists right now
        fresh_metadata = DatabaseMetadata()
        fresh_metadata.build_metadata()

        # diff the fresh snapshot against the metadata we were holding ...
        changes = MetadataDelta(fresh_metadata, self.metadata)

        # ... and only then let the fresh snapshot take its place
        self._database_metadata = fresh_metadata

        # hand back the delta describing what changed
        return changes
Пример #3
0
    def __init__(self, core, dctx):
        """
        Build the per-database context: metadata, director and painter.

        The disassembler context API for `dctx` is registered under this
        object before any of the other components are constructed.
        """
        disassembler[self] = DisassemblerContextAPI(dctx)

        self.core = core
        self.dctx = dctx
        self._started = False

        # shared palette, as exposed by the plugin core
        palette = self.core.palette

        # database metadata cache
        self.metadata = DatabaseMetadata(self)

        # coverage engine, fed by the metadata cache above
        self.director = CoverageDirector(self.metadata, palette)

        # coverage painter, driven by the director
        self.painter = CoveragePainter(self, self.director, palette)

        # coverage overview widget (none yet)
        self.coverage_overview = None

        # directory the coverage file dialog should open in (none yet)
        self._last_directory = None
Пример #4
0
    def __init__(self, data, palette):
        """
        Initialize the coverage mapping from raw runtime data and a palette.
        """

        # color palette
        self.palette = palette

        #
        # a coverage object such as this one is essentially a mapping of
        # coverage / runtime data layered on top of a metadata counterpart,
        # and is of little use without that metadata object.
        #
        # a stub metadata object is installed here; at runtime a fully
        # collected DatabaseMetadata (as maintained by the director) is
        # expected to be injected in its place.
        #

        self._metadata = DatabaseMetadata()

        #
        # the hitmap holds the raw coverage data: for each address /
        # instruction, the number of times it was executed.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040102: 1,
        #          0x8040105: 3,
        #          0x8040108: 3,  # 0x8040108 was executed 3 times...
        #          0x804010a: 3,
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # keeping hit counts (rather than a plain set) leaves room for
        # different data sources we can consume (inst trace, coverage, etc)
        # and different uses of that data (visualize coverage, heatmaps)
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a hash of the coverage bitmap/mask. the
        # director uses it to compare coverage quickly and to predict the
        # outputs of logical / arithmetic operations.
        #
        # the value is cached because computing it on demand can be
        # expensive; _update_coverage_hash() must therefore be called
        # whenever the hitmap is modified internally.
        #
        # see the usage of 'coverage_hash' in director.py for more info
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # coverage is only computed for code inside defined functions, so
        # each piece of runtime data either ends up bucketed into a
        # NodeCoverage object (eg, a basic block) or remains 'unmapped'.
        #
        # initially, everything in the hitmap is unmapped
        #

        pending = set(self._hitmap.keys())
        pending.add(idaapi.BADADDR)
        self._unmapped_data = pending

        #
        # self._map_coverage maps coverage data onto the database (via the
        # lifted 'DatabaseMetadata' cache), yielding NodeCoverage and
        # FunctionCoverage objects.
        #
        # NodeCoverage objects describe coverage of a node (basic block) and
        # are owned by their FunctionCoverage; FunctionCoverage objects
        # describe a function by way of their NodeCoverage children.
        #

        self.nodes = {}
        self.functions = {}

        #
        # a single weak proxy of ourself (the DatabaseCoverage object) is
        # created once so it can be handed to every child we create, rather
        # than instantiating a new one each time.
        #

        self._weak_self = weakref.proxy(self)
Пример #5
0
class DatabaseCoverage(object):
    """
    Database level coverage mapping.

    Holds a hitmap ({address: hit count}) and maps it onto database metadata,
    producing per-node (`self.nodes`) and per-function (`self.functions`)
    coverage objects keyed by address.
    """

    def __init__(self, data, palette):
        """
        Initialize the coverage mapping from raw runtime data and a palette.
        """

        # color palette
        self.palette = palette

        #
        # the abstract above gives some background to the design employed by
        # Lighthouse to map coverage data to the database.
        #
        # coverage objects such as this (DatabaseCoverage) are basically
        # glorified mappings of coverage / runtime data on top of their
        # metadata counterparts. A coverage object by itself is mostly useless
        # without its corresponding metadata object.
        #
        # here we simply populate self._metadata with a stub metadata object,
        # but at runtime we will inject a fully collected DatabaseMetadata
        # object as maintained by the director.
        #

        self._metadata = DatabaseMetadata()

        #
        # the hitmap effectively holds the raw coverage data. the name
        # should speak for itself, but a hitmap will track the number of
        # times a given address / instruction was executed.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040102: 1,
        #          0x8040105: 3,
        #          0x8040108: 3,  # 0x8040108 was executed 3 times...
        #          0x804010a: 3,
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # this structure gives us an interesting degree of flexibility with
        # regard to what data sources we can consume (inst trace, coverage,
        # etc) and ways we can leverage said data (visualize coverage,
        # heatmaps)
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a simple hash of the coverage bitmap/mask.
        # it is primarily used by the director as a means of quickly comparing
        # coverage, and predicting outputs of logical / arithmetic operations.
        #
        # the hash will need to be updated via _update_coverage_hash() anytime
        # the hitmap is modified or changed internally. we cache a concrete
        # result of the coverage hash because computing the hash on demand can
        # be expensive in terms of time.
        #
        # see the usage of 'coverage_hash' in director.py for more info
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # Lighthouse will only compute coverage for code within defined
        # functions. therefore, all coverage / runtime data will get bucketed
        # into its appropriate NodeCoverage object (eg, a basic block) or it
        # will be considered 'unmapped'
        #
        # starting out, all coverage data is marked as unmapped
        #

        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(idaapi.BADADDR)

        #
        # self._map_coverage is responsible for mapping coverage data to the
        # database (via the lifted 'DatabaseMetadata' cache). The mapping
        # process will yield NodeCoverage & FunctionCoverage objects.
        #
        # NodeCoverage objects represent coverage at the node (basic block)
        # level and are owned by their respective FunctionCoverage objects.
        #
        # FunctionCoverage represent coverage at the function level by
        # leveraging their respective NodeCoverage children.
        #

        self.nodes     = {}
        self.functions = {}

        #
        # we instantiate a single weakref of ourself (the DatabaseCoverage
        # object) such that we can distribute it to the children we create
        # without having to repeatedly instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def data(self):
        """
        The data (a hitmap) used by this mapping.
        """
        return self._hitmap

    @property
    def coverage(self):
        """
        The instruction-level coverage bitmap/mask of this mapping.

        This is a live key view of the hitmap, not a copy.
        """
        return self._hitmap.viewkeys()

    @property
    def instruction_percent(self):
        """
        The database coverage % by instructions executed in all defined functions.

        Computed as the sum of `instruction_percent` over every function
        coverage object we hold, divided by the number of functions known to
        the metadata. Returns 0 when the metadata defines no functions.
        """
        num_funcs = len(self._metadata.functions)

        # avoid a zero division error
        if not num_funcs:
            return 0

        # sum all the function coverage %'s
        func_sum = sum(f.instruction_percent for f in self.functions.itervalues())

        # return the average function coverage % aka 'the database coverage %'
        return func_sum / num_funcs

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Install a new database metadata object.

        Only a weak proxy to `metadata` is stored. If a `delta` is provided,
        the coverage it affects is unmapped so that it can be recomputed on
        the next refresh.
        """

        # install the new metadata
        self._metadata = weakref.proxy(metadata)

        # unmap all the coverage affected by the metadata delta
        if delta:
            self._unmap_delta(delta)

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

        # dump the unmappable coverage data
        #self.dump_unmapped()

    def refresh_nodes(self):
        """
        Special fast-refresh of nodes as used in the un-painting process.

        Only the node level is re-mapped and finalized; function coverage is
        left untouched.
        """
        dirty_nodes = self._map_nodes()
        self._finalize_nodes(dirty_nodes)

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize coverage objects for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize coverage nodes for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize coverage functions for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    #--------------------------------------------------------------------------
    # Data Operations
    #--------------------------------------------------------------------------

    def add_data(self, data):
        """
        Add runtime data ({address: hit count}) to this mapping.

        The touched addresses are queued as unmapped; call refresh() to map
        them.
        """

        # add the given runtime data to our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] += hit_count

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= data.viewkeys()

    def subtract_data(self, data):
        """
        Subtract runtime data ({address: hit count}) from this mapping.

        All existing node / function mappings are discarded afterwards; call
        refresh() to rebuild them.
        """

        # subtract the given runtime data from our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] -= hit_count

            #assert self._hitmap[address] >= 0

            #
            # if there is no longer any hits for this address, delete its
            # entry from the hitmap. we don't want its entry to hang around
            # because we use self._hitmap.viewkeys() as a coverage
            # bitmap/mask
            #

            if not self._hitmap[address]:
                del self._hitmap[address]

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        #
        # unmap everything because a complete re-mapping is easier with the
        # current implementation of things
        #

        self._unmap_all()

    #--------------------------------------------------------------------------
    # Coverage Operations
    #--------------------------------------------------------------------------

    def mask_data(self, coverage_mask):
        """
        Mask the hitmap data against a given coverage mask.

        Returns a new DatabaseCoverage containing the masked hitmap. Every
        address in `coverage_mask` appears in the result; addresses we hold
        no hits for are carried over with a hit count of zero.
        """
        composite_data = collections.defaultdict(int)

        #
        # preserve only hitmap data that matches the coverage mask.
        #
        # the lookup deliberately goes through .get() rather than indexing:
        # the hitmap is expected to behave like a defaultdict(int) (add_data
        # relies on that), and indexing a missing address would insert a
        # zero entry into OUR hitmap, growing self.coverage without the
        # coverage hash ever being refreshed.
        #

        for address in coverage_mask:
            composite_data[address] = self._hitmap.get(address, 0)

        # done, return a new DatabaseCoverage masked with the given coverage
        return DatabaseCoverage(composite_data, self.palette)

    def _update_coverage_hash(self):
        """
        Update the hash of the coverage mask.

        An empty hitmap hashes to 0.
        """
        if self._hitmap:
            self.coverage_hash = hash(frozenset(self._hitmap.viewkeys()))
        else:
            self.coverage_hash = 0

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the given database metadata.

        Returns a (dirty_nodes, dirty_functions) tuple of the coverage
        objects touched by this pass, each a dict keyed by address.
        """

        # re-map any unmapped coverage to nodes
        dirty_nodes = self._map_nodes()

        # re-map nodes to functions
        dirty_functions = self._map_functions(dirty_nodes)

        # return the modified objects
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded runtime data to database defined nodes (basic blocks).

        Returns a dict of {node address: NodeCoverage} for every node that
        was touched by this pass.
        """
        dirty_nodes = {}
        addresses_to_map = collections.deque(sorted(self._unmapped_data))

        #
        # This while loop is the core of our coverage mapping process.
        #
        # A sorted snapshot of the '_unmapped_data' set is consumed by this
        # loop, mapping any unmapped runtime data maintained by this
        # DatabaseCoverage to the given database metadata.
        #
        # It should be noted that the rest of the database coverage
        # mapping (eg functions) gets built on top of the mappings we build
        # for nodes here using the more or less raw/recycled runtime data.
        #

        while addresses_to_map:

            # get the next address to map
            address = addresses_to_map.popleft()

            # get the node (basic block) that contains this address
            try:
                node_metadata = self._metadata.get_node(address)

            #
            # failed to locate the node (basic block) for this address.
            # this address must not fall inside of a defined function...
            #

            except ValueError:
                continue

            #
            # we found applicable node metadata for this address, now try
            # to find the mapping object for this node address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate a node coverage object, looks like this is
            # the first time we have identified coverage for this node.
            # create a coverage node object and use it now.
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address, self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the basic block end now to reduce overhead in the loop below
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below can be thought of almost as an inlined fast-path
            # where we expect the next several addresses to belong to the same
            # node (basic block).
            #
            # with the assumption of linear program execution, we can reduce
            # the heavier overhead of all the lookup code above by simply
            # checking if the next address in the queue (addresses_to_map)
            # falls into the same / current node (basic block).
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside our scope
            #

            while True:

                #
                # map the hitmap data for the current address (an instruction)
                # to this node mapping and mark the instruction as mapped by
                # discarding its address from the unmapped data set
                #

                if address in node_metadata.instructions:
                    node_coverage.executed_instructions[address] = self._hitmap[address]
                    self._unmapped_data.discard(address)

                #
                # nothing left to map? leave the fast-path. without this
                # check, the popleft() below raises IndexError whenever the
                # queue drains while we are still inside a node, which made
                # the loop depend on a trailing sentinel entry being present
                # in the unmapped data.
                #

                if not addresses_to_map:
                    break

                # get the next address to attempt mapping on
                address = addresses_to_map.popleft()

                #
                # if the next address is not in this node, it's time break out
                # of this loop and send it through the full node lookup path
                #

                if not (node_metadata.address <= address < node_end):
                    addresses_to_map.appendleft(address)
                    break

                #
                # the next address to be mapped DOES fall within our current
                # node, loop back around in the fast-path and map it
                #

                # ...

            # since we updated this node, ensure we're tracking it as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.

        Returns a dict of {function address: FunctionCoverage} for every
        function owning at least one of the given dirty nodes.
        """
        dirty_functions = {}

        #
        # thanks to the _map_nodes function, we now have a repository of
        # node coverage objects that are considered 'dirty' and can be used
        # to precisely guide the generation of our function level coverage
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using the node_coverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of the
            # function that owns it
            #

            function_metadata = self._metadata.nodes[node_coverage.address].function

            #
            # now we can add this node to its respective function level
            # coverage mapping
            #

            function_coverage = self.functions.get(function_metadata.address, None)

            #
            # if we failed to locate a function coverage object, it means
            # that this is the first time we have identified coverage for this
            # function. create a new coverage function object and use it now.
            #

            if not function_coverage:
                function_coverage = FunctionCoverage(function_metadata.address, self._weak_self)
                self.functions[function_metadata.address] = function_coverage

            # mark this node as executed in the function level mapping
            function_coverage.mark_node(node_coverage)
            dirty_functions[function_metadata.address] = function_coverage

            # end of nodes loop

        # done
        return dirty_functions

    def _unmap_all(self):
        """
        Unmap all mapped data.

        Every hitmap address is queued as unmapped again, and all node and
        function coverage objects are dropped.
        """
        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(idaapi.BADADDR)
        self.nodes     = {}
        self.functions = {}

    def _unmap_delta(self, delta):
        """
        Unmap node & function coverage affected by the metadata delta.

        The metadata delta tells us exactly which parts of the database
        changed since our last coverage mapping. This function surgically
        unmaps the pieces of our coverage that may now be stale.

        This enables us to recompute only what is necessary upon refresh.
        """
        self._unmap_nodes(itertools.chain(delta.nodes_removed, delta.nodes_modified))
        self._unmap_functions(delta.functions_removed)

    def _unmap_nodes(self, node_addresses):
        """
        Unmap any data associated with a given list of node addresses.
        """

        #
        # using the metadata delta as a guide, we loop through all the nodes
        # it has noted as either modified, or deleted. it is in our best
        # interest to unmap any of these dirty (stale) node addresses in OUR
        # coverage mapping so we can selectively regenerate their coverage
        # later.
        #

        for node_address in node_addresses:

            #
            # if there's no coverage for this node, then we have nothing to do.
            # continue on to the next dirty node address
            #

            node_coverage = self.nodes.pop(node_address, None)
            if not node_coverage:
                continue

            # the node was found, unmap any of its tracked coverage blocks
            self._unmapped_data.update(
                node_coverage.executed_instructions.viewkeys()
            )

    def _unmap_functions(self, function_addresses):
        """
        Unmap any data associated with a given list of function addresses.

        Addresses we hold no function coverage for are ignored.
        """
        for function_address in function_addresses:
            self.functions.pop(function_address, None)

    #--------------------------------------------------------------------------
    # Debug
    #--------------------------------------------------------------------------

    def dump_unmapped(self):
        """
        Dump the unmapped coverage data.
        """
        lmsg("Unmapped Coverage:")
        for address in self._unmapped_data:
            lmsg(" * 0x%X" % address)
Пример #6
0
    def __init__(self, palette):
        """
        Set up the director: coverage sets, aliases, async worker, callbacks.

        Note that constructing the director starts the "EvaluateAST" worker
        thread.
        """

        # color palette
        self._palette = palette

        # cached database metadata
        self._database_metadata = DatabaseMetadata()

        # while set, automatic coverage aggregation is suspended
        self._aggregation_suspended = False

        #----------------------------------------------------------------------
        # Coverage
        #----------------------------------------------------------------------

        # name of the currently active coverage (eg filename)
        self.coverage_name = NEW_COMPOSITION

        # database coverage mappings that were loaded or composed
        self._database_coverage = collections.OrderedDict()

        # a NULL / empty coverage set
        self._NULL_COVERAGE = DatabaseCoverage(None, palette)

        #
        # a handful of coverage sets are maintained or generated by the
        # director itself. users cannot modify them directly, although user
        # actions and loaded coverage data may influence them.
        #
        #   HOT_SHELL       - hot shell composition
        #   NEW_COMPOSITION - slow shell composition
        #   AGGREGATE       - aggregate composition
        #
        # NOTE: insertion order matters, it is the order these items are
        # shown in lists such as UI dropdowns, etc.
        #

        self._special_coverage = collections.OrderedDict()
        for special_name in (HOT_SHELL, NEW_COMPOSITION, AGGREGATE):
            self._special_coverage[special_name] = DatabaseCoverage(None, palette)

        #----------------------------------------------------------------------
        # Aliases
        #----------------------------------------------------------------------
        #
        #   The director allows the names of the coverage data it maintains
        #   to be aliased. For now, this only serves to give coverage data
        #   shorthand names.
        #
        #   in the future, this can be used for more fun/interesting user
        #   mappings and aliases :-)
        #

        #
        # alias --> coverage_name
        #   eg: 'A' --> 'my_loaded_coverage.log'
        #

        self._alias2name = {}

        #
        # coverage_name --> set(aliases)
        #   eg: 'my_loaded_coverage.log' --> set('A', 'log1', 'foo')
        #

        self._name2alias = collections.defaultdict(set)

        #
        # shorthand 'symbols' are aliases the director assigns on its own to
        # database coverage objects. each one is a single capital letter,
        # eg 'A', meant as a less cumbersome way of referring to a specific
        # coverage set while composing.
        #
        # Example -
        #
        #  given these shorthand aliases:
        #
        #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
        #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
        #   'C' --> 'drcov.boombox.exe.08438.0000.proc.log'
        #   'D' --> 'drcov.boombox.exe.02349.0000.proc.log'
        #   ...
        #   'Z' --> 'drcov.boombox.exe.50946.0000.proc.log'
        #   <eof>
        #
        #  one can more naturally compose interesting equations
        #
        #   ((A & B) | (D & (E - F))) | Z
        #
        # shorthand symbols are limited to 26 (A-Z) aliases, and there are
        # no immediate plans to expand that range.
        #
        # the justification is that users are not expected to build complex
        # compositions with 26+ coverage sets loaded at once; at that point
        # shorthand aliases would not make things any less cumbersome.
        #

        self._shorthand = collections.deque(ASCII_SHORTHAND)

        #
        # default aliases: the aggregate set is aliased to '*'
        #

        self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

        #----------------------------------------------------------------------
        # Async
        #----------------------------------------------------------------------

        self._ast_queue = Queue.Queue()
        self._composition_cache = CompositionCache()

        # background worker running self._async_evaluate_ast
        self._composition_worker = threading.Thread(
            name="EvaluateAST",
            target=self._async_evaluate_ast
        )
        self._composition_worker.start()

        #----------------------------------------------------------------------
        # Callbacks
        #----------------------------------------------------------------------
        #
        #   The director is the data source for much of Lighthouse, so
        #   anything built on top of it needs a way to react to key events
        #   or changes in the data it consumes.
        #
        #   Callbacks are how interested parties get notified of those
        #   events.
        #

        # registered notification callbacks, one list per event
        self._coverage_switched_callbacks = []
        self._coverage_modified_callbacks = []
        self._coverage_created_callbacks  = []
        self._coverage_deleted_callbacks  = []
Пример #7
0
    def __init__(self, palette):
        """
        Set up the director: coverage sets, aliases, async worker, callbacks.

        Note that constructing the director starts the "EvaluateAST" worker
        thread.
        """

        # plugin color palette
        self._palette = palette

        # central database metadata cache
        self.metadata = DatabaseMetadata()

        #----------------------------------------------------------------------
        # Coverage
        #----------------------------------------------------------------------

        # name of the currently active coverage
        self.coverage_name = NEW_COMPOSITION

        # map of the database coverages that were loaded or composed
        self._database_coverage = collections.OrderedDict()

        #
        # a handful of coverage sets are maintained / generated by the
        # director itself. users cannot modify them directly, although user
        # actions (say, loading new coverage data) may influence them.
        #
        # insertion order matters here: it is the order these items are
        # shown in lists such as the CoverageComboBox dropdown, etc.
        #

        self._special_coverage = collections.OrderedDict()
        for special_name in (HOT_SHELL, NEW_COMPOSITION, AGGREGATE):
            self._special_coverage[special_name] = DatabaseCoverage(palette, special_name)

        # while set, automatic coverage aggregation is suspended
        self._aggregation_suspended = False

        #----------------------------------------------------------------------
        # Aliases
        #----------------------------------------------------------------------

        #
        # the director allows the names of the loaded coverage data it
        # maintains to be aliased. for now, this only serves to give
        # coverage data shorthand names.
        #
        # mapping of {alias: coverage_name}
        #   eg: 'A' --> 'my_loaded_coverage.log'
        #

        self._alias2name = {}

        #
        # mapping of {coverage_name: set(aliases)}
        #   eg: 'my_loaded_coverage.log' --> set(['A', 'log1', 'foo'])
        #

        self._name2alias = collections.defaultdict(set)

        #
        # shorthand 'symbols' are aliases the director assigns on its own to
        # loaded database coverage mappings. each one is a single capital
        # letter, eg 'A', meant as a less cumbersome way of referring to a
        # specific coverage set while composing.
        #
        # Example -
        #
        #  given these shorthand aliases:
        #
        #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
        #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
        #   'C' --> 'drcov.boombox.exe.08438.0000.proc.log'
        #   'D' --> 'drcov.boombox.exe.02349.0000.proc.log'
        #   ...
        #   'Z' --> 'drcov.boombox.exe.50946.0000.proc.log'
        #   <eof>
        #
        #  one can more naturally compose interesting coverage equations
        #
        #   ((A & B) | (D & (E - F))) | Z
        #
        # shorthand symbols are limited to 26 (A-Z) aliases, and there are
        # no immediate plans to expand that range.
        #
        # the justification is that users are not expected to build complex
        # compositions with 26+ coverage sets loaded at once; at that point
        # shorthand aliases would not make things any less cumbersome.
        #

        self._shorthand = collections.deque(ASCII_SHORTHAND)

        #
        # default aliases: the aggregate set is aliased to '*'
        #

        self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

        #----------------------------------------------------------------------
        # Async Composition Computation
        #----------------------------------------------------------------------

        #
        # computing the logical/arithmetic results of coverage set
        # operations (composing) is the director's job. because the metadata
        # is lifted, these set computations can be done completely
        # asynchronously.
        #
        # a queue, a lock, and a background 'composition worker' thread are
        # used to service these computation requests.
        #

        self._ast_queue = Queue.Queue()
        self._composition_lock = threading.Lock()
        self._composition_cache = CompositionCache()

        # background worker running self._async_evaluate_ast
        self._composition_worker = threading.Thread(
            name="EvaluateAST",
            target=self._async_evaluate_ast
        )
        self._composition_worker.start()

        #----------------------------------------------------------------------
        # Callbacks
        #----------------------------------------------------------------------

        #
        # the director is the data source for much of Lighthouse, so anything
        # built on top of it needs a way to react to key events or changes
        # in the data it consumes.
        #
        # callbacks are how interested parties get notified of those events.
        # the lists below hold the registered notification callbacks, one
        # list per event.
        #

        # coverage callbacks
        self._coverage_switched_callbacks = []
        self._coverage_modified_callbacks = []
        self._coverage_created_callbacks  = []
        self._coverage_deleted_callbacks  = []

        # metadata callbacks
        self._metadata_modified_callbacks = []
Пример #8
0
class CoverageDirector(object):
    """
    The CoverageDirector manages loaded coverage, and coverage composition.

    This class is the 'brain' of Lighthouse. Its primary role is to centralize
    loaded coverage and switch between which set is 'active'. It also houses
    the logic to perform set operations between loaded coverage.

    This provides a platform for researchers to explore the relationship
    between any number of coverage files.
    """

    ERROR_COVERAGE_ABSENT = 1
    ERROR_COVERAGE_SUSPICIOUS = 2

    def __init__(self, palette):
        """
        Initialize the director's coverage tables, aliases, worker & callbacks.

        The given palette is stored, and is also used to construct the
        director-maintained (special) DatabaseCoverage objects.

        NOTE: constructing a director starts the background 'EvaluateAST'
        composition worker thread. Call terminate() to stop and join it.
        """

        # the plugin color palette
        self._palette = palette

        # the central database metadata cache
        self.metadata = DatabaseMetadata()

        #----------------------------------------------------------------------
        # Coverage
        #----------------------------------------------------------------------

        # the name of the active coverage
        self.coverage_name = NEW_COMPOSITION

        # a map of loaded or composed database coverages
        self._database_coverage = collections.OrderedDict()

        #
        # the director automatically maintains / generates a few coverage sets
        # of its own. these are not directly modifiable by the user, but may
        # be influenced by user actions (say, loading new coverage data)
        #
        # Note that the ordering of the dict below is the order that its items
        # will be shown in lists such as the CoverageComboBox dropdown, etc.
        #

        self._special_coverage = collections.OrderedDict(
        [
            (HOT_SHELL,       DatabaseCoverage(palette, HOT_SHELL)),
            (NEW_COMPOSITION, DatabaseCoverage(palette, NEW_COMPOSITION)),
            (AGGREGATE,       DatabaseCoverage(palette, AGGREGATE)),
        ])

        # a flag to suspend/resume the automatic coverage aggregation
        self._aggregation_suspended = False

        #----------------------------------------------------------------------
        # Aliases
        #----------------------------------------------------------------------

        #
        # Within the director, one is allowed to alias the names of the loaded
        # coverage data that it maintains. right now this is only used to
        # assign shorthand names to coverage data.
        #
        # mapping of {alias: coverage_name}
        #   eg: 'A' --> 'my_loaded_coverage.log'
        #

        self._alias2name = {}

        #
        # mapping of {coverage_name: set(aliases)}
        #   eg: 'my_loaded_coverage.log' --> set(['A', 'log1', 'foo'])
        #

        self._name2alias = collections.defaultdict(set)

        #
        # shorthand 'symbols' are aliases that the director automatically
        # assigns to loaded database coverage mappings. these special aliases
        # consist of a single capital letter, eg 'A'
        #
        # these auto-aliased shorthand symbols were intended to be a less
        # cumbersome way to reference specific coverage sets while composing.
        #
        # Example -
        #
        #  given these shorthand aliases:
        #
        #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
        #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
        #   'C' --> 'drcov.boombox.exe.08438.0000.proc.log'
        #   'D' --> 'drcov.boombox.exe.02349.0000.proc.log'
        #   ...
        #   'Z' --> 'drcov.boombox.exe.50946.0000.proc.log'
        #   <eof>
        #
        #  one can more naturally compose interesting coverage equations
        #
        #   ((A & B) | (D & (E - F))) | Z
        #
        # the existing limitation of shorthand symbols is that there are
        # only 26 (A-Z) aliases that can be assigned to coverage sets. There
        # are no immediate plans to further expand this range.
        #
        # the primary justification for this limitation is that I don't
        # expect users to be building complex compositions with 26+ coverage
        # sets loaded at once. At that point, shorthand aliases really
        # aren't going to make things any less cumbersome.
        #

        self._shorthand = collections.deque(ASCII_SHORTHAND)

        #
        # assign default aliases
        #

        # alias the aggregate set to '*'
        self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

        #----------------------------------------------------------------------
        # Async Composition Computation
        #----------------------------------------------------------------------

        #
        # the director is responsible for computing the logical/arithmetic
        # results of coverage set operations (composing). thanks to our lifted
        # metadata, we can do these set computations completely asynchronously.
        #
        # we use locks, queues, and a background 'composition worker' thread
        # to handle these computation requests.
        #

        # pending ASTs for the worker; a None entry tells the worker to exit
        self._ast_queue = Queue.Queue()
        self._composition_lock = threading.Lock()
        self._composition_cache = CompositionCache()

        # the worker begins running (and waiting on the queue) immediately
        self._composition_worker = threading.Thread(
            target=self._async_evaluate_ast,
            name="EvaluateAST"
        )
        self._composition_worker.start()

        #----------------------------------------------------------------------
        # Callbacks
        #----------------------------------------------------------------------

        #
        # as the director is the data source for much of Lighthouse, it is
        # important that anything built on top of it can act on key events or
        # changes to the underlying data they consume.
        #
        # callbacks provide a way for us to notify any interested parties of
        # these key events. Below are lists of registered notification
        # callbacks. see the 'Callbacks' section below for more info.
        #

        # coverage callbacks
        self._coverage_switched_callbacks = []
        self._coverage_modified_callbacks = []
        self._coverage_created_callbacks  = []
        self._coverage_deleted_callbacks  = []

        # metadata callbacks
        self._metadata_modified_callbacks = []

    def terminate(self):
        """
        Cleanup & terminate the director.
        """

        # stop the composition worker
        self._ast_queue.put(None)
        self._composition_worker.join()

        # spin down the live metadata object
        self.metadata.terminate()

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def coverage(self):
        """
        Return the active database coverage.
        """
        return self.get_coverage(self.coverage_name)

    @property
    def aggregate(self):
        """
        The director-maintained aggregate database coverage.
        """
        special = self._special_coverage
        return special[AGGREGATE]

    @property
    def coverage_names(self):
        """
        Return the list or loaded / composed database coverage names.
        """
        return self._database_coverage.keys()

    @property
    def special_names(self):
        """
        Return the list of special (director maintained) coverage names.
        """
        return self._special_coverage.keys()

    @property
    def all_names(self):
        """
        Return the names of both special & loaded/composed coverage data.
        """
        return self.coverage_names + self.special_names

    #----------------------------------------------------------------------
    # Callbacks
    #----------------------------------------------------------------------

    def coverage_switched(self, callback):
        """
        Register the given callback for coverage switch events.
        """
        subscribers = self._coverage_switched_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_switched(self):
        """
        Fire the callbacks registered for coverage switch events.
        """
        subscribers = self._coverage_switched_callbacks
        notify_callback(subscribers)

    def coverage_modified(self, callback):
        """
        Register the given callback for coverage modification events.
        """
        subscribers = self._coverage_modified_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_modified(self):
        """
        Fire the callbacks registered for coverage modification events.
        """
        subscribers = self._coverage_modified_callbacks
        notify_callback(subscribers)

    def coverage_created(self, callback):
        """
        Register the given callback for coverage creation events.
        """
        subscribers = self._coverage_created_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_created(self):
        """
        Fire the callbacks registered for coverage creation events.

        TODO/FUTURE: send list of names created?
        """
        subscribers = self._coverage_created_callbacks
        notify_callback(subscribers)

    def coverage_deleted(self, callback):
        """
        Register the given callback for coverage deletion events.
        """
        subscribers = self._coverage_deleted_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_deleted(self):
        """
        Fire the callbacks registered for coverage deletion events.

        TODO/FUTURE: send list of names deleted?
        """
        subscribers = self._coverage_deleted_callbacks
        notify_callback(subscribers)

    def metadata_modified(self, callback):
        """
        Register the given callback for metadata modification events.
        """
        subscribers = self._metadata_modified_callbacks
        register_callback(subscribers, callback)

    def _notify_metadata_modified(self):
        """
        Fire the callbacks registered for metadata modification events.
        """
        subscribers = self._metadata_modified_callbacks
        notify_callback(subscribers)

    #----------------------------------------------------------------------
    # Batch Loading
    #----------------------------------------------------------------------

    def resume_aggregation(self):
        """
        Resume automatic updating of the coverage aggregate.
        """
        assert self._aggregation_suspended
        self._refresh_aggregate()
        self._aggregation_suspended = False

    def suspend_aggregation(self):
        """
        Suspend the coverage aggregate from being automatically updated.

        Suspending aggregation while loading a number of individual coverage
        files (and resuming it once afterwards) prevents the aggregate
        coverage set from being re-computed after every single file.
        """

        # the commit / delete paths check this flag before refreshing the aggregate
        self._aggregation_suspended = True

    #----------------------------------------------------------------------
    # Coverage Creation
    #----------------------------------------------------------------------

    def create_coverage(self, coverage_name, coverage_data, coverage_filepath=None):
        """
        Create a new database coverage mapping from the given data.
        """
        return self.update_coverage(coverage_name, coverage_data, coverage_filepath)

    def create_coverage_from_drcov_list(self, drcov_list):
        """
        Create a number of database coverage mappings from a list of DrcovData.

        Each entry is normalized to the open database and committed through
        create_coverage(...) under the basename of its filepath. If that name
        is already held by coverage loaded from a different filepath, a
        numeric suffix ('_2', '_3', ...) is appended to the new name.

        Returns a tuple of (created_coverage, errors), where created_coverage
        is the list of coverage names that were created and errors is a list
        of (error_code, filepath) tuples using the ERROR_COVERAGE_* codes.

        Aggregation is suspended for the duration of the batch, and is always
        resumed again, even if an unexpected exception escapes the loop.
        """
        created_coverage = []
        errors = []

        #
        # stop the director's aggregate from updating. this will prevent the
        # aggregate from recomputing after each individual mapping is created.
        # instead, we will wait till *all* have been created, computing the
        # new aggregate at the very end. this is far more performant.
        #

        self.suspend_aggregation()

        try:

            #
            # loop through the coverage data we have been given (drcov_list),
            # and begin the normalization process to translate / filter /
            # flatten its blocks into a generic format the director can
            # consume (a list of addresses)
            #

            for i, drcov_data in enumerate(drcov_list, 1):

                # keep the user informed about our progress while loading
                disassembler.replace_wait_box(
                    "Normalizing and mapping coverage %u/%u" % (i, len(drcov_list))
                )

                #
                # translate the coverage data's basic block addresses to the
                # imagebase of the open database, and flatten the blocks to a
                # list of instruction addresses
                #

                try:
                    coverage_data = self._normalize_drcov_data(drcov_data)
                except ValueError as e:
                    errors.append((self.ERROR_COVERAGE_ABSENT, drcov_data.filepath))
                    lmsg("Failed to normalize coverage %s" % drcov_data.filepath)
                    lmsg("- %s" % e)
                    continue

                #
                # before injecting the new coverage data (now a list of
                # instruction addresses), we check to see if there is an
                # existing coverage object under the same name.
                #
                # if there is an existing coverage mapping, odds are that the
                # user is probably re-loading the same coverage file in which
                # case we simply overwrite the old DatabaseCoverage object.
                #
                # but we have to be careful for the case where the user loads
                # a coverage file from a different directory, but under the
                # same name
                #
                # e.g:
                #  - C:\coverage\foo.log
                #  - C:\coverage\testing\foo.log
                #
                # in these cases, we append a suffix to the new coverage name
                #

                coverage_name = os.path.basename(drcov_data.filepath)
                coverage = self.get_coverage(coverage_name)

                #
                # find the first unused '<name>_<n>' on a collision. the
                # suffix counter is deliberately kept separate from the
                # progress counter (i) of the enclosing loop.
                #

                if coverage and coverage.filepath != drcov_data.filepath:
                    suffix = 2
                    while self.get_coverage("%s_%u" % (coverage_name, suffix)):
                        suffix += 1
                    coverage_name = "%s_%u" % (coverage_name, suffix)

                #
                # finally, we can ask the director to create a coverage
                # mapping from the data we have pre-processed for it
                #

                coverage = self.create_coverage(
                    coverage_name,
                    coverage_data,
                    drcov_data.filepath
                )
                created_coverage.append(coverage_name)

                # warn when loaded coverage appears to be poorly mapped
                if coverage.suspicious:
                    errors.append((self.ERROR_COVERAGE_SUSPICIOUS, drcov_data.filepath))
                    lmsg("Badly mapped coverage %s" % drcov_data.filepath)

                # warn when loaded coverage (for this module) appears empty
                if not len(coverage.nodes):
                    errors.append((self.ERROR_COVERAGE_ABSENT, drcov_data.filepath))
                    lmsg("No relevant coverage data in %s" % drcov_data.filepath)

            disassembler.replace_wait_box("Recomputing coverage aggregate...")

        #
        # resume the director's aggregation service, triggering an update to
        # recompute the aggregate with the newly loaded coverage. this lives
        # in a 'finally' so a failure mid-batch cannot leave the director
        # with aggregation permanently suspended.
        #

        finally:
            self.resume_aggregation()

        # done
        return (created_coverage, errors)

    def _normalize_drcov_data(self, drcov_data):
        """
        Pull the coverage relevant to this database out of a DrcovData object.

        The blocks logged for the database's root binary are rebased to the
        database imagebase, coalesced, and then flattened by the metadata.

        Returns a list of executed instruction addresses for this database.
        """
        metadata = self.metadata

        # only the blocks belonging to the root binary are of interest
        blocks = drcov_data.get_blocks_by_module(metadata.filename)

        # rebase the blocks onto the imagebase of the open database
        blocks = rebase_blocks(metadata.imagebase, blocks)

        # merge the blocks into larger contiguous blobs
        blobs = coalesce_blocks(blocks)

        # break the blobs down into individual instruction addresses
        return metadata.flatten_blocks(blobs)

    def aggregate_drcov_batch(self, drcov_list):
        """
        Fold a list of DrcovData into one standalone coverage mapping.

        Returns a tuple of (coverage, errors), where coverage is the newly
        built DatabaseCoverage and errors is a list of (error_code, filepath)
        tuples for the entries that could not be normalized.

        See create_coverage_from_drcov_list(...) for more verbose comments.
        """
        failures = []

        # the coverage set that every batch entry gets merged into
        batch_coverage = DatabaseCoverage(self._palette)

        for position, drcov_data in enumerate(drcov_list, 1):

            # surface our progress to the user
            progress = "Aggregating batch data %u/%u" % (position, len(drcov_list))
            disassembler.replace_wait_box(progress)

            # map the coverage data onto the open database
            try:
                addresses = self._normalize_drcov_data(drcov_data)
            except Exception as e:
                failures.append((self.ERROR_COVERAGE_ABSENT, drcov_data.filepath))
                lmsg("Failed to normalize coverage %s" % drcov_data.filepath)
                lmsg("- %s" % e)
                continue

            # merge this entry's addresses into the batch coverage
            batch_coverage.add_addresses(addresses, False)

        # hand back the aggregated coverage, and anything that went wrong
        return (batch_coverage, failures)

    #----------------------------------------------------------------------
    # Coverage Management
    #----------------------------------------------------------------------

    def select_coverage(self, coverage_name):
        """
        Make the named coverage mapping the active one.

        Raises ValueError if no loaded, composed, or special coverage exists
        under the given name. Selecting the already-active coverage is a
        no-op, and does not notify any listeners.
        """
        logger.debug("Selecting coverage %s" % coverage_name)

        # refuse names the director knows nothing about
        if coverage_name not in self.all_names:
            raise ValueError("No coverage matching '%s' was found" % coverage_name)

        already_active = self.coverage_name == coverage_name
        if not already_active:

            # recording the name is what actually switches the active coverage
            self.coverage_name = coverage_name

            # let listeners know the active coverage has changed
            self._notify_coverage_switched()

    def update_coverage(self, coverage_name, coverage_data, coverage_filepath=None):
        """
        Create a database coverage mapping, or replace an existing one.

        A fresh DatabaseCoverage is built from the given data, mapped to the
        director's metadata, and committed under coverage_name (overwriting
        any mapping already stored under that name).

        Returns the newly created DatabaseCoverage.
        """
        assert not (coverage_name in RESERVED_NAMES)
        updating_coverage = coverage_name in self.coverage_names

        if updating_coverage:
            log_format = "Updating coverage %s"
        else:
            log_format = "Adding coverage %s"
        logger.debug(log_format % coverage_name)

        # build the new mapping and bind it to the database metadata
        new_coverage = DatabaseCoverage(
            self._palette,
            coverage_name,
            coverage_filepath,
            coverage_data
        )
        new_coverage.update_metadata(self.metadata)
        new_coverage.refresh()

        #
        # the mapping is ready. publish it in the director's coverage table,
        # replacing whatever may already be stored under this name
        #

        self._commit_coverage(coverage_name, new_coverage)

        #
        # brand new coverage gets a shorthand alias (if one is available).
        # either way, listeners are told what kind of change took place.
        #

        if updating_coverage:
            self._notify_coverage_modified()
        else:
            self._request_shorthand_alias(coverage_name)
            self._notify_coverage_created()

        return new_coverage

    def _commit_coverage(self, coverage_name, new_coverage):
        """
        Internal add/update of coverage.

        This will automatically update the director's aggregate.
        """

        #
        # if there exists a coverage mapping under the given coverage_name we
        # are trying to add/update, we first must remove anything it has
        # contributed to the aggregate before we dispose of its data
        #

        if coverage_name in self.coverage_names:
            old_coverage = self._database_coverage[coverage_name]
            self.aggregate.subtract_data(old_coverage.data)
            if not self._aggregation_suspended:
                self._refresh_aggregate()

        #
        # this is the critical point where we actually integrate the newly
        # built coverage into the director or replacing an existing entry
        #

        self._database_coverage[coverage_name] = new_coverage

        # (re)-add the newly loaded/updated coverage data to the aggregate
        self.aggregate.add_data(new_coverage.data)
        if not self._aggregation_suspended:
            self._refresh_aggregate()

    def delete_coverage(self, coverage_name):
        """
        Remove a database coverage mapping by name.

        Deleting the aggregate removes all loaded coverage. Raises ValueError
        if the name is neither loaded/composed coverage nor the aggregate.
        """

        #
        # when the coverage being deleted is the active one (or is the
        # aggregate), move to a safer coverage set first to avoid ill effects
        #

        if coverage_name == self.coverage_name or coverage_name == AGGREGATE:
            self.select_coverage(NEW_COMPOSITION)

        # work out what kind of coverage we have been asked to delete
        is_user_coverage = coverage_name in self.coverage_names
        if not is_user_coverage and coverage_name != AGGREGATE:
            raise ValueError("Cannot delete %s, does not exist" % coverage_name)

        if is_user_coverage:
            self._delete_user_coverage(coverage_name)
        else:
            self._delete_aggregate_coverage()

        # let listeners know that coverage has been deleted
        self._notify_coverage_deleted()

    def _delete_user_coverage(self, coverage_name):
        """
        Delete a user created database coverage mapping by name.
        """

        # release the shorthand alias held by this coverage
        self._release_shorthand_alias(coverage_name)

        # remove the database coverage mapping from the director's coverage map
        coverage = self._database_coverage.pop(coverage_name)
        # TODO/FUTURE: check if there's any references to the coverage object?

        # remove the coverage data this mapping contributed to the aggregate
        self.aggregate.subtract_data(coverage.data)
        if not self._aggregation_suspended:
            self._refresh_aggregate()

    def _delete_aggregate_coverage(self):
        """
        Delete the aggregate set, effectively clearing all loaded coverage.

        Every loaded / composed coverage mapping has its shorthand alias
        released and is removed from the director, after which the aggregate
        is replaced with a new, blank DatabaseCoverage.
        """

        #
        # loop through all the loaded coverage sets and release them. we
        # iterate over a snapshot of the names, as each pass removes an entry
        # from the very table the names come from (mutating a dict while
        # iterating a live view of its keys raises a RuntimeError)
        #

        for coverage_name in list(self.coverage_names):
            self._release_shorthand_alias(coverage_name)
            self._database_coverage.pop(coverage_name)
        # TODO/FUTURE: check if there's any references to the coverage aggregate?

        # assign a new, blank aggregate set
        self._special_coverage[AGGREGATE] = DatabaseCoverage(self._palette, AGGREGATE)
        self._refresh_aggregate() # probably not needed

    def get_coverage(self, name):
        """
        Retrieve coverage data for the requested coverage_name.
        """

        # if the given name was an alias, this will dereference it
        coverage_name = self._alias2name.get(name, name)

        # attempt to retrieve the requested coverage
        if coverage_name in self.coverage_names:
            return self._database_coverage[coverage_name]
        if coverage_name in self.special_names:
            return self._special_coverage[coverage_name]

        # could not locate coverage
        return None

    def get_coverage_string(self, coverage_name, color=False):
        """
        Build a detailed, human readable string for the named coverage.

        The hot shell and new composition entries are returned as-is. All
        other coverage is rendered as '<symbol> - <percent>% - <name>', and
        the symbol & percentage are colored when color is set.
        """

        # these entries are static, and are never decorated
        if coverage_name in (HOT_SHELL, NEW_COMPOSITION):
            return coverage_name

        symbol = self.get_shorthand(coverage_name)
        coverage = self.get_coverage(coverage_name)

        # render the instruction coverage as a percentage
        percent = coverage.instruction_percent*100
        percent_str = "%5.2f" % percent

        if color:
            palette = self._palette

            # the symbol token is colored like it is in the shell
            symbol = color_text(symbol, palette.coverage_token)

            # grade the percentage: below 30 is bad, below 60 is okay
            if percent < 30.0:
                grade_color = palette.coverage_bad
            elif percent < 60.0:
                grade_color = palette.coverage_okay
            else:
                grade_color = palette.coverage_good
            percent_str = color_text(percent_str, grade_color)

        #
        # assemble the generic detailed coverage string
        #   eg: 'A - 73.45% - drcov.boombox.exe.03820.0000.proc.log'
        #

        return "%s - %s%% - %s" % (symbol, percent_str, coverage_name)

    #----------------------------------------------------------------------
    # Aliases
    #----------------------------------------------------------------------

    def alias_coverage(self, coverage_name, alias):
        """
        Give a loaded database coverage mapping an additional alias.

        The alias may not collide with an existing coverage name, nor with
        any of the reserved names.
        """
        assert alias not in self.all_names
        assert alias not in RESERVED_NAMES
        self._alias_coverage(coverage_name, alias)

    def _alias_coverage(self, coverage_name, alias):
        """
        Assign alias with no restrictions. Internal use only.
        """

        #
        # if we are overwriting a known alias, we should remove its
        # inverse mapping reference in the name --> [aliases] map first
        #

        if alias in self._alias2name:
            self._name2alias[self._alias2name[alias]].remove(alias)

        # save the new alias
        self._alias2name[alias] = coverage_name
        self._name2alias[coverage_name].add(alias)

    def get_aliases(self, coverage_name):
        """
        Retrieve alias set for the requested coverage_name.
        """
        return self._name2alias[coverage_name]

    def get_shorthand(self, coverage_name):
        """
        Return the shorthand symbol aliased to the requested coverage.

        Returns None if the coverage holds no shorthand symbol.
        """

        # keep only the aliases that are also shorthand symbols
        try:
            candidates = self._name2alias[coverage_name] & SHORTHAND_ALIASES
        except KeyError:
            return None

        # a coverage is never expected to hold more than one shorthand symbol
        assert len(candidates) < 2

        # hand back the lone symbol, if the coverage has one at all
        if not candidates:
            return None
        return candidates.pop()

    def peek_shorthand(self):
        """
        Peek at the next available shorthand symbol.
        """
        try:
            return self._shorthand[0]
        except IndexError:
            return None

    def _request_shorthand_alias(self, coverage_name):
        """
        Alias the next free shorthand (A-Z) symbol to the given coverage.

        Returns the assigned symbol, or None if the shorthand pool is empty.
        """
        logger.debug("Requesting shorthand alias for %s" % coverage_name)
        assert coverage_name in self.coverage_names

        # with an exhausted pool, there is no symbol left to hand out
        if not self._shorthand:
            return None

        # take the next symbol (A-Z) from the front of the pool
        symbol = self._shorthand.popleft()

        # bind the symbol to the coverage, and report which one was used
        self._alias_coverage(coverage_name, symbol)
        return symbol

    def _release_shorthand_alias(self, coverage_name):
        """
        Return the shorthand symbol held by coverage_name to the pool.

        Does nothing if the coverage holds no shorthand symbol.
        """
        logger.debug("Releasing shorthand alias for %s" % coverage_name)
        assert coverage_name in self.coverage_names

        # look up the shorthand symbol (if any) held by this coverage
        symbol = self.get_shorthand(coverage_name)

        if symbol:

            # scrub the symbol from both of the alias maps
            self._name2alias[coverage_name].remove(symbol)
            self._alias2name.pop(symbol)

            # the released symbol goes to the back of the shorthand pool
            self._shorthand.append(symbol)

            #
            # once every shorthand symbol has been handed back, rebuild the
            # pool so that new symbols are issued from the start of the
            # alphabet again (A, B, C ...)
            #

            if len(self._shorthand) == len(ASCII_SHORTHAND):
                self._shorthand = collections.deque(ASCII_SHORTHAND)

    #----------------------------------------------------------------------
    # Composing
    #----------------------------------------------------------------------

    def add_composition(self, composite_name, ast):
        """
        Evaluate the given AST and store the result under composite_name.

        An existing mapping under the same name is overwritten.
        """
        assert not (composite_name in RESERVED_NAMES)
        updating_coverage = composite_name in self.coverage_names
        logger.debug("Adding composition %s" % composite_name)

        # turn the AST into a concrete coverage set
        composite_coverage = self._evaluate_composition(ast)

        # publish the evaluated coverage under the requested name
        self._commit_coverage(composite_name, composite_coverage)

        #
        # brand new coverage gets a shorthand alias (if one is available).
        # either way, listeners are told what kind of change took place.
        #

        if updating_coverage:
            self._notify_coverage_modified()
        else:
            self._request_shorthand_alias(composite_name)
            self._notify_coverage_created()

    def cache_composition(self, ast, force=False):
        """
        Queue the given composition for asynchronous evaluation & caching.

        The request is only queued while the hot shell is the active
        coverage, unless force is set.
        """
        assert ast

        #
        # pro-active evaluation is normally reserved for when the hotshell
        # is active, but the caller is allowed to insist on it
        #

        should_queue = self.coverage_name == HOT_SHELL or force
        if should_queue:
            self._ast_queue.put(ast)

    def _async_evaluate_ast(self):
        """
        Body of the background composition evaluation worker.

        Blocks on the AST queue and evaluates each request as it arrives,
        until a None request is received.
        """
        logger.debug("Starting EvaluateAST thread...")

        #
        # pull coverage expressions (ASTs) off the queue one at a time. the
        # iteration stops as soon as the queue yields the 'None' sentinel
        #

        for ast in iter(self._ast_queue.get, None):

            # collapse the AST into a single composite coverage mapping
            composite_coverage = self._evaluate_composition(ast)

            # the hotshell entry always holds the most recent composite
            self._special_coverage[HOT_SHELL] = composite_coverage

            # listeners only need to hear about it if the hotshell is active
            if self.coverage_name == HOT_SHELL:
                self._notify_coverage_modified()

        # thread exit
        logger.debug("Exiting EvaluateAST thread...")

    def _evaluate_composition(self, ast):
        """
        Evaluate the coverage composition described by the AST.

        Returns a DatabaseCoverage that has been mapped to the director's
        metadata. A 'null' AST yields a blank coverage set.

        The composition lock is held for the duration of the evaluation, and
        is released even if the evaluation raises.
        """

        # if the AST is effectively 'null', return a blank coverage set
        if isinstance(ast, TokenNull):
            return DatabaseCoverage(self._palette)

        #
        # the director's composition evaluation code (this function) is most
        # generally called via the background caching evaluation thread known
        # as self._composition_worker. But this function can also be called
        # inline via the 'add_composition' function from a different thread
        # (namely, the main thread)
        #
        # because of this, we must gate the resources that AST evaluation code
        # operates on behind a lock, restricting the code to one thread.
        #
        # should we call _evaluate_composition from the context of the main
        # thread, it is important that we do so in a pseudo non-blocking way
        # such that we don't hang the UI. await_lock(...) will allow the Qt
        # main thread to yield to other threads while waiting for the lock.
        #

        await_lock(self._composition_lock)

        #
        # everything between acquiring and releasing the lock is guarded by a
        # try/finally. without it, an exception raised while evaluating a
        # composition would leave the lock held forever, stalling every
        # subsequent evaluation (on both the worker and the main thread)
        #

        try:

            # recursively evaluate the AST
            composite_coverage = self._evaluate_composition_recursive(ast)

            # map the composited coverage data to the database metadata
            composite_coverage.update_metadata(self.metadata)
            composite_coverage.refresh() # TODO/FUTURE: hash refresh?

        # done operating on shared data (coverage), release the lock
        finally:
            self._composition_lock.release()

        # return the evaluated composition
        return composite_coverage

    def _evaluate_composition_recursive(self, node):
        """
        Recursively evaluate a single node of a composition AST.

        Returns the DatabaseCoverage described by the given node. Raises a
        ValueError if the node is not a recognized token type.
        """

        #
        # anything that is not a logic operator is dispatched (or rejected)
        # up front, leaving the remainder of this function to deal strictly
        # with the evaluation of logic operators
        #

        if not isinstance(node, TokenLogicOperator):

            #
            # a coverage range (eg, 'A,D') evaluates to an aggregate coverage
            # set described by the start/end of the range
            #

            if isinstance(node, TokenCoverageRange):
                return self._evaluate_coverage_range(node)

            # a single coverage token evaluates to its associated coverage
            if isinstance(node, TokenCoverageSingle):
                return self._evaluate_coverage(node)

            # unknown token? (this should never happen)
            raise ValueError("Invalid AST Token in Composition Tree")

        #
        # a logic operator can only be applied once both of its operands have
        # been concretized, so evaluate the left and right sub-expressions
        #   eg:
        #       lhs = DatabaseCoverage for 'A'
        #       rhs = DatabaseCoverage for 'B'
        #

        lhs = self._evaluate_composition_recursive(node.op1)
        rhs = self._evaluate_composition_recursive(node.op2)

        #
        # before doing any real work, compute a low-cost 'hash' of the
        # requested operation by applying the operator to the coverage hashes
        # of the two operands:
        #
        #      lookup_key = hash(A) <operator> hash(B)
        #
        # commutative operators (|, &, ^) produce the same key no matter which
        # side each operand falls on, while subtraction (-) does not:
        #
        #      (A | B) == (B | A)
        #      (A - B) != (B - A)
        #
        # collisions are more likely than with a true hash, but are still
        # expected to be extremely rare...
        #

        lookup_key = node.operator(lhs.coverage_hash, rhs.coverage_hash)

        #
        # evaluating an AST produces lots of 'transient' compositions
        #
        #   eg:
        #       (A | B) - (C | D)
        #
        #       COMP_1 = (A | B)
        #       COMP_2 = (C | D)
        #       COMP_3 = COMP_1 - COMP_2
        #
        # rather than discarding COMP_1 / COMP_2, every composition is stored
        # in a small LRU cache. a later evaluation of (A | B) - (Z | D) can
        # then re-use (A | B), assuming it has not been evicted.
        #

        previously_computed = self._composition_cache[lookup_key]
        if previously_computed:
            return previously_computed

        # cache miss, apply the operator to the operand coverage masks
        mask = node.operator(lhs.coverage, rhs.coverage)

        # generate a new DatabaseCoverage mapping from the computed mask
        composition = DatabaseCoverage(self._palette, data=mask)

        # cache & return the newly computed composition
        self._composition_cache[lookup_key] = composition
        return composition

    def _evaluate_coverage(self, coverage_token):
        """
        Evaluate a TokenCoverageSingle AST token.

        Returns the existing database coverage mapping that the token's
        shorthand symbol refers to.
        """
        assert isinstance(coverage_token, TokenCoverageSingle)

        # translate the shorthand symbol (alias) into a coverage name
        coverage_name = self._alias2name[coverage_token.symbol]

        # fetch the coverage mapping registered under that name
        return self.get_coverage(coverage_name)

    def _evaluate_coverage_range(self, range_token):
        """
        Evaluate a TokenCoverageRange AST token.

        Returns a new database coverage mapping aggregating the data of every
        coverage set whose shorthand symbol falls within the (inclusive) range.
        """
        assert isinstance(range_token, TokenCoverageRange)

        # start from a null coverage set
        aggregate = DatabaseCoverage(self._palette)

        # the range 'A,Z' spans the character codes of 'A' through 'Z'
        first = ord(range_token.symbol_start)
        last = ord(range_token.symbol_end)

        # fold the data of each coverage set in the range into the aggregate
        for code in range(first, last + 1):
            coverage_name = self._alias2name[chr(code)]
            aggregate.add_data(self.get_coverage(coverage_name).data)

        return aggregate

    #----------------------------------------------------------------------
    # Refresh
    #----------------------------------------------------------------------

    def refresh(self):
        """
        Fully refresh the director: metadata first, then all mapped coverage.
        """
        logger.debug("Refreshing the CoverageDirector")

        # force a (re)build of the metadata cache and wait on its future
        pending = self.refresh_metadata(
            progress_callback=metadata_progress,
            force=True
        )
        await_future(pending)

        # with metadata in place, (re)map every loaded coverage set onto it
        self._refresh_database_coverage()

    def refresh_metadata(self, progress_callback=None, force=False):
        """
        Refresh the database metadata cache utilized by the director.

        Returns a future (Queue) that will carry the completion message. If
        the metadata is already cached and force is not set, no refresh is
        started and the returned queue already holds False.
        """
        metadata = self.metadata

        #
        # the first time the metadata is going to be used / populated (it is
        # not yet cached), register the director for notifications of
        # metadata modification (this should only happen once)
        #
        # TODO/FUTURE: this is a little dirty, but it will suffice.
        #

        if not metadata.cached:
            metadata.function_renamed(self._notify_metadata_modified)

        #
        # a refresh is started when there is no cached metadata for this
        # disassembler session, or when one is explicitly demanded via
        # force=True. the returned queue can be used to block for the result
        #

        if not metadata.cached or force:
            return metadata.refresh(progress_callback=progress_callback)

        # otherwise ignore the request, handing back an already-completed queue
        completed = Queue.Queue()
        completed.put(False)
        return completed

    def _refresh_database_coverage(self):
        """
        Re-map every coverage set managed by the director onto the metadata.
        """
        logger.debug("Refreshing database coverage mappings")

        for position, coverage_name in enumerate(self.all_names, start=1):
            logger.debug(" - %s" % coverage_name)

            # report progress as 'current/total' in the wait box
            progress_text = "Refreshing coverage mapping %u/%u" % (
                position,
                len(self.all_names)
            )
            disassembler.replace_wait_box(progress_text)

            # install the director's metadata into the mapping & rebuild it
            mapping = self.get_coverage(coverage_name)
            mapping.update_metadata(self.metadata)
            mapping.refresh()

    def _refresh_aggregate(self):
        """
        Refresh the aggregate database coverage mapping.
        """
        self.aggregate.update_metadata(self.metadata)
        self.aggregate.refresh()
Пример #9
0
    def __init__(self, palette, name="", filepath=None, data=None):

        #
        # identity of this coverage set: its display name, the filepath the
        # coverage data was sourced from, and the color palette
        #

        self.name = name
        self.filepath = filepath
        self.palette = palette

        #
        # the coverage mapping's reference to the underlying database
        # metadata, used for all of its mapping operations.
        #
        # a stub DatabaseMetadata object is installed here. at runtime, a fully
        # collected DatabaseMetadata object (as maintained by the director)
        # is injected in its place.
        #

        self._metadata = DatabaseMetadata()

        #
        # the address hitmap is a dictionary holding the lowest level
        # representation of the original coverage data. it tracks the number
        # of times a given address appeared in that data.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040105: 3,  # 0x8040105 was executed 3 times...
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # the keys of the hitmap double as a coverage bitmap of all the
        # addresses executed in the coverage log, which leaves a lot of
        # flexibility in how the data is consumed (coverage, heatmaps, ...)
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a simple hash of the coverage mask (the hitmap
        # keys). the director uses it to quickly compare coverage objects and
        # to speculate on the output of logical/arithmetic operations on them.
        #
        # computing the hash on demand is expensive, so it is cached here. it
        # must be recomputed via _update_coverage_hash() anytime the hitmap
        # is modified.
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # unmapped data is the set of addresses that we have coverage for, but
        # could not (yet) map to any defined function in the database. note
        # that coverage falling outside of defined functions is neither
        # painted nor counted in statistics.
        #
        # initially, all loaded coverage data is marked as unmapped
        #

        unmapped = set(self._hitmap.keys())
        unmapped.add(BADADDR)
        self._unmapped_data = unmapped
        self._misaligned_data = set()

        #
        # mapping the unmapped data on top of the database metadata yields
        # the objects stored below:
        #
        # self.nodes: address --> NodeCoverage (basic block level)
        # self.functions: address --> FunctionCoverage (function level)
        #

        self.functions = {}
        self.nodes = {}
        self.instruction_percent = 0.0

        #
        # a single weakref proxy of ourself, created once so that it can be
        # distributed to the children we create without having to repeatedly
        # instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)
Пример #10
0
class DatabaseCoverage(object):
    """
    Database level coverage mapping.

    Holds an address hitmap (address --> hit count) and maps it on top of the
    database metadata, yielding NodeCoverage and FunctionCoverage objects.
    """
    def __init__(self, palette, name="", filepath=None, data=None):

        # color palette
        self.palette = palette

        # the name of the DatabaseCoverage object
        self.name = name

        # the filepath this coverage data was sourced from
        self.filepath = filepath

        #
        # this is the coverage mapping's reference to the underlying database
        # metadata. it will use this for all its mapping operations.
        #
        # here we simply populate the DatabaseCoverage object with a stub
        # DatabaseMetadata object, but at runtime we will inject a fully
        # collected DatabaseMetadata object as maintained by the director.
        #

        self._metadata = DatabaseMetadata()

        #
        # the address hitmap is a dictionary that effectively holds the lowest
        # level representation of the original coverage data loaded from disk.
        #
        # as the name implies, the hitmap will track the number of times a
        # given address appeared in the original coverage data.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040102: 1,
        #          0x8040105: 3,
        #          0x8040108: 3,  # 0x8040108 was executed 3 times...
        #          0x804010a: 3,
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # the hitmap gives us an interesting degree of flexibility with regard
        # to what data sources we can load coverage data from, and how we
        # choose to consume it (eg, visualize coverage, heatmaps, ...)
        #
        # using hitmap.keys(), we effectively have a coverage bitmap of all
        # the addresses executed in the coverage log
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a simple hash of the coverage mask (hitmap keys)
        #
        # it is primarily used by the director as a means of quickly comparing
        # two database coverage objects against each other, and speculating on
        # the output of logical/arithmetic operations of their coverage data.
        #
        # this hash will need to be recomputed via _update_coverage_hash()
        # anytime new coverage data is introduced to this object, or when the
        # hitmap is otherwise modified internally.
        #
        # this is necessary because we cache the coverage hash. computing the
        # hash on demand is expensive, and it really shouldn't change often.
        #
        # see the usage of 'coverage_hash' in director.py for more info
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # unmapped data is a set of addresses that we have coverage for, but
        # could not map to any defined function in the database.
        #
        # a shortcoming of lighthouse (as recently as v0.8) is that it does
        # *not* compute statistics for, or paint, loaded coverage that falls
        # outside of defined functions.
        #
        # under normal circumstances, one can just define a function at the
        # area of interest (assuming it was a disassembler issue) and refresh
        # the lighthouse metadata to 'map' the missing coverage.
        #
        # in cases of obfuscation, abnormal control flow, or self modifying
        # code, lighthouse will probably not perform well. but to be fair,
        # lighthouse was designed for displaying coverage more-so than hit
        # tracing or trace exploration.
        #
        # initially, all loaded coverage data is marked as unmapped
        #

        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(BADADDR)
        self._misaligned_data = set()

        #
        # at runtime, the _map_coverage() member function of this class is
        # responsible for taking the unmapped_data and mapping it on top of
        # the lifted database metadata (self._metadata).
        #
        # the process of mapping the raw coverage data will yield NodeCoverage
        # and FunctionCoverage objects. these are the buckets that the unmapped
        # coverage data is poured into during the mapping process.
        #
        # NodeCoverage objects represent coverage at the node (basic block)
        # level and are owned by a respective FunctionCoverage object.
        #
        # FunctionCoverage represent coverage at the function level, grouping
        # children NodeCoverage objects and providing higher level statistics.
        #
        # self.nodes: address --> NodeCoverage
        # self.functions: address --> FunctionCoverage
        #

        self.nodes = {}
        self.functions = {}
        self.instruction_percent = 0.0

        #
        # we instantiate a single weakref of ourself (the DatabaseCoverage
        # object) such that we can distribute it to the children we create
        # without having to repeatedly instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def data(self):
        """
        Return the backing coverage data (a hitmap).
        """
        return self._hitmap

    @property
    def coverage(self):
        """
        Return the instruction-level coverage bitmap/mask.
        """
        return self._hitmap.viewkeys()

    @property
    def suspicious(self):
        """
        Return a bool indicating if the coverage seems badly mapped.

        Coverage that has no mapped nodes at all is not reported as
        suspicious, as there is nothing to base a judgement on.
        """
        total = len(self.nodes)

        # nothing is mapped, so there are no nodes to judge (or divide by)
        if not total:
            return False

        #
        # count the number of nodes (basic blocks) that allegedly were executed
        # (they have coverage data) but don't actually have their first
        # instruction logged as executed.
        #
        # this is considered 'suspicious' and should be a red flag that the
        # provided coverage data is malformed, or for a different binary
        #

        bad = 0
        for address, node_coverage in self.nodes.iteritems():
            if address in node_coverage.executed_instructions:
                continue
            bad += 1

        # compute a percentage of the 'bad nodes'
        percent = (bad / float(total)) * 100
        logger.debug("SUSPICIOUS: %5.2f%% (%u/%u)" % (percent, bad, total))

        #
        # if the percentage of 'bad' coverage nodes is too high, we consider
        # this database coverage as 'suspicious' or 'badly mapped'
        #
        # this number (2%) may need to be tuned. really any non-zero figure
        # is strange, but we will give some wiggle room for DBI or
        # disassembler fudginess.
        #

        return percent > 2.0

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Install a new database metadata object.

        Only a weak proxy to the metadata is kept, and all existing coverage
        mappings are discarded. The delta parameter is accepted but unused.
        """
        self._metadata = weakref.proxy(metadata)
        self.unmap_all()

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # dump the unmappable coverage data
        #self.dump_unmapped()

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize the DatabaseCoverage statistics / data for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)
        self._finalize_instruction_percent()

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize the NodeCoverage objects statistics / data for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize the FunctionCoverage objects statistics / data for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    def _finalize_instruction_percent(self):
        """
        Finalize the DatabaseCoverage's coverage % by instructions executed.
        """

        # sum all the instructions in the database metadata
        total = sum(f.instruction_count
                    for f in self._metadata.functions.itervalues())

        # no instructions in the metadata means there is nothing to cover
        if not total:
            self.instruction_percent = 0.0
            return

        # sum the unique instructions executed across all functions
        executed = sum(f.instructions_executed
                       for f in self.functions.itervalues())

        # save the computed percentage of database instructions executed (0 to 1.0)
        self.instruction_percent = float(executed) / total

    #--------------------------------------------------------------------------
    # Data Operations
    #--------------------------------------------------------------------------

    def add_data(self, data, update=True):
        """
        Add an existing instruction hitmap to the coverage mapping.

        If update is False, only the hitmap is modified. The coverage hash
        and the set of unmapped addresses are left untouched.
        """

        # add the given runtime data to our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] += hit_count

        # do not update other internal structures if requested
        if not update:
            return

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= data.viewkeys()

    def add_addresses(self, addresses, update=True):
        """
        Add a list of instruction addresses to the coverage mapping.

        If update is False, only the hitmap is modified. The coverage hash
        and the set of unmapped addresses are left untouched.
        """

        # increment the hit count for an address
        for address in addresses:
            self._hitmap[address] += 1

        # do not update other internal structures if requested
        if not update:
            return

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= set(addresses)

    def subtract_data(self, data):
        """
        Subtract an existing instruction hitmap from the coverage mapping.
        """

        # subtract the given hitmap from our existing hitmap
        for address, hit_count in data.iteritems():
            self._hitmap[address] -= hit_count

            #
            # if there is no longer any hits for this address, delete its
            # entry from the hitmap dictionary. we don't want its entry to
            # hang around because we use self._hitmap.viewkeys() as a
            # coverage bitmap/mask
            #

            if not self._hitmap[address]:
                del self._hitmap[address]

        # update the coverage hash as the hitmap has probably changed
        self._update_coverage_hash()

        #
        # unmap everything because a complete re-mapping is easier with the
        # current implementation of things
        #

        self.unmap_all()

    def mask_data(self, coverage_mask):
        """
        Mask the hitmap data against a given coverage mask.

        Returns a new DatabaseCoverage containing the masked hitmap. Every
        address in the mask appears in the result, with a hit count of zero
        if this coverage has no data for it. This object is not modified.
        """
        composite_data = collections.defaultdict(int)
        hitmap = self._hitmap

        #
        # preserve only hitmap data that matches the coverage mask.
        #
        # the hitmap is read with .get() rather than indexed directly, as
        # indexing a defaultdict with a missing address would insert that
        # address into *our* hitmap, silently growing our coverage mask
        #

        for address in coverage_mask:
            composite_data[address] = hitmap.get(address, 0)

        # done, return a new DatabaseCoverage masked with the given coverage
        return DatabaseCoverage(self.palette, data=composite_data)

    def _update_coverage_hash(self):
        """
        Update the hash of the coverage mask.

        An empty hitmap always hashes to zero.
        """
        if self._hitmap:
            self.coverage_hash = hash(frozenset(self._hitmap.viewkeys()))
        else:
            self.coverage_hash = 0

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the underlying database metadata.

        Returns a tuple of (dirty_nodes, dirty_functions).
        """
        dirty_nodes = self._map_nodes()
        dirty_functions = self._map_functions(dirty_nodes)
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded coverage data to database defined nodes (basic blocks).

        Returns a dict (address --> NodeCoverage) of the nodes that were
        touched by this mapping pass.
        """
        dirty_nodes = {}

        # the coverage data we will attempt to process in this function
        coverage_addresses = collections.deque(sorted(self._unmapped_data))

        #
        # the loop below is the core of our coverage mapping process.
        #
        # operating on whatever coverage data (instruction addresses) reside
        # within unmapped_data, this loop will attempt to bucket the coverage
        # into NodeCoverage objects where possible.
        #
        # the higher level coverage mappings (eg FunctionCoverage,
        # DatabaseCoverage) get built on top of the node mapping that we
        # perform here.
        #
        # since this loop is the most computationally expensive part of the
        # mapping process, it has been carefully profiled & optimized for
        # speed. please be careful if you wish to modify it...
        #

        while coverage_addresses:

            # get the next coverage address to map
            address = coverage_addresses.popleft()

            # get the node (basic block) metadata that this address falls in
            node_metadata = self._metadata.get_node(address)

            #
            # should we fail to locate node metadata for the coverage address
            # that we are trying to map, then the address must not fall inside
            # of a defined function.
            #
            # in this case, the coverage address will remain unmapped...
            #

            if not node_metadata:
                continue

            #
            # we found applicable node metadata for this address, now we will
            # try to find an existing bucket (NodeCoverage) for the address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate an existing NodeCoverage object for this
            # address, it looks like this is the first time we have attempted
            # to bucket coverage for this node.
            #
            # create a new NodeCoverage bucket and use it now
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address,
                                             self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the end address of the current basic block
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below is an inlined fast-path that assumes the next
            # several coverage addresses will likely belong to the same node
            # that we just looked up (or created) in the code above
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside the node
            #

            while 1:

                #
                # map the hitmap data for the current address (an instruction)
                # to this NodeCoverage and mark the instruction as mapped by
                # discarding its address from the unmapped data set
                #

                if address in node_metadata.instructions:
                    node_coverage.executed_instructions[
                        address] = self._hitmap[address]
                    self._unmapped_data.discard(address)

                #
                # if the given address allegedly falls within this node's
                # address range, but doesn't line up with the known
                # instructions, log it as 'misaligned' / suspicious
                #

                else:
                    self._misaligned_data.add(address)

                # get the next address to attempt mapping on
                try:
                    address = coverage_addresses.popleft()

                # an IndexError implies there is nothing left to map...
                except IndexError:
                    break

                #
                # if the next address is not in this node, it's time to break
                # out of this loop and send it through the full node lookup
                #

                if not (node_metadata.address <= address < node_end):
                    coverage_addresses.appendleft(address)
                    break

            # the node was updated, so save its coverage as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done, return a map of NodeCoverage objects that were modified
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.

        Returns a dict (address --> FunctionCoverage) of the functions that
        own at least one of the given dirty nodes.
        """
        dirty_functions = {}

        #
        # thanks to _map_nodes(), we now have a repository of NodeCoverage
        # objects that are considered 'dirty' and can be used precisely to
        # build or update the function level coverage metadata
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using a given NodeCoverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of its function
            # (parent) metadata.
            #

            function_metadata = self._metadata.nodes[
                node_coverage.address].function

            #
            # now we will attempt to retrieve the FunctionCoverage object
            # that we need to parent the given NodeCoverage object to
            #

            function_coverage = self.functions.get(function_metadata.address,
                                                   None)

            #
            # if we failed to locate a FunctionCoverage for this node, it means
            # that this is the first time we have seen coverage for this
            # function. create a new coverage function object and use it now.
            #

            if not function_coverage:
                function_coverage = FunctionCoverage(function_metadata.address,
                                                     self._weak_self)
                self.functions[function_metadata.address] = function_coverage

            # add the NodeCoverage object to its parent FunctionCoverage
            function_coverage.mark_node(node_coverage)
            dirty_functions[function_metadata.address] = function_coverage

        # done, return a map of FunctionCoverage objects that were modified
        return dirty_functions

    def unmap_all(self):
        """
        Unmap all mapped coverage data.

        Every address in the hitmap is marked as unmapped again, and all
        node / function coverage objects are discarded.
        """
        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(BADADDR)
        self._misaligned_data = set()
        self.nodes = {}
        self.functions = {}

    #--------------------------------------------------------------------------
    # Debug
    #--------------------------------------------------------------------------

    def dump_unmapped(self):
        """
        Dump the unmapped coverage data.
        """
        lmsg("Unmapped Coverage:")
        for address in self._unmapped_data:
            lmsg(" * 0x%X" % address)
Пример #11
0
class CoverageDirector(object):
    """
    The Coverage Director manages loaded coverage.

    The primary role of the director is to centralize the loaded coverage
    and provide a platform for researchers to explore the relationship
    between multiple coverage sets.
    """
    def __init__(self, palette):
        """
        Create a director with no coverage loaded.

        `palette` is the color palette handed to every DatabaseCoverage the
        director creates. Construction also starts the background thread
        that evaluates queued composition ASTs; terminate() stops it.
        """

        # color palette
        self._palette = palette

        # database metadata cache
        self.metadata = DatabaseMetadata()

        # while True, coverage changes do not trigger an aggregate refresh
        self._aggregation_suspended = False

        #----------------------------------------------------------------------
        # Coverage
        #----------------------------------------------------------------------

        # name of the active coverage (eg, a filename)
        self.coverage_name = NEW_COMPOSITION

        # loaded or composed coverage, keyed by name (insertion ordered)
        self._database_coverage = collections.OrderedDict()

        # a NULL / empty coverage set
        self._NULL_COVERAGE = DatabaseCoverage(None, palette)

        #
        # coverage sets that the director maintains or generates on its
        # own. users cannot modify these directly, but their actions (or
        # loaded coverage data) may influence them.
        #
        # NOTE: insertion order here is the order in which these entries
        # are shown in lists such as UI dropdowns.
        #

        special = collections.OrderedDict()
        special[HOT_SHELL] = DatabaseCoverage(None, palette)        # hot shell composition
        special[NEW_COMPOSITION] = DatabaseCoverage(None, palette)  # slow shell composition
        special[AGGREGATE] = DatabaseCoverage(None, palette)        # aggregate composition
        self._special_coverage = special

        #----------------------------------------------------------------------
        # Aliases
        #----------------------------------------------------------------------
        #
        #   Loaded coverage can be given alternate names (aliases). For now
        #   this is only used to assign shorthand names to coverage data.
        #

        # alias --> coverage_name
        #   eg: 'A' --> 'my_loaded_coverage.log'
        self._alias2name = {}

        # coverage_name --> set(aliases)
        #   eg: 'my_loaded_coverage.log' --> set('A', 'log1', 'foo')
        self._name2alias = collections.defaultdict(set)

        #
        # shorthand 'symbols' are single capital letter aliases (eg 'A')
        # that the director hands out automatically to coverage sets, so
        # that compositions can be written tersely:
        #
        #   'A' --> 'drcov.boombox.exe.04936.0000.proc.log'
        #   'B' --> 'drcov.boombox.exe.03297.0000.proc.log'
        #   ...
        #
        #   ((A & B) | (D & (E - F))) | Z
        #
        # only 26 symbols (A-Z) exist, and there are no immediate plans to
        # extend that range: with 26+ coverage sets loaded at once,
        # shorthand aliases would not make composing any less cumbersome.
        #
        # the deque below is the pool of symbols not currently assigned.
        #

        self._shorthand = collections.deque(ASCII_SHORTHAND)

        # default alias: the aggregate set is reachable via AGGREGATE_ALIAS
        self._alias_coverage(AGGREGATE, AGGREGATE_ALIAS)

        #----------------------------------------------------------------------
        # Async
        #----------------------------------------------------------------------

        # ASTs waiting to be evaluated by the worker (None == stop signal)
        self._ast_queue = Queue.Queue()

        # serializes composition evaluation across threads
        self._composition_lock = threading.Lock()

        # cache of recently computed compositions
        self._composition_cache = CompositionCache()

        # background thread that evaluates queued ASTs
        worker = threading.Thread(
            target=self._async_evaluate_ast,
            name="EvaluateAST"
        )
        self._composition_worker = worker
        worker.start()

        #----------------------------------------------------------------------
        # Callbacks
        #----------------------------------------------------------------------
        #
        #   The director is the data source for much of Lighthouse, so
        #   anything built on top of it needs a way to react to changes in
        #   the data it consumes. The lists below hold the callbacks
        #   registered for each kind of event.
        #

        # coverage callbacks
        self._coverage_switched_callbacks = []
        self._coverage_modified_callbacks = []
        self._coverage_created_callbacks = []
        self._coverage_deleted_callbacks = []

        # metadata callbacks
        self._metadata_modified_callbacks = []

    def terminate(self):
        """
        Shut the director down.

        Signals the composition worker to exit and waits for it, then
        aborts (and joins) any metadata refresh still in progress.
        """

        # a None entry on the AST queue is the worker's stop signal
        self._ast_queue.put(None)
        worker = self._composition_worker
        worker.join()

        # stop any ongoing metadata refresh
        self.metadata.abort_refresh(join=True)

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def coverage(self):
        """
        The active database coverage.
        """
        return self.get_coverage(self.coverage_name)

    @property
    def aggregate(self):
        """
        The director-owned coverage set stored under the AGGREGATE name.
        """
        special = self._special_coverage
        return special[AGGREGATE]

    @property
    def coverage_names(self):
        """
        The names of loaded / composed coverage data.
        """
        return self._database_coverage.keys()

    @property
    def special_names(self):
        """
        The names of special / director coverage.
        """
        return self._special_coverage.keys()

    @property
    def all_names(self):
        """
        The names of both special & loaded/composed coverage data.
        """
        return self.coverage_names + self.special_names

    #----------------------------------------------------------------------
    # Callbacks
    #----------------------------------------------------------------------

    def coverage_switched(self, callback):
        """
        Register `callback` for coverage switch events.
        """
        subscribers = self._coverage_switched_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_switched(self):
        """
        Fire the callbacks registered for coverage switch events.
        """
        subscribers = self._coverage_switched_callbacks
        notify_callback(subscribers)

    def coverage_modified(self, callback):
        """
        Register `callback` for coverage modification events.
        """
        subscribers = self._coverage_modified_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_modified(self):
        """
        Fire the callbacks registered for coverage modification events.
        """
        subscribers = self._coverage_modified_callbacks
        notify_callback(subscribers)

    def coverage_created(self, callback):
        """
        Register `callback` for coverage creation events.
        """
        subscribers = self._coverage_created_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_created(self):
        """
        Fire the callbacks registered for coverage creation events.
        """
        # TODO: send list of names created?
        subscribers = self._coverage_created_callbacks
        notify_callback(subscribers)

    def coverage_deleted(self, callback):
        """
        Register `callback` for coverage deletion events.
        """
        subscribers = self._coverage_deleted_callbacks
        register_callback(subscribers, callback)

    def _notify_coverage_deleted(self):
        """
        Fire the callbacks registered for coverage deletion events.
        """
        # TODO: send list of names deleted?
        subscribers = self._coverage_deleted_callbacks
        notify_callback(subscribers)

    def metadata_modified(self, callback):
        """
        Register `callback` for metadata modification events.
        """
        subscribers = self._metadata_modified_callbacks
        register_callback(subscribers, callback)

    def _notify_metadata_modified(self):
        """
        Fire the callbacks registered for metadata modification events.
        """
        subscribers = self._metadata_modified_callbacks
        notify_callback(subscribers)

    #----------------------------------------------------------------------
    # Batch Loading
    #----------------------------------------------------------------------

    def suspend_aggregation(self):
        """
        Suspend the aggregate computation for any newly added coverage.

        It is performant to suspend/resume aggregation if loading a number
        of individual coverage files. This will prevent the aggregate
        coverage set from being re-computed multiple times.
        """
        self._aggregation_suspended = True

    def resume_aggregation(self):
        """
        Refresh the aggregate and re-enable automatic aggregate refreshes.

        Must only be called while aggregation is suspended.
        """
        assert self._aggregation_suspended

        # catch up on everything that changed while suspended
        self._refresh_aggregate()

        # only cleared once the refresh above has completed
        self._aggregation_suspended = False

    #----------------------------------------------------------------------
    # Coverage
    #----------------------------------------------------------------------

    def select_coverage(self, coverage_name):
        """
        Make the coverage registered under `coverage_name` the active one.

        Raises ValueError if no loaded or special coverage has that name.
        Listeners are notified only when the active coverage changes.
        """
        logger.debug("Selecting coverage %s" % coverage_name)

        # the target must be a known (loaded or special) coverage name
        known = coverage_name in self.all_names
        if not known:
            message = "No coverage matching '%s' was found" % coverage_name
            raise ValueError(message)

        # selecting what is already active is a no-op
        already_active = self.coverage_name == coverage_name
        if already_active:
            return

        # pivot the director onto the new coverage and tell the listeners
        self.coverage_name = coverage_name
        self._notify_coverage_switched()

    def create_coverage(self, coverage_name, coverage_data):
        """
        Create a new coverage object maintained by the director.

        Delegates to update_coverage() and returns its result.
        """
        created = self.update_coverage(coverage_name, coverage_data)
        return created

    def update_coverage(self, coverage_name, coverage_data):
        """
        Create coverage under `coverage_name`, or replace what is there.

        Builds a DatabaseCoverage from `coverage_data`, stores it in the
        director (updating the aggregate), and returns it. New names are
        given a shorthand alias when one is available. Listeners receive a
        'created' or 'modified' notification accordingly.
        """
        assert not (coverage_name in RESERVED_NAMES)

        # decide up front whether this replaces existing coverage
        updating_coverage = coverage_name in self.coverage_names
        if updating_coverage:
            logger.debug("Updating coverage %s" % coverage_name)
        else:
            logger.debug("Adding coverage %s" % coverage_name)

        # build & map the coverage object, then surface it in the director
        fresh_coverage = self._new_coverage(coverage_data)
        self._update_coverage(coverage_name, fresh_coverage)

        if updating_coverage:
            self._notify_coverage_modified()
        else:
            # new additions get a shorthand alias (if any are left)
            self._request_shorthand_alias(coverage_name)
            self._notify_coverage_created()

        # return the created/updated coverage
        return fresh_coverage

    def _update_coverage(self, coverage_name, new_coverage):
        """
        Store `new_coverage` under `coverage_name` and update the aggregate.

        Any coverage previously stored under the name has its data
        subtracted from the aggregate before being replaced. The aggregate
        is refreshed after each change unless aggregation is suspended.
        """
        table = self._database_coverage

        # remove the contribution of the coverage that is being replaced
        if coverage_name in self.coverage_names:
            replaced = table[coverage_name]
            self.aggregate.subtract_data(replaced.data)
            if not self._aggregation_suspended:
                self._refresh_aggregate()

        # integrate the new coverage, replacing any existing entry
        table[coverage_name] = new_coverage

        # fold the new coverage's data into the aggregate
        self.aggregate.add_data(new_coverage.data)
        if not self._aggregation_suspended:
            self._refresh_aggregate()

    def _new_coverage(self, coverage_data):
        """
        Build a DatabaseCoverage from `coverage_data`.

        The new object is bound to the director's metadata and refreshed
        before being returned.
        """
        built = DatabaseCoverage(coverage_data, self._palette)
        built.update_metadata(self.metadata)
        built.refresh()
        return built

    def delete_coverage(self, coverage_name):
        """
        Delete the coverage registered under `coverage_name`.

        Deleting AGGREGATE clears every loaded coverage. Raises ValueError
        for any other name that is not a loaded coverage. Listeners are
        notified after a successful deletion.
        """

        #
        # when the target is the active coverage (or the aggregate), pivot
        # to NEW_COMPOSITION first so the active coverage is not deleted
        # out from under its consumers
        #

        targets_active = coverage_name == self.coverage_name
        if targets_active or coverage_name == AGGREGATE:
            self.select_coverage(NEW_COMPOSITION)

        if coverage_name in self.coverage_names:
            # one of the user's own loaded / composed coverages
            self._delete_user_coverage(coverage_name)
        elif coverage_name == AGGREGATE:
            # the aggregate: clears *all* loaded coverage
            self._delete_aggregate_coverage(coverage_name)
        else:
            # unsupported / unknown coverage
            message = "Cannot delete %s, does not exist" % coverage_name
            raise ValueError(message)

        # notify any listeners that we have deleted coverage
        self._notify_coverage_deleted()

    def _delete_user_coverage(self, coverage_name):
        """
        Remove a loaded / composed coverage and its aggregate contribution.
        """

        # give the coverage's shorthand symbol back to the pool
        self._release_shorthand_alias(coverage_name)

        # drop the coverage object from the director's table
        # TODO: check if there's any references to the coverage object here...
        removed = self._database_coverage.pop(coverage_name)

        # take its data back out of the aggregate
        self.aggregate.subtract_data(removed.data)
        if self._aggregation_suspended:
            return
        self._refresh_aggregate()

    def _delete_aggregate_coverage(self, coverage_name):
        """
        Delete the aggregate set, effectively clearing all loaded coverage.

        `coverage_name` is not used by this routine: every loaded coverage
        is released, and the aggregate is replaced with a blank set.
        """

        #
        # release every loaded coverage set. we iterate over a snapshot of
        # the names because entries are popped from the table as we go
        # (mutating a dict while iterating a live key view raises)
        #

        for loaded_name in list(self.coverage_names):
            self._release_shorthand_alias(loaded_name)
            self._database_coverage.pop(loaded_name)

        # TODO: check if there's any references to the coverage aggregate...

        # assign a new, blank aggregate set
        self._special_coverage[AGGREGATE] = DatabaseCoverage(
            None, self._palette)
        self._refresh_aggregate()  # probably not needed

    def get_coverage(self, name):
        """
        Retrieve coverage data for the requested coverage_name.
        """

        # no matching coverage, return a blank coverage set
        if not name:
            return self._NULL_COVERAGE

        # if the given name was an alias, dereference it now
        coverage_name = self._alias2name.get(name, name)

        # attempt to retrieve the coverage from loaded / computed coverages
        if coverage_name in self.coverage_names:
            return self._database_coverage[coverage_name]

        # attempt to retrieve the coverage from the special directory coverages
        if coverage_name in self.special_names:
            return self._special_coverage[coverage_name]

        raise ValueError("No coverage data found for %s" % coverage_name)

    def get_coverage_string(self, coverage_name):
        """
        Build a descriptive display string for `coverage_name`.

        HOT_SHELL and NEW_COMPOSITION are returned unchanged. Anything
        else is rendered as '<shorthand> - <percent>% - <name>',
          eg: 'A - 73.45% - drcov.boombox.exe.03820.0000.proc.log'
        """

        # special case: the shell entries are shown by name only
        if coverage_name in (HOT_SHELL, NEW_COMPOSITION):
            return coverage_name

        symbol = self.get_shorthand(coverage_name)
        coverage = self.get_coverage(coverage_name)

        # instruction_percent is scaled by 100 for display
        percent = coverage.instruction_percent*100
        return "%s - %5.2f%% - %s" % (symbol, percent, coverage_name)

    #----------------------------------------------------------------------
    # Aliases
    #----------------------------------------------------------------------

    def alias_coverage(self, coverage_name, alias):
        """
        Assign `alias` to the coverage named `coverage_name`.

        The alias must not collide with an existing coverage name or with
        a reserved name.
        """
        collides_with_coverage = alias in self.all_names
        assert not collides_with_coverage
        collides_with_reserved = alias in RESERVED_NAMES
        assert not collides_with_reserved
        self._alias_coverage(coverage_name, alias)

    def _alias_coverage(self, coverage_name, alias):
        """
        Internal alias assignment routine. No restrictions.
        """

        #
        # if we are overwriting a known alias, we should remove its
        # inverse mapping reference in the name --> [aliases] map first
        #

        if alias in self._alias2name:
            self._name2alias[self._alias2name[alias]].remove(alias)

        # save the new alias
        self._alias2name[alias] = coverage_name
        self._name2alias[coverage_name].add(alias)

    def get_aliases(self, coverage_name):
        """
        Retrieve alias set for the requested coverage_name.
        """
        return self._name2alias[coverage_name]

    def get_shorthand(self, coverage_name):
        """
        Return the shorthand symbol aliased to `coverage_name`, or None.
        """
        try:

            # keep only the aliases that are shorthand symbols
            candidates = self._name2alias[coverage_name] & SHORTHAND_ALIASES

            # a coverage never holds more than one shorthand symbol
            assert len(candidates) < 2

            # popping an empty set raises KeyError (no shorthand assigned)
            symbol = candidates.pop()

        except KeyError:
            symbol = None

        return symbol

    def peek_shorthand(self):
        """
        Peek at the next available shorthand symbol.
        """
        try:
            return self._shorthand[0]
        except IndexError:
            return None

    def _request_shorthand_alias(self, coverage_name):
        """
        Give `coverage_name` the next free shorthand symbol.

        Returns the symbol assigned, or None if the pool is exhausted.
        """
        logger.debug("Requesting shorthand alias for %s" % coverage_name)
        assert coverage_name in self.coverage_names

        # no symbols left to hand out
        pool = self._shorthand
        if not pool:
            return None

        # claim the symbol at the front of the pool and alias it
        claimed = pool.popleft()
        self._alias_coverage(coverage_name, claimed)
        return claimed

    def _release_shorthand_alias(self, coverage_name):
        """
        Return the shorthand symbol held by `coverage_name` to the pool.

        Does nothing if the coverage holds no shorthand symbol.
        """
        logger.debug("Releasing shorthand alias for %s" % coverage_name)
        assert coverage_name in self.coverage_names

        held = self.get_shorthand(coverage_name)
        if held:

            # forget the symbol in both alias tables
            self._name2alias[coverage_name].remove(held)
            self._alias2name.pop(held)

            # released symbols go to the back of the pool
            self._shorthand.append(held)

    #----------------------------------------------------------------------
    # Composing
    #----------------------------------------------------------------------

    def add_composition(self, composite_name, ast):
        """
        Evaluate `ast` and store the result under `composite_name`.

        An existing entry of the same name is replaced. New names are
        given a shorthand alias when one is available. Listeners receive a
        'created' or 'modified' notification accordingly.
        """
        assert not (composite_name in RESERVED_NAMES)

        # decide up front whether this replaces existing coverage
        updating_coverage = composite_name in self.coverage_names
        logger.debug("Adding composition %s" % composite_name)

        # reduce the AST to a coverage set and save it under the name
        evaluated = self._evaluate_composition(ast)
        self._update_coverage(composite_name, evaluated)

        if updating_coverage:
            self._notify_coverage_modified()
        else:
            # new additions get a shorthand alias (if any are left)
            self._request_shorthand_alias(composite_name)
            self._notify_coverage_created()

    def cache_composition(self, ast, force=False):
        """
        Queue `ast` for evaluation by the background worker.

        The AST is only queued while the hot shell is the active coverage,
        unless `force` is set.
        """
        assert ast

        hotshell_active = self.coverage_name == HOT_SHELL
        if not (hotshell_active or force):
            return

        self._ast_queue.put(ast)

    def _async_evaluate_ast(self):
        """
        Asynchronous composition evaluation worker loop.

        Runs on the composition worker thread. Blocks on the AST queue,
        evaluates each AST it receives and stores the result as the hot
        shell coverage. A None entry on the queue stops the loop.
        """
        logger.debug("Starting EvaluateAST thread...")

        while True:

            # get the next AST to evaluate (blocks until one is queued)
            ast = self._ast_queue.get()

            #
            # signal to stop. this is an identity test on purpose: '=='
            # would dispatch to whatever __eq__ the AST token defines
            #

            if ast is None:
                break

            # produce a single composite coverage object as described by the AST
            composite_coverage = self._evaluate_composition(ast)

            # we always save the most recent composite to the hotshell entry
            self._special_coverage[HOT_SHELL] = composite_coverage

            #
            # if the hotshell entry is the active coverage selection, notify
            # listeners of its update
            #

            if self.coverage_name == HOT_SHELL:
                self._notify_coverage_modified()

            # loop and wait for the next AST to evaluate

        # thread exit
        logger.debug("Exiting EvaluateAST thread...")

    def _evaluate_composition(self, ast):
        """
        Evaluate the coverage composition described by the AST.

        Returns the director's NULL coverage for a TokenNull AST, otherwise
        the evaluated coverage, bound to the director's metadata and
        refreshed. Exceptions raised during evaluation propagate to the
        caller; the composition lock is released either way.
        """

        # if the AST is effectively 'null', return a blank coverage set
        if isinstance(ast, TokenNull):
            return self._NULL_COVERAGE

        #
        # this function is most generally called from the background
        # caching thread (self._composition_worker), but it can also be
        # called inline via 'add_composition' from a different thread
        # (namely, the main thread).
        #
        # because of this, the evaluation code is restricted to one thread
        # at a time by self._composition_lock.
        #
        # the lock is taken through await_lock(...) rather than a plain
        # acquire: per the original author, this lets the Qt/IDA main
        # thread yield to other threads while waiting, so it does not hang.
        #

        await_lock(self._composition_lock)

        #
        # everything below runs with the lock held. it MUST be released
        # even if evaluation raises (eg, an unknown symbol or an invalid
        # token), otherwise every later evaluation would block forever.
        #

        try:

            # recursively evaluate the AST
            composite_coverage = self._evaluate_composition_recursive(ast)

            # map the composited coverage data to the database metadata
            composite_coverage.update_metadata(self.metadata)
            composite_coverage.refresh()  # TODO: hash refresh?

        finally:

            # done operating on shared data (coverage), release the lock
            self._composition_lock.release()

        # return the evaluated composition
        return composite_coverage

    def _evaluate_composition_recursive(self, node):
        """
        Recursively reduce an AST node to a DatabaseCoverage.

        Logic operator nodes are evaluated from their two operands (going
        through the composition cache), range nodes and single coverage
        nodes are handed to their dedicated evaluators. Raises ValueError
        for any other kind of node.
        """

        #
        # a logic operator can only be applied once both of its operands
        # have themselves been reduced to coverage objects
        #

        if isinstance(node, TokenLogicOperator):

            # eg: lhs = DatabaseCoverage for 'A', rhs = ... for 'B'
            lhs = self._evaluate_composition_recursive(node.op1)
            rhs = self._evaluate_composition_recursive(node.op2)

            #
            # before doing any real work, derive a 'hash' for the
            # composition by applying the node's operator to the hashes of
            # its operands:
            #
            #      composition_key = hash(A) | hash(B)
            #
            # the operator itself is used so that the key has the same
            # symmetry as the operation: (A | B) == (B | A) share a key,
            # whereas (A - B) != (B - A) do not.
            #
            # collisions are more likely than with a real hash, but the
            # original author still expects them to be extremely rare.
            #

            composition_key = node.operator(lhs.coverage_hash,
                                            rhs.coverage_hash)

            #
            # evaluating an AST produces many 'transient' compositions:
            #
            #         (A | B) - (C | D)
            #
            #       COMP_1 = (A | B)
            #       COMP_2 = (C | D)
            #       COMP_3 = COMP_1 - COMP_2
            #
            # all of them (not only the final COMP_3) are kept in a small
            # cache, as a later evaluation such as (A | B) - (Z | D) can
            # reuse COMP_1 provided it has not been evicted.
            #

            cached = self._composition_cache[composition_key]
            if cached:
                return cached

            # cache miss: apply the operator to the operands' coverage
            mask = node.operator(lhs.coverage, rhs.coverage)

            # wrap the resulting mask in a new coverage object
            composed = DatabaseCoverage(mask, self._palette)

            # remember the composition for subsequent evaluations
            self._composition_cache[composition_key] = composed
            return composed

        #
        # a coverage range (eg, 'A,D') evaluates to an aggregate of the
        # coverage sets between its start and end symbols
        #

        if isinstance(node, TokenCoverageRange):
            return self._evaluate_coverage_range(node)

        # a single coverage token evaluates to its DatabaseCoverage
        if isinstance(node, TokenCoverageSingle):
            return self._evaluate_coverage(node)

        # unknown token? (this should never happen)
        raise ValueError("Invalid AST Token in Composition Tree")

    def _evaluate_coverage(self, coverage_token):
        """
        Evaluate a TokenCoverageSingle AST token.

        Resolves the token's symbol through the alias table and returns
        the existing coverage set it refers to.
        """
        assert isinstance(coverage_token, TokenCoverageSingle)
        coverage_name = self._alias2name[coverage_token.symbol]
        return self.get_coverage(coverage_name)

    def _evaluate_coverage_range(self, range_token):
        """
        Evaluate a TokenCoverageRange AST token.

        Returns a new coverage set holding the combined data of every
        coverage whose shorthand symbol lies in the (inclusive) range.
        """
        assert isinstance(range_token, TokenCoverageRange)

        # start from an empty coverage set
        output = DatabaseCoverage(None, self._palette)

        # 'A,Z' covers the symbols 'A', 'B', 'C', ... , 'Z'
        first = ord(range_token.symbol_start)
        last = ord(range_token.symbol_end)

        # accumulate the data of each coverage set named by the range
        for ordinal in range(first, last + 1):
            coverage_name = self._alias2name[chr(ordinal)]
            member = self.get_coverage(coverage_name)
            output.add_data(member.data)

        return output

    #----------------------------------------------------------------------
    # Refresh
    #----------------------------------------------------------------------

    def refresh(self):
        """
        Fully refresh the director: metadata first, then every coverage.
        """
        logger.debug("Refreshing the CoverageDirector")

        # force a metadata rebuild and wait for it to complete
        pending = self.refresh_metadata(metadata_progress, True)
        await_future(pending)

        # remap every coverage set against the rebuilt metadata
        self._refresh_database_coverage()

    def refresh_metadata(self, progress_callback=None, force=False):
        """
        Refresh the database metadata cache used by the director.

        Returns a queue acting as a future. When the metadata is already
        cached and `force` is not set, no refresh is started and the queue
        is pre-loaded with False. Otherwise the queue is the one returned
        by the metadata's own refresh().
        """
        metadata = self.metadata

        #
        # metadata that is not cached yet has never been populated by the
        # director, so this is where the director subscribes to metadata
        # modification notifications (this should only happen once)
        #
        # TODO: this is a little dirty, but it will suffice.
        #

        if not metadata.cached:
            metadata.function_renamed(self._notify_metadata_modified)

        # start the asynchronous refresh if there is no cache, or if forced
        if force or not metadata.cached:
            return metadata.refresh(progress_callback=progress_callback)

        # cached and not forced: hand back an already-completed 'future'
        completed = Queue.Queue()
        completed.put(False)
        return completed

    def _refresh_database_coverage(self):
        """
        Rebind every coverage set (loaded and special) to the metadata.

        The wait box text is updated with the progress as each coverage
        set is processed.
        """
        logger.debug("Refreshing database coverage mappings")

        names = self.all_names
        total = len(names)

        for position, name in enumerate(names, 1):
            logger.debug(" - %s" % name)
            progress = "Refreshing coverage mapping %u/%u" % (position, total)
            idaapi.replace_wait_box(progress)

            target = self.get_coverage(name)
            target.update_metadata(self.metadata)
            target.refresh()

    def _refresh_aggregate(self):
        """
        Rebind the aggregate coverage to the metadata and refresh it.
        """
        aggregate = self.aggregate
        aggregate.update_metadata(self.metadata)
        aggregate.refresh()
Пример #12
0
class LighthouseContext(object):
    """
    Lighthouse and its subsystems, instantiated for one database/binary.
    """

    def __init__(self, core, dctx):
        disassembler[self] = DisassemblerContextAPI(dctx)

        # session bookkeeping
        self.core = core
        self.dctx = dctx
        self._started = False

        # the coverage overview widget (none yet)
        self.coverage_overview = None

        # the directory to start the coverage file dialog in (none yet)
        self._last_directory = None

        # the database metadata cache
        self.metadata = DatabaseMetadata(self)

        # the coverage engine & the coverage painter share the core palette
        palette = self.core.palette
        self.director = CoverageDirector(self.metadata, palette)
        self.painter = CoveragePainter(self, self.director, palette)

    @property
    def palette(self):
        """
        The color palette owned by the Lighthouse core.
        """
        return self.core.palette

    def start(self):
        """
        One-time activation of a Lighthouse context and its subsystems.

        Calling this again once the context has been started does nothing.
        """
        if self._started:
            return

        # warm up the palette, then bring the subsystems up in order
        self.core.palette.warmup()
        for subsystem in (self.metadata, self.director, self.painter):
            subsystem.start()

        # TODO/BINJA remove this ASAP, or find a better workaround... I hate having this here
        if disassembler.NAME == "BINJA":
            disassembler.hide_dockable("Feature Map")

        self._started = True

    def terminate(self):
        """
        Spin down any session subsystems before the session is deleted.

        A context that was never started has nothing to spin down.
        """
        if not self._started:
            return

        # subsystems are taken down in the reverse of their start order
        for subsystem in (self.painter, self.director, self.metadata):
            subsystem.terminate()

    def select_coverage_files(self):
        """
        Prompt a file selection dialog, returning file selections.

        NOTE: This saves & reuses the last known directory for subsequent uses.
        """

        # without a remembered directory, begin in the database directory
        start_directory = self._last_directory
        if not start_directory:
            start_directory = disassembler[self].get_database_directory()
            self._last_directory = start_directory

        # create & configure a Qt File Dialog for immediate use
        dialog = QtWidgets.QFileDialog(
            None,
            'Open code coverage file',
            start_directory,
            'All Files (*.*)'
        )
        dialog.setFileMode(QtWidgets.QFileDialog.ExistingFiles)

        # prompt the user with the file dialog, and await filename(s)
        filenames, _ = dialog.getOpenFileNames()

        #
        # the directory of the first selected file becomes the starting
        # point for the next time the user comes to load coverage files
        #

        if filenames:
            self._last_directory = os.path.dirname(filenames[0]) + os.sep

        # log the captured (selected) filenames from the dialog
        logger.debug("Captured filenames from file dialog:")
        for filename in filenames:
            logger.debug(" - %s" % filename)

        return filenames
Пример #13
0
class DatabaseCoverage(object):
    """
    Database level coverage mapping.
    """
    def __init__(self, base, indexed_data, palette):
        """
        Wrap indexed coverage data so it can be mapped to database metadata.

        A missing / empty indexed_data is replaced by an empty hit counter.
        """

        # substitute an empty hit counter when no coverage data was given
        if not indexed_data:
            indexed_data = collections.defaultdict(int)

        self._base = base

        # the color palette used when painting this coverage
        self.palette = palette

        # the raw coverage data, none of which has been mapped yet
        self.coverage_data = indexed_data
        unmapped = set(indexed_data.keys())
        unmapped.add(idaapi.BADADDR)
        self.unmapped_coverage = unmapped

        # the metadata this coverage will be mapped to
        self._metadata = DatabaseMetadata(False)

        # maps to the child coverage objects
        self.functions = {}
        self.nodes = {}

        #
        # profiling showed that having every child (eg, FunctionCoverage or
        # NodeCoverage) create its own weakref to the parent/database added
        # a noticeable and unnecessary overhead.
        #
        # a single weakref of ourself (the DatabaseCoverage object) is made
        # here instead, and handed to each child we create.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def instruction_percent(self):
        """
        The coverage % by instructions executed.
        """
        try:
            return sum(f.instruction_percent
                       for f in self.functions.itervalues()) / len(
                           self._metadata.functions)
        except ZeroDivisionError:
            return 0.0

    #--------------------------------------------------------------------------
    # Operator Overloads
    #--------------------------------------------------------------------------

    def __or__(self, other):
        """
        Overload of '|' (logical or) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot OR DatabaseCoverage against type '%s'" % type(other))

        # initialize
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # TODO / v0.4.0: this will be refactored as a 'coverage add/or'

        # compute the union of the two coverage sets
        for address, hit_count in self.coverage_data.iteritems():
            composite_data[address] = hit_count
        for address, hit_count in other.coverage_data.iteritems():
            composite_data[address] += hit_count

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __and__(self, other):
        """
        Overload of '&' (logical and) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot AND DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the intersecting addresses of the two coverage sets
        intersected_addresses = self.coverage_data.viewkeys(
        ) & other.coverage_data.viewkeys()

        # TODO / v0.4.0: this will be refactored as a 'coverage and'

        # accumulate the hit counters for the intersecting coverage
        for address in intersected_addresses:
            composite_data[address] = self.coverage_data[
                address] + other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __sub__(self, other):
        """
        Overload of '-' (subtract) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot SUB DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the difference addresses of the two coverage sets
        difference_addresses = self.coverage_data.viewkeys(
        ) - other.coverage_data.viewkeys()

        #
        # NOTE:
        #   I'm not convinced I should acumulate the subtractee's hit counts,
        #   and I don't think it makes sense to? so for now we don't.
        #
        # TODO / v0.4.0: this will be refactored as a 'coverage subtract'
        #

        # build the new coverage data
        for address in difference_addresses:
            composite_data[address] = self.coverage_data[
                address]  #- other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def hitmap_subtract(self, other):
        """
        Subtract hitmaps from each other.

        TODO: dirty hack that will be removed in v0.4.0
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot SUB DatabaseCoverage hitmap against type '%s'" %
                type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # build the new coverage data
        for address in self.coverage_data.viewkeys():
            composite_data[address] = self.coverage_data[address]
        for address in other.coverage_data.viewkeys():
            composite_data[address] -= other.coverage_data[address]
            if not composite_data[address]:
                del composite_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __xor__(self, other):
        """
        Overload of '^' xor operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot XOR DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the symmetric difference (xor) between two coverage sets
        xor_addresses = self.coverage_data.viewkeys(
        ) ^ other.coverage_data.viewkeys()

        # accumulate the hit counters for the xor'd coverage
        for address in xor_addresses & self.coverage_data.viewkeys():
            composite_data[address] = self.coverage_data[address]
        for address in xor_addresses & other.coverage_data.viewkeys():
            composite_data[address] = other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __ror__(self, other):
        """
        Reflected '|' operator, used when this coverage is the right operand.

        Delegates to __or__ with the same operand, so the result is the same
        whichever side of the '|' this coverage appears on.
        """
        return self.__or__(other)

    def __rand__(self, other):
        """
        Reflected '&' operator, used when this coverage is the right operand.

        Delegates to __and__ with the same operand, so the result is the same
        whichever side of the '&' this coverage appears on.
        """
        return self.__and__(other)

    #def __rsub__(self, other):
    #    return self.__sub__(other)

    def __rxor__(self, other):
        """
        Reflected '^' operator, used when this coverage is the right operand.

        Delegates to __xor__ with the same operand, so the result is the same
        whichever side of the '^' this coverage appears on.
        """
        return self.__xor__(other)

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Update the installed metadata.
        """

        # install the new metadata
        self._metadata = weakref.proxy(metadata)

        # unmap all the coverage affected by the metadata delta
        if delta:
            self._unmap_dirty(delta)

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

    def refresh_nodes(self):
        """
        Special fast-refresh of nodes as used in the un-painting process.
        """
        dirty_nodes = self._map_nodes()
        self._finalize_nodes(dirty_nodes)

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize coverage objects for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize coverage nodes for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize coverage nodes for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the given database metadata.
        """

        # re-map any unmapped coverage to nodes
        dirty_nodes = self._map_nodes()

        # re-map nodes to functions
        dirty_functions = self._map_functions(dirty_nodes)

        # return the modified objects
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded coverage data to database defined nodes (basic blocks).

        The unmapped coverage addresses are walked in ascending order. Each
        address that falls inside a node known to the metadata is moved into
        that node's coverage object (created on first use) and removed from
        'unmapped_coverage'. Addresses for which the node lookup fails stay
        in 'unmapped_coverage'.

        Returns a dict of the node coverage objects touched by this pass,
        keyed by node address.
        """
        dirty_nodes = {}
        addresses_to_map = collections.deque(sorted(self.unmapped_coverage))

        #
        # This while loop is the core of our coverage mapping process.
        #
        # A sorted snapshot of 'unmapped_coverage' is consumed by this loop,
        # mapping any unmapped coverage data maintained by this
        # DatabaseCoverage to the installed database metadata.
        #
        # It should be noted that the rest of the database coverage
        # mapping (eg functions) gets built on top of the mappings we build
        # for nodes here using the more or less raw/recycled coverage data.
        #

        while addresses_to_map:

            # get the next address to map
            address = addresses_to_map.popleft()

            # get the node (basic block) that contains this address
            try:
                node_metadata = self._metadata.get_node(address)

            #
            # failed to locate the node (basic block) for this address.
            # this address must not fall inside of a defined function, so
            # it is skipped and remains unmapped
            #

            except ValueError:
                continue

            #
            # we found applicable node metadata for this address, now try
            # to find the coverage object for this node address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate a node coverage object, looks like this is
            # the first time we have identified coverage for this node.
            # create a coverage node object and use it now.
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address,
                                             self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the basic block end now to reduce overhead in the loop below
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below can be thought of almost as an inlined fast-path
            # where we expect the next several addresses to belong to the same
            # node (basic block).
            #
            # with the assumption of linear program execution, we can reduce
            # the heavier overhead of all the lookup code above by simply
            # checking if the next address in the queue (addresses_to_map)
            # falls into the same / current node (basic block).
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside our scope
            #

            while 1:

                # map the coverage data for the current address to this node
                node_coverage.executed_bytes.add(address)

                #
                # ownership has been transferred to node_coverage, so this
                # address is no longer considered 'unmapped'
                #

                self.unmapped_coverage.discard(address)

                #
                # get the next address to attempt mapping on
                #
                # NOTE(review): there is no emptiness check before this
                # popleft(). it looks like the idaapi.BADADDR entry that
                # __init__ adds to 'unmapped_coverage' is what keeps the
                # queue from running dry here -- confirm before touching
                #

                address = addresses_to_map.popleft()

                #
                # if the address is not in this node, it's time to break out
                # of this loop and send it back through the full node lookup
                # path
                #

                if not (node_metadata.address <= address < node_end):
                    addresses_to_map.appendleft(address)
                    break

                #
                # the next address to be mapped DOES fall within our current
                # node, loop back around in the fast-path and map it
                #

                # ...

            # since we updated this node, ensure we're tracking it as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.
        """
        dirty_functions = {}

        #
        # thanks to the _map_nodes function, we now have a repository of
        # node coverage objects that are considered 'dirty' and can be used
        # precisely guide the generation of our function level coverage
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using the node_coverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of all the
            # functions in the database that reference it
            #

            functions = self._metadata.nodes[node_coverage.address].functions

            #
            # now we can loop through every function that references this
            # node and initialize or add this node to its respective
            # coverage mapping
            #

            for function_metadata in functions.itervalues():

                #
                # retrieve the coverage object for this function address
                #

                try:
                    function_coverage = self.functions[
                        function_metadata.address]

                #
                # failed to locate a function coverage object, looks like this
                # is the first time we have identiied coverage for this
                # function. creaate a coverage function object and use it now.
                #

                except KeyError as e:
                    function_coverage = FunctionCoverage(
                        function_metadata.address, self._weak_self)
                    self.functions[
                        function_metadata.address] = function_coverage

                # mark this node as executed in the function level mappping
                function_coverage.mark_node(node_coverage)
                dirty_functions[function_metadata.address] = function_coverage

                # end of functions loop

            # end of nodes loop

        # done
        return dirty_functions

    def _unmap_dirty(self, delta):
        """
        Unmap node & function coverage affected by the metadata delta.

        The metadata delta tells us exactly which parts of the database
        changed since our last coverage mapping. This function surgically
        unmaps the pieces of our coverage that may now be stale.

        This enables us to recompute only what is necessary upon refresh.
        """

        #
        # Dirty Nodes
        #

        #
        # using the metadata delta as a guide, we loop through all the nodes
        # it has noted as either removed, or modified. it is in our best
        # interest to unmap any of these dirty (stale) node addresses in OUR
        # coverage mapping so we can selectively regenerate their coverage
        # later.
        #

        for node_address in itertools.chain(delta.nodes_removed,
                                            delta.nodes_modified):

            #
            # if there's no coverage for this node, then we have nothing to do.
            # continue on to the next dirty node address
            #

            node_coverage = self.nodes.pop(node_address, None)
            if not node_coverage:
                continue

            # the node was found, return its executed bytes to the unmapped set
            self.unmapped_coverage.update(node_coverage.executed_bytes)

            #
            # NOTE:
            #
            #   since we popped node_coverage from the database-wide
            #   self.nodes dict, this loop iteration owns the last remaining
            #   'hard' ref to the object. once the loop rolls over, it will
            #   be released.
            #
            #   what is cool about this is that its corresponding entry for
            #   this node_coverage object in any FunctionCoverage objects that
            #   reference this node will also disappear. This is because the
            #   executed_nodes dictionaries are built using WeakValueDictionary.
            #

        #
        # Dirty Functions
        #

        # delete function coverage objects for the allegedly deleted functions
        for function_address in delta.functions_removed:
            self.functions.pop(function_address, None)