Пример #1
0
    def import_src(self, src_db):
        """Match the functions stored in ``src_db`` and display the results.

        Loads the project hooks, attaches ``src_db`` to ``self.db`` under the
        schema name ``src``, runs the initial and callgraph matching passes
        and, if ``self.best_matches`` ends up non-empty, shows it in a
        ``CDiffChooser`` (plus the dubious matches when ``_DEBUG`` is set).
        Otherwise ``Warning`` and ``log`` are called with
        "No matches found.".

        :param src_db: path of the database to attach as ``src``.
        """
        self.load_hooks()
        self.src_db = src_db

        try:
            self.db.execute('attach "%s" as src' % src_db)
        except Exception as e:
            # Best effort, as before: a failed attach does not abort the
            # import (presumably the database may already be attached from a
            # previous run -- TODO confirm), but the reason is no longer
            # silently discarded and KeyboardInterrupt/SystemExit propagate.
            log("Error attaching %s: %s" % (src_db, str(e)))

        found = False
        if self.find_initial_rows():
            self.find_callgraph_matches()
            self.choose_best_matches(is_final=True)
            found = len(self.best_matches) > 0

        if not found:
            Warning("No matches found.")
            log("No matches found.")
            return

        title = "Matched functions for %s" % os.path.basename(src_db)
        c = CDiffChooser(self, title, self.best_matches, self)
        c.show()

        if _DEBUG:
            c = CDiffChooser(self, "Dubious matches", self.dubious_matches,
                             self)
            c.show()
Пример #2
0
  def open_or_create_database(self, force=False):
    """Export the current database to its ``-src.sqlite`` file when needed.

    ``self.db_filename`` is always set to ``self.db_path`` with its extension
    replaced by ``-src.sqlite``.  The export runs when that file does not
    exist, when ``self.different_versions()`` is true, or when ``force`` is
    true; otherwise nothing else happens.

    Raises ``Exception`` if an export is required while ``from_ida`` is false.
    """
    base_path = os.path.splitext(self.db_path)[0]
    self.db_filename = base_path + "-src.sqlite"

    # Checked in the same order as before: the file test short-circuits the
    # version check, which in turn short-circuits ``force``.
    up_to_date = (os.path.exists(self.db_filename)
                  and not self.different_versions()
                  and not force)
    if up_to_date:
      return

    if not from_ida:
      raise Exception("Export process can only be done from within IDA")

    log("Exporting current database...")
    CBinaryToSourceExporter().export(self.db_filename)
Пример #3
0
def indent_source(src):
  """Run ``src`` through the external indenter and HTML-escape the result.

  The command line is taken from the module-level ``indent_cmd``; ``src`` is
  written to its standard input and its combined stdout/stderr is read back.
  Only ``<`` and ``>`` are escaped (to ``&lt;`` / ``&gt;``).

  If the command cannot be run, fails, or produces no output, the escaped,
  un-indented ``src`` is returned instead, so the function never returns
  ``None``.
  """
  try:
    # Pipes carry bytes: encode text input (a no-op for byte strings).
    data = src if isinstance(src, bytes) else src.encode("utf-8")
    p = Popen(indent_cmd, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
    output = p.communicate(input=data)[0]
    indented = output.decode("utf-8", "replace")
    if indented:
      return indented.replace("<", "&lt;").replace(">", "&gt;")
  except Exception:
    log("Error indenting: %s" % (str(sys.exc_info()[1])))

  # Fallback shared by the error path and the "indenter printed nothing" path.
  return src.replace("<", "&lt;").replace(">", "&gt;")
Пример #4
0
    def open_or_create_database(self, force=False):
        """Export the current database to its ``-src.sqlite`` file when needed.

        ``self.db_filename`` is always set to ``self.db_path`` with its
        extension replaced by ``-src.sqlite``.  The export runs when that
        file does not exist, when ``self.different_versions()`` is true, or
        when ``force`` is true.  Before exporting, the project hooks are
        loaded and handed to the exporter.

        Raises ``Exception`` if an export is required while ``from_ida`` is
        false.
        """
        base_path = os.path.splitext(self.db_path)[0]
        self.db_filename = base_path + "-src.sqlite"

        # Same evaluation order as the original ``or`` chain.
        up_to_date = (os.path.exists(self.db_filename)
                      and not self.different_versions()
                      and not force)
        if up_to_date:
            return

        if not from_ida:
            raise Exception("Export process can only be done from within IDA")

        # Project specific hooks first, then the export itself.
        self.load_hooks()

        log("Exporting current database...")
        CBinaryToSourceExporter(hooks=self.hooks).export(self.db_filename)
Пример #5
0
    def find_one_callgraph_match(self,
                                 src_id,
                                 bin_ea,
                                 min_level,
                                 call_type="callee",
                                 iteration=1):
        """Match the callers or callees of an already matched function pair.

        Every row returned by ``get_source_call_type(src_id, call_type)`` is
        compared with every row returned by
        ``get_binary_call_type(bin_ea, call_type)``.  Pairs whose
        ``compare_functions`` score is at least ``min_level`` are recorded
        through ``add_match``.

        Nothing is done when no ``functions`` row exists for ``bin_ea``, when
        either side has no rows, or when the number of pairs exceeds
        ``self.max_cartesian_product`` (that case is logged).

        :param src_id: id of the matched function on the source side.
        :param bin_ea: address of the matched function on the binary side.
        :param min_level: minimum score for a pair to be recorded.
        :param call_type: key read from each row, ``"callee"`` by default.
        :param iteration: iteration number, only used in the match description.
        """
        cur = self.db.cursor()
        try:
            cur.execute("select * from functions where ea = ?",
                        (str(bin_ea), ))
            row = cur.fetchone()
            if row is None:
                return

            src_rows = list(self.get_source_call_type(src_id, call_type))
            if not src_rows:
                return

            bin_rows = list(self.get_binary_call_type(bin_ea, call_type))
            if not bin_rows:
                return

            pairs = len(bin_rows) * len(src_rows)
            if pairs > self.max_cartesian_product:
                msg = "Cartesian product finding %ss for SRC=%d/BIN=0x%08x(%s) too big (%d)..."
                log(msg % (call_type, src_id, long(bin_ea), row["name"],
                           pairs))
                return

            if _DEBUG:
                print("Finding matches in a cartesian product of %d x %d row(s)" %
                      (len(src_rows), len(bin_rows)))

            description = "Callgraph match (%s, iteration %d)" % (call_type,
                                                                  iteration)
            for src_row in src_rows:
                for bin_row in bin_rows:
                    curr_bin_id = self.get_binary_func_id(bin_row[call_type])
                    if not curr_bin_id:
                        continue

                    score, reasons, ml = self.compare_functions(
                        src_row[call_type], curr_bin_id, CALLGRAPH_MATCH)
                    if score < min_level:
                        continue

                    func_name = self.get_source_func_name(src_row[call_type])
                    self.add_match(
                        long(src_row[call_type]), bin_row[call_type],
                        func_name, description, score, reasons, ml)
        finally:
            # Release the cursor even if one of the helpers above raises.
            cur.close()
Пример #6
0
    def import_src(self, src_db):
        """Match the functions stored in ``src_db`` and display the results.

        Attaches ``src_db`` to ``self.db`` under the schema name ``src``, runs
        the initial and callgraph matching passes and, if
        ``self.best_matches`` ends up non-empty, shows it in a
        ``CDiffChooser`` (plus the dubious matches when ``_DEBUG`` is set).
        Otherwise ``Warning`` and ``log`` are called with
        "No matches found.".
        """
        self.db.execute('attach "%s" as src' % src_db)

        found = False
        if self.find_initial_rows():
            self.find_callgraph_matches()
            self.choose_best_matches(is_final=True)
            found = len(self.best_matches) > 0

        if not found:
            Warning("No matches found.")
            log("No matches found.")
            return

        chooser = CDiffChooser(self, "Matched functions", self.best_matches,
                               self)
        chooser.show()

        if _DEBUG:
            chooser = CDiffChooser(self, "Dubious matches",
                                   self.dubious_matches, self)
            chooser.show()
Пример #7
0
    def load_hooks(self):
        """Install the hooks class exported by ``self.project_script``.

        The script is loaded as a module named ``pigaios_hooks``.  It must
        expose a ``HOOKS`` mapping containing a ``'PigaiosHooks'`` entry; that
        entry is called with this object and the result is stored in
        ``self.hooks``.

        Returns ``True`` when no script is configured (``None`` or ``""``) or
        when the hooks were installed, ``False`` on any failure.
        """
        script = self.project_script
        if script is None or script == "":
            return True

        try:
            module = imp.load_source("pigaios_hooks", script)
        except:
            reason = str(sys.exc_info()[1])
            log("Error loading project specific Python script: %s" % reason)
            return False

        # Defensive check kept from the original implementation.
        if module is None:
            return False

        if 'HOOKS' not in dir(module):
            log("Error: The project specific script doesn't export the HOOKS dictionary")
            return False

        exported = module.HOOKS
        if 'PigaiosHooks' not in exported:
            log("Error: The project specific script exports the HOOK dictionary but it doesn't contain a 'PigaiosHooks' entry.")
            return False

        self.hooks = exported["PigaiosHooks"](self)
        return True
Пример #8
0
    def find_callgraph_matches(self):
        """Repeat callgraph matching until ``self.best_matches`` stops growing.

        Each iteration walks a snapshot of ``self.best_matches`` (keyed by the
        id of the function in the source code), skipping ids and addresses
        already processed in this call.  For entries that qualify, nearby
        functions, callees and callers are searched.  After the walk
        ``choose_best_matches`` is called, and the loop ends once an iteration
        leaves the number of best matches unchanged.
        """
        log("Finding callgraph matches...")
        iteration = 0
        seen_ids = set()
        seen_eas = set()

        while True:
            started = time.time()
            iteration += 1

            previous_total = len(self.best_matches)
            log("Iteration %d, discovered a total of %d row(s)..." %
                (iteration, previous_total))

            # Iterate over a copy: the searches below may add new matches.
            for src_id in list(self.best_matches):
                if src_id in seen_ids:
                    continue
                seen_ids.add(src_id)

                if src_id not in self.best_matches:
                    continue

                ea, _, _, score, _, ml = self.best_matches[src_id]
                if ea in seen_eas:
                    continue
                seen_eas.add(ea)

                if iteration == 1 or score >= self.min_level or ml == 1.0:
                    self.find_nearby_functions(src_id, ea,
                                               0.3 + (iteration * 0.1),
                                               iteration)
                    for call_type in ("callee", "caller"):
                        self.find_one_callgraph_match(src_id, ea,
                                                      self.min_level,
                                                      call_type, iteration)

                # More than 5 minutes for a single iteration is too long...
                if time.time() - started >= 60 * 5:
                    log("Iteration took too long, continuing...")
                    break

            self.choose_best_matches()
            if len(self.best_matches) == previous_total:
                break
Пример #9
0
    lexer = shlex.shlex(x.iIndentCommand.value)
    lexer.wordchars += "\:-."
    indent_cmd = list(lexer)

    importer = CIDABinaryToSourceImporter()
    importer.min_level = min_level
    importer.min_display_level = min_display_level
    importer.use_decompiler = x.rUseDecompiler.checked
    importer.import_src(database)
  finally:
    hide_wait_box()

if __name__ == "__main__":
  # Script entry point: run main(), under cProfile when the DIAPHORA_PROFILE
  # environment variable is set.  Any error is logged with its traceback and
  # re-raised; the wait box is hidden in every case.
  try:
    if os.getenv("DIAPHORA_PROFILE") is None:
      main()
    else:
      import cProfile
      profiler = cProfile.Profile()
      profiler.runcall(main)
      exported = True
      profiler.print_stats(sort="time")
  except:
    log("ERROR: %s" % str(sys.exc_info()[1]))
    traceback.print_exc()
    raise
  finally:
    hide_wait_box()

Пример #10
0
    def find_initial_rows(self):
        """Seed the match list with the initial source/binary candidates.

        Two queries are run against ``functions`` and the attached ``src``
        schema:

        1. Attribute matching: same ``constants``, ``constants_json`` and
           ``loops``, and either the same name or a ``conditions`` count
           within 3 of the source one.  The source ``constants_json`` must be
           shared by fewer than ``total * i / 100`` source functions; ``i``
           goes from 1 to 5 and the first value that yields rows is used.
        2. Rare constants: functions sharing a constant that appears at most
           3 times in ``src.constants``.

        Every row is scored with ``compare_functions`` and recorded with
        ``add_match``.  When ``self.min_level`` or ``self.min_display_level``
        is ``0.0`` and the first query found rows, they are derived from the
        lowest score of that query.

        :returns: ``True`` if either query produced at least one row.
        """
        cur = self.db.cursor()
        try:
            sql = """ select bin.ea, src.name, src.id, bin.id
                from functions bin,
                     src.functions src
               where (bin.conditions between src.conditions and src.conditions + 3
                   or bin.name = src.name)
                 and bin.constants = src.constants
                 and bin.constants_json = src.constants_json
                 and (select count(*) from src.functions x where x.constants_json = src.constants_json) < %d
                 and src.constants_json != '[]'
                 and src.constants > 0
                 and src.conditions > 1
                 and bin.loops = src.loops """

            cur.execute("select count(*) from src.functions")
            total = cur.fetchone()[0]

            if has_ml:
                log("Decision tree based system available")

            log("Finding best matches...")
            rows = []
            for i in range(1, 6):
                # Constants must appear less than i% of the time in the sources
                val = (total * i / 100)
                cur.execute(sql % val)
                rows = cur.fetchall()
                if rows:
                    break

            size = len(rows)
            if size > 0:
                max_score = 0
                min_score = 1
                for row in rows:
                    func_ea = long(row[0])
                    match_name = row[1]
                    match_id = row[2]
                    bin_id = row[3]
                    score, reasons, ml = self.compare_functions(
                        match_id, bin_id, ATTRIBUTES_MATCHING)
                    min_score = min(min_score, score)
                    max_score = max(max_score, score)

                    self.add_match(match_id, func_ea, match_name,
                                   "Attributes matching", score, reasons, ml)

                log("Minimum score %f, maximum score %f" %
                    (min_score, max_score))
                # We have had too good matches or too few, use a more relaxed
                # minimum score
                if min_score > 0.5:
                    min_score = 0.5

                # If the minimum ratios were set to '0', calculate them from
                # the minimum ratio we get from the initial best matches
                # (which must be false positives free).
                if self.min_level == 0.0:
                    # NOTE(review): min() caps this value at 0.01 while the
                    # display level below uses max(); confirm the asymmetry
                    # is intended.
                    self.min_level = min(abs(min_score - 0.3), 0.01)

                if self.min_display_level == 0.0:
                    self.min_display_level = max(abs(min_score - 0.3), 0.3)

            log("Minimum score for calculations: %f" % self.min_level)
            log("Minimum score to show results : %f" % self.min_display_level)

            sql = """ select distinct bin_func.ea, src_func.name, src_func.id, bin_func.id
                from functions bin_func,
                     constants bin_const,
                     src.functions src_func,
                     src.constants src_const
               where bin_const.constant = src_const.constant
                 and bin_func.id = bin_const.func_id
                 and src_func.id = src_const.func_id
                 and (select count(*)
                        from src.constants sc
                       where sc.constant = src_const.constant
                      ) <= 3"""
            cur.execute(sql)
            # Rows are consumed one at a time, as before, rather than
            # materialising the whole result set.
            while True:
                row = cur.fetchone()
                if not row:
                    break

                size += 1
                func_ea = long(row[0])
                match_name = row[1]
                match_id = row[2]
                bin_id = row[3]
                score, reasons, ml = self.compare_functions(
                    match_id, bin_id, SAME_RARE_CONSTANT)
                self.add_match(match_id, func_ea, match_name,
                               "Same rare constant", score, reasons, ml)

            return size != 0
        finally:
            # Release the cursor even when a query or a helper raises.
            cur.close()
Пример #11
0
 def log(self, msg):
     """Forward ``msg`` to the ``log`` callable from the enclosing module scope.

     Inside the method body the name ``log`` is looked up in the module
     globals, not on the class, so this does not call itself.
     """
     log(msg)