def build_hist_string(self, mod_name, oid, cutoff=200):
    """Build the display lines for the `mod_name` histogram of `oid`.

    If the histogram exists, the result holds one ``key<TAB>count`` line per
    entry, ordered by descending count and limited to the first `cutoff`
    entries; keys are hexlified when `mod_name` is "byte_histogram".  An empty
    histogram yields the single line "No keys were found.".

    If the histogram does not exist, the result describes the state of the
    background job tracked in ``self.threads`` under ``(mod_name, oid)``:
    "N/A" when a job already ran and finished, a "still being created" notice
    when one is running, or a "being created" notice after starting a new one.

    Returns:
        list of str: the lines to display.
    """
    lines = []
    args = (mod_name, oid)

    if api.exists(mod_name=mod_name, oid=oid):
        hist = api.retrieve(mod_name=mod_name, oid_list=[oid])
        # sorted() works on both Python 2 lists and Python 3 dict views,
        # unlike the in-place hist.items().sort().
        ranked = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
        if not hist:
            lines.append("No keys were found.")
        hexlify_keys = mod_name == "byte_histogram"
        for key, value in ranked[:cutoff]:
            if hexlify_keys:
                key = hexlify(key)
            lines.append('%s\t%8d' % (key, value))
        return lines

    # Read the job state once so both decisions below see the same answer.
    requested = args in self.threads
    running = requested and self.threads[args].is_alive()

    if requested and not running:
        # Requested and returned NULL.
        lines.append("N/A")
        return lines

    lines.append("%s file does not exist for:" % mod_name)
    lines.append("\t%s" % oid)
    if running:
        # Requested and still waiting.
        lines.append("It is still being created.")
    else:
        # First time this has been requested.
        lines.append("It is being created.")
        creation_thread = threading.Thread(target=api.process, args=args)
        creation_thread.start()
        self.threads[args] = creation_thread
    return lines
def membership(args, opts):
    """ Prints the set of collections to which a file belongs. If a
        collection is passed its membership will not be printed
        Syntax: membership %<oid> ...
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")

    # Collections named directly on the command line are left out of the report.
    exclude_cids = [oid for oid in valid if api.exists("collections", oid)]
    main_oids = set(api.expand_oids(valid))
    candidates = [cid for cid in api.collection_cids()
                  if cid not in exclude_cids]

    membership_cids = {}
    for cid in candidates:
        shared = list(main_oids & set(api.expand_oids(cid)))
        if shared:
            membership_cids[cid] = shared

    if "noprint" not in opts:
        print_membership(membership_cids)
    return membership_cids
def process(oid, opts):
    """Record one import of the file `oid` and tag it with the import time.

    Reads ``opts["file_location"]`` (only its base name is kept) and
    ``opts["stat"]`` (must contain "size").  Any record already stored for
    `oid` is extended; otherwise a new record with "metadata", "names" and
    "size" entries is created.  The record is stored and an "import_time" tag
    is applied to `oid`.

    Returns:
        True, always.
    """
    logger.debug("Processing file %s", oid)
    import_time = int(time.time())
    import_name = os.path.basename(opts["file_location"])  # strip dir from name
    file_stat = opts["stat"]
    size = file_stat["size"]

    # Get the existing file info - if any
    data = None
    if api.exists(name, oid, opts):
        data = api.retrieve(name, oid, opts, True)

    if not data:
        # No usable record yet: start an empty one and fill it in below.
        data = {"metadata": {}, "names": set(), "size": size}
    elif "size" not in data:
        data["size"] = size

    # import_time has one-second resolution, so the same oid can be imported
    # under several names within one timestamp.  Merge into the per-timestamp
    # dict instead of replacing it, otherwise the earlier name's stat is lost.
    data["metadata"].setdefault(import_time, {})[import_name] = file_stat
    data["names"].add(import_name)
    api.store(name, oid, data, opts)

    # Add import time tag
    api.apply_tags(oid, {"import_time": import_time})
    return True
def get_tags(oid):
    """Return what is stored under "tags" for a single OID, or None.

    None is returned when `oid` is not a str (an error is logged in that
    case) and when nothing is stored under "tags" for `oid`.
    """
    if isinstance(oid, str):
        if api.exists("tags", oid):
            return api.retrieve("tags", oid)
        return None
    logger.error("get_tags must be called with a single OID")
    return None
def mapper(oid, opts, jobid=False):
    """Store the build_ngrams() result for `oid` under this module's name.

    Returns None when the documentation of the oid's source module has a
    truthy "set" entry.  Otherwise returns `oid`, computing and storing the
    result first if it is not stored yet.  `opts["n"]` is passed to
    build_ngrams().
    """
    logger.debug("mapper()")
    source_mod = api.source(oid)
    if api.documentation(source_mod)["set"]:
        return None
    if not api.exists(name, oid, opts):
        raw = api.retrieve(source_mod, oid, opts)["data"]
        api.store(name, oid, build_ngrams(raw, opts["n"]), opts)
    return oid
def mapper(oid, opts, jobid=False):
    """Store the build_histo() result for the "data" field of `oid`.

    Returns `oid` when a result is already stored or has just been stored,
    and None when the source module yields no data for `oid`.
    """
    logger.debug("mapper()")
    source_mod = api.source(oid)
    if not api.exists(name, oid, opts):
        payload = api.get_field(source_mod, oid, "data")
        if not payload:
            return None
        api.store(name, oid, build_histo(payload), opts)
    return oid
def mapper(oid, opts, jobid=False):
    """Store the build_histo() result for the "opcodes" field of `oid`.

    Returns None when the documentation of the oid's source module has a
    truthy "set" entry, or when no opcodes are available.  Otherwise returns
    `oid`, computing and storing the result first if it is not stored yet.
    """
    logger.debug("mapper()")
    source_mod = api.source(oid)
    if api.documentation(source_mod)["set"]:
        return None
    if not api.exists(name, oid, opts):
        opcode_data = api.get_field("opcodes", oid, "opcodes")
        if not opcode_data:
            return None
        api.store(name, oid, build_histo(opcode_data), opts)
    return oid
def mapper(oid, opts, jobid=False):
    """Store the nop_histo() result for the "nops" data of `oid`.

    Returns None when the documentation of the oid's source module has a
    truthy "set" entry, or when the "nops" module returns nothing.  Otherwise
    returns `oid`, computing and storing the result first if it is not stored
    yet.
    """
    logger.debug("mapper()")
    source_mod = api.source(oid)
    if api.documentation(source_mod)["set"]:
        return None
    if not api.exists(name, oid, opts):
        nop_data = api.retrieve("nops", oid, opts)
        if not nop_data:
            return None
        api.store(name, oid, nop_histo(nop_data), opts)
    return oid
def apply_tags(oid_list, new_tags):
    """Merge `new_tags` into the tags stored for one oid or a list of oids.

    A list is handled by applying the tags to each element in turn.  For a
    single oid the stored tags (or an empty dict when none exist) are updated
    with every key/value of `new_tags` and stored back under "tags".
    """
    if not isinstance(oid_list, list):
        oid = oid_list
        if api.exists("tags", oid):
            tags = api.retrieve("tags", oid, {}, True)
        else:
            tags = {}
        tags.update(new_tags)
        api.store("tags", oid, tags)
        return

    for oid in oid_list:
        apply_tags(oid, new_tags)
def mapper(oid, opts, jobid=False):
    """Count how often each value occurs in the "system_calls" field of `oid`.

    The field is read from the "map_calls" module and the counts are stored
    under this module's name.  Returns `oid` when a result is already stored
    or has just been stored, and None when the field is empty or missing.
    """
    logger.debug("mapper()")
    # The result is unused here; the call is kept as the original made it.
    api.source(oid)
    if api.exists(name, oid, opts):
        return oid

    call_map = api.get_field("map_calls", oid, "system_calls")
    if not call_map:
        return None

    out_histo = defaultdict(int)
    for addr in call_map:
        called = call_map[addr]
        out_histo[called] += 1
    api.store(name, oid, out_histo, opts)
    return oid
def mapper(oid, opts, jobid=False):
    """Merge the calls() result of every extracted function of `oid`.

    Functions come from the "function_extract" module; each one's calls()
    result is folded into a running histogram with merge_histo(), and the
    final histogram is stored under this module's name.  Returns `oid` when a
    result is already stored or has just been stored, and None when no
    functions are available.
    """
    logger.debug("mapper()")
    if api.exists(name, oid, opts):
        return oid

    extracted = api.retrieve("function_extract", oid)
    if not extracted:
        return None

    out_histo = defaultdict(int)
    for func_key in extracted:
        out_histo = merge_histo(out_histo, calls(extracted[func_key]))
    api.store(name, oid, out_histo, opts)
    return oid
def mapper(oid, opts, jobid=False):
    """Count the basic blocks of `oid` by their "num_insns" value.

    Basic blocks come from the "basic_blocks" module, grouped per key; the
    counts are stored under this module's name.  Returns `oid` when a result
    is already stored or has just been stored, and None when no basic blocks
    are available.
    """
    logger.debug("mapper()")
    if api.exists(name, oid, opts):
        return oid

    blocks_by_func = api.retrieve("basic_blocks", oid)
    if not blocks_by_func:
        return None

    out_histo = defaultdict(int)
    for func_key in blocks_by_func:
        for block in blocks_by_func[func_key]:
            size = block["num_insns"]
            out_histo[size] += 1
    api.store(name, oid, out_histo, opts)
    return oid
def intersection(args, opts):
    """ Returns the intersection of the collections passed in,
        non-collection IDs will be ignored
        Syntax: intersection &col1 &col2 ...
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")

    cids = [oid for oid in valid if api.exists("collections", oid)]
    if not cids:
        raise ShellSyntaxError("No valid collections found")

    # Seed with the first collection, then narrow it with each remaining one.
    first, remaining = cids[0], cids[1:]
    common = set(api.expand_oids(first))
    for cid in remaining:
        common.intersection_update(api.expand_oids(cid))
    return common
def get_opcode_hist(self, oid, cutoff=20):
    """Return the `cutoff` most frequent entries of the opcode histogram.

    If an "opcode_histogram" result exists for `oid`, its entries are ranked
    by descending count and the first `cutoff` are returned as a dict.

    Otherwise a notice is printed and an empty dict is returned.  Unless a
    job for ("opcode_histogram", oid) is already recorded in
    ``self.threads``, a background thread running ``api.process`` is started
    and recorded there.
    """
    if api.exists('opcode_histogram', oid):
        hist = api.retrieve('opcode_histogram', oid_list=[oid])
        # sorted() works on both Python 2 lists and Python 3 dict views,
        # unlike the in-place hist.items().sort().
        ranked = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
        return dict(ranked[:cutoff])

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("opcode_histogram file does not exist for: %s" % oid)
    args = ('opcode_histogram', oid)
    if args in self.threads:
        print("It is still being created.")
    else:
        print("It is being created.")
        creation_thread = threading.Thread(target=api.process, args=args)
        creation_thread.start()
        # Record the job, as build_hist_string does; without this every call
        # made before the result exists would start another worker.
        self.threads[args] = creation_thread
    return {}
def get_byte_hist(self, oid):
    """Return the byte histogram of `oid` keyed by two-digit hex strings.

    If a "byte_histogram" result exists for `oid`, its keys are hexlified and
    every byte value from 0x00 to 0xff that is missing is added with a count
    of 0.

    Otherwise a notice is printed and an empty dict is returned.  Unless a
    job for ("byte_histogram", oid) is already recorded in ``self.threads``,
    a background thread running ``api.process`` is started and recorded
    there.
    """
    if api.exists('byte_histogram', oid):
        byte_hist = api.retrieve('byte_histogram', oid_list=[oid])
        # convert all of the keys into printable form
        hist = dict((hexlify(key), value) for key, value in byte_hist.items())
        # Add 0s for the byte values that are not currently in the hist.
        # Cover all 256 values, and zero-pad to two digits so the filler keys
        # match hexlify's output ("05", not "5").
        for byte_value in range(0x100):
            hist.setdefault('%02x' % byte_value, 0)
        return hist

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("byte_histogram file does not exist for: %s" % oid)
    args = ('byte_histogram', oid)
    if args in self.threads:
        print("It is still being created.")
    else:
        print("It is being created.")
        creation_thread = threading.Thread(target=api.process, args=args)
        creation_thread.start()
        # Record the job, as build_hist_string does; without this every call
        # made before the result exists would start another worker.
        self.threads[args] = creation_thread
    return {}
def name(oid):
    """Return a display name for `oid`, or None when none is known.

    For an oid with "file_meta" data, one of its recorded names is returned
    (None when no names are recorded).  For an oid with "collections_meta"
    data, the collection name is returned.  Any other oid yields None.
    """
    if api.exists("file_meta", oid):
        names = api.get_field("file_meta", oid, "names")
        if not names:
            return None
        # Read one element without pop(): popping would remove the name from
        # the container get_field returned, which may be shared or cached
        # (NOTE(review): confirm whether get_field returns a copy).
        return next(iter(names))
    if api.exists("collections_meta", oid):
        return api.get_colname_from_oid(oid)
    return None
def hex_view(args, opts):
    """ Print the hex values of a file and the disassembly
        Syntax: hex_print %<oid> --slice=<start>:<stop> --width=<int>
                --height=<int> --less [--module=[linear_disassembler]]
    """
    valid, invalid = api.valid_oids(args)
    args = api.expand_oids(valid)
    if not args:
        if not current_file:
            raise ShellSyntaxError("Must provide an oid or use re_init to set file.")
        args = [current_file]
    oid = args[0]

    # Forward only the options the disassembly module is given.
    mod_opts = dict((key, opts[key])
                    for key in ("module", "interactive") if key in opts)

    disassm = api.get_field("disassembly", [oid], "insns", mod_opts)
    comments = api.get_field("disassembly", [oid], "comments", mod_opts)
    labels = [comments] if comments else []

    start, stop = get_slice(opts) if "slice" in opts else (0, 0)

    sizes = {"width": default_width, "height": default_height}
    for option, error in (("width", "Invalid width"),
                          ("height", "Invalid height")):
        if option in opts:
            try:
                sizes[option] = int(opts[option])
            except ValueError:
                raise ShellSyntaxError(error)
    width = sizes["width"]
    height = sizes["height"]

    less = "less" in opts

    heatoid = None
    if "heatmap" in opts:
        heatoid, invalid = api.valid_oids([opts["heatmap"]])

    # First separate labels (dicts without "data") from the items to print.
    new_args = []
    for arg in args:
        if isinstance(arg, dict) and "data" not in arg:
            labels.append(arg)
        else:
            new_args.append(arg)

    for arg in new_args:
        src = api.source(arg)
        is_str = isinstance(arg, str)
        if isinstance(arg, dict) and "data" in arg:
            # arg is the output of 'run files <oid>'
            payload = arg["data"]
        elif is_str and src and api.exists(src, arg):
            # oid was passed
            payload = api.get_field(src, arg, "data")
        elif is_str:
            # Any other string is printed as-is.
            payload = arg
        else:
            print(" - Can't print arg %s" % arg)
            continue
        print_hex_string(payload, labels, disassm, heatoid,
                         start, stop, width, height, less)