Example #1
def extract_strings(vw):
    """
    Deobfuscate strings from vivisect workspace
    """
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, decoding_functions_candidates, 4)
    selected_functions = floss_main.select_functions(vw, None)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw, selected_functions, 4)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
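
A minimal usage sketch for this variant, assuming viv_utils is available to build the vivisect workspace (as the later examples do) and using a hypothetical sample path:

import viv_utils

# build (or load a cached) vivisect workspace for the sample,
# then pass it to the extractor above
vw = viv_utils.getWorkspace("/path/to/mal.exe")  # hypothetical path
for s in extract_strings(vw):
    print(s)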
Example #2
def extract_strings(sample_path):
    """
    Deobfuscate strings from sample_path
    """
    vw = viv_utils.getWorkspace(sample_path)
    function_index = viv_utils.InstructionFunctionIndex(vw)
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, function_index, decoding_functions_candidates)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
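
Unlike the first example, this variant builds the workspace itself, so the caller only supplies a file path. A minimal call sketch with a hypothetical path:

for s in extract_strings("/path/to/mal.exe"):  # hypothetical path
    print(s)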
Example #3
def extract_strings(sample_path):
    """
    Deobfuscate strings from sample_path
    """
    vw = viv_utils.getWorkspace(sample_path)
    function_index = viv_utils.InstructionFunctionIndex(vw)
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, function_index, decoding_functions_candidates)
    selected_functions = floss_main.select_functions(vw, None)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw, selected_functions)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
Example #4
def extract_strings(vw):
    """
    Deobfuscate strings from vivisect workspace
    """
    top_functions, decoding_function_features = identify_decoding_functions(vw)

    for s in floss.string_decoder.decode_strings(
        vw, get_function_fvas(top_functions), MIN_STRING_LENGTH, disable_progress=True
    ):
        yield s.string

    no_tightloop_functions = get_functions_without_tightloops(decoding_function_features)
    for s in stackstrings.extract_stackstrings(vw, no_tightloop_functions, MIN_STRING_LENGTH, disable_progress=True):
        yield s.string

    tightloop_functions = get_functions_with_tightloops(decoding_function_features)
    for s in tightstrings.extract_tightstrings(vw, tightloop_functions, MIN_STRING_LENGTH, disable_progress=True):
        yield s.string
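
This newer variant is a generator: decoded strings, stackstrings, and tightstrings are yielded as each phase runs instead of being collected into one list. A minimal consumption sketch, assuming the workspace is built as in the earlier examples and MIN_STRING_LENGTH is defined in the surrounding module:

vw = viv_utils.getWorkspace("/path/to/mal.exe")  # hypothetical path
# materialize all results, or break out early once something interesting appears
all_strings = list(extract_strings(vw))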
Example #5
File: main.py Project: mr-tz/flare-floss
def main(argv=None) -> int:
    """
    arguments:
      argv: the command line arguments
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = make_parser(argv)
    try:
        args = parser.parse_args(args=argv)
        # manual check here, because add_mutually_exclusive_group() on argument_group("...") does not appear to work
        if args.enabled_types and args.disabled_types:
            parser.error("--no and --only arguments are not allowed together")
    except ArgumentValueError as e:
        print(e)
        return -1

    set_log_config(args.debug, args.quiet)

    # Since Python 3.8 cp65001 is an alias to utf_8, but not for Python < 3.8
    # TODO: remove this code when only supporting Python 3.8+
    # https://stackoverflow.com/a/3259271/87207
    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)

    if hasattr(args, "signatures"):
        if args.signatures == SIGNATURES_PATH_DEFAULT_STRING:
            logger.debug("-" * 80)
            logger.debug(" Using default embedded signatures.")
            logger.debug(
                " To provide your own signatures, use the form `floss.exe --signature ./path/to/signatures/  /path/to/mal.exe`."
            )
            logger.debug("-" * 80)

            sigs_path = os.path.join(get_default_root(), "sigs")
        else:
            sigs_path = args.signatures
            logger.debug("using signatures path: %s", sigs_path)

        args.signatures = sigs_path

    # TODO pass buffer along instead of file path, also should work for stdin
    sample = args.sample.name
    args.sample.close()

    if args.functions:
        # when analyzing specified functions do not show static strings
        args.disabled_types.append(StringType.STATIC)

    analysis = Analysis(
        enable_static_strings=is_string_type_enabled(StringType.STATIC, args.disabled_types, args.enabled_types),
        enable_stack_strings=is_string_type_enabled(StringType.STACK, args.disabled_types, args.enabled_types),
        enable_decoded_strings=is_string_type_enabled(StringType.DECODED, args.disabled_types, args.enabled_types),
        enable_tight_strings=is_string_type_enabled(StringType.TIGHT, args.disabled_types, args.enabled_types),
    )
    results = ResultDocument(metadata=Metadata(file_path=sample), analysis=analysis)

    time0 = time()
    interim = time0

    # in order of expected run time, fast to slow
    # 1. static strings
    # 2. stack strings
    # 3. tight strings
    # 4. decoded strings

    if results.analysis.enable_static_strings:
        logger.info("extracting static strings...")
        if os.path.getsize(sample) > sys.maxsize:
            logger.warning("file is very large, strings listings may be truncated.")

        with open(sample, "rb") as f:
            with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
                static_strings = list(extract_ascii_unicode_strings(buf, args.min_length))

        results.strings.static_strings = static_strings
        results.metadata.runtime.static_strings = get_runtime_diff(interim)
        interim = time()

    if (
        results.analysis.enable_decoded_strings
        or results.analysis.enable_stack_strings
        or results.analysis.enable_tight_strings
    ):
        if os.path.getsize(sample) > MAX_FILE_SIZE:
            logger.error("cannot deobfuscate strings from files larger than %d bytes", MAX_FILE_SIZE)
            return -1

        sigpaths = get_signatures(args.signatures)

        should_save_workspace = os.environ.get("FLOSS_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
        try:
            with halo.Halo(
                text="analyzing program",
                spinner="simpleDots",
                stream=sys.stderr,
                enabled=not (args.quiet or args.disable_progress),
            ):
                vw = load_vw(sample, args.format, sigpaths, should_save_workspace)
                results.metadata.runtime.vivisect = get_runtime_diff(interim)
                interim = time()
        except WorkspaceLoadError as e:
            logger.error("failed to analyze sample: %s", e)
            return -1

        results.metadata.imagebase = get_imagebase(vw)

        try:
            selected_functions = select_functions(vw, args.functions)
            results.analysis.functions.discovered = len(vw.getFunctions())
        except ValueError as e:
            # failed to find functions in workspace
            logger.error(e.args[0])
            return -1

        decoding_function_features, library_functions = find_decoding_function_features(
            vw, selected_functions, disable_progress=args.quiet or args.disable_progress
        )
        # TODO trim libfuncs from selected_funcs
        results.analysis.functions.library = len(library_functions)
        results.metadata.runtime.find_features = get_runtime_diff(interim)
        interim = time()

        logger.trace("analysis summary:")
        for k, v in get_vivisect_meta_info(vw, selected_functions, decoding_function_features).items():
            logger.trace("  %s: %s", k, v or "N/A")

        if results.analysis.enable_stack_strings:
            if results.analysis.enable_tight_strings:
                # don't run this on functions with tight loops as this will likely result in FPs
                # and should be caught by the tightstrings extraction below
                selected_functions = get_functions_without_tightloops(decoding_function_features)

            results.strings.stack_strings = extract_stackstrings(
                vw,
                selected_functions,
                args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_stack_strings = len(selected_functions)
            results.metadata.runtime.stack_strings = get_runtime_diff(interim)
            interim = time()

        if results.analysis.enable_tight_strings:
            tightloop_functions = get_functions_with_tightloops(decoding_function_features)
            # TODO if there are many tight loop functions, emit that the program likely uses tightstrings? see #400
            results.strings.tight_strings = extract_tightstrings(
                vw,
                tightloop_functions,
                min_length=args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_tight_strings = len(tightloop_functions)
            results.metadata.runtime.tight_strings = get_runtime_diff(interim)
            interim = time()

        if results.analysis.enable_decoded_strings:
            # TODO select more based on score rather than absolute count?!
            top_functions = get_top_functions(decoding_function_features, 20)

            fvas_to_emulate = get_function_fvas(top_functions)
            fvas_tight_functions = get_tight_function_fvas(
                decoding_function_features
            )  # TODO exclude tight functions from stackstrings analysis?!
            fvas_to_emulate = append_unique(fvas_to_emulate, fvas_tight_functions)

            if len(fvas_to_emulate) == 0:
                logger.info("no candidate decoding functions found.")
            else:
                logger.debug("identified %d candidate decoding functions", len(fvas_to_emulate))
                for fva in fvas_to_emulate:
                    results.analysis.functions.decoding_function_scores[fva] = decoding_function_features[fva]["score"]
                    logger.debug("  - 0x%x: %.3f", fva, decoding_function_features[fva]["score"])

            # TODO filter out strings decoded in library function or function only called by library function(s)
            results.strings.decoded_strings = decode_strings(
                vw,
                fvas_to_emulate,
                args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_decoded_strings = len(fvas_to_emulate)
            results.metadata.runtime.decoded_strings = get_runtime_diff(interim)

    results.metadata.runtime.total = get_runtime_diff(time0)
    logger.info("finished execution after %.2f seconds", results.metadata.runtime.total)

    if args.json:
        r = floss.render.json.render(results)
    else:
        r = floss.render.default.render(results, args.verbose, args.quiet)

    if args.outfile:
        logger.info("writing results to %s", args.outfile)
        with open(args.outfile, "wb") as f:
            f.write(r.encode("utf-8"))
    else:
        print(r)

    return 0
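
This main() parses sys.argv[1:] and returns an exit code, so it is suited for use as a console entry point. A minimal sketch of running the module directly, assuming the module-level imports used above (e.g., sys) are in place:

if __name__ == "__main__":
    # propagate main()'s return value (0 on success, -1 on error) as the process exit code
    sys.exit(main())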
Example #6
    def each(self, target):
        self.results = {
            'warnings': [],
            'static_strings': [],
            'decoded_strings': [],
            'stack_strings': []
        }

        try:
            # read the sample in binary mode and close the handle promptly
            with open(target, "rb") as f:
                data = f.read(MAX_FILESIZE)
        except (IOError, OSError):
            self.log('error', 'Cannot open file {}'.format(target))
            self.results = None
            return False

        # Load list of IOCs
        try:
            with open(self.interesting_strings_file) as f:
                self.interesting_strings = f.read().splitlines()
            self.log(
                'info', 'Loaded interesting strings from {}'.format(
                    self.interesting_strings_file))
        except (IOError, OSError):
            # no interesting-strings file; fall back to an empty list
            self.log('info', 'No file with interesting strings defined')
            self.interesting_strings = []

        # Load list of ignored strings
        try:
            with open(self.ignored_strings_file) as f:
                self.ignored_strings = f.read().splitlines()
            self.log(
                'info', 'Loaded ignored strings from {}'.format(
                    self.ignored_strings_file))
        except (IOError, OSError):
            # no ignored-strings file; fall back to an empty list
            self.log('info', 'No file with ignored strings defined')
            self.ignored_strings = []

        # Extract static strings
        static_strings = re.findall(
            "[\x1f-\x7e]{" + str(self.minimum_string_len) + ",}", data)
        for s in re.findall(
                "(?:[\x1f-\x7e][\x00]){" + str(self.minimum_string_len) + ",}",
                data):
            static_strings.append(s.decode("utf-16le"))

        if self.maximum_string_len != 0:
            for i, s in enumerate(static_strings):
                static_strings[i] = s[:self.maximum_string_len]

        if self.maximum_strings != 0 and len(
                static_strings) > self.maximum_strings:
            self.log(
                'warning', 'Maximum number of strings reached ({})'.format(
                    str(self.maximum_strings)))
            static_strings = static_strings[:self.maximum_strings]
            static_strings.append("[snip]")

        try:
            # Prepare Floss for extracting hidden & encoded strings
            vw = vivisect.VivWorkspace()
            vw.loadFromFile(target)
            vw.analyze()

            selected_functions = main.select_functions(vw, None)
            decoding_functions_candidates = id_man.identify_decoding_functions(
                vw, main.get_all_plugins(), selected_functions)
        except Exception as e:
            self.log('error', 'Cannot analyze file {}'.format(target))
            self.results = None
            return False

        # Decode & extract hidden & encoded strings
        try:
            decoded_strings = main.decode_strings(
                vw, decoding_functions_candidates, self.minimum_string_len)
            decoded_strs = main.filter_unique_decoded(decoded_strings)

            stack_strings = stackstrings.extract_stackstrings(
                vw, selected_functions, self.minimum_string_len)
            stack_strings = list(stack_strings)

            decoded_strings = [
                x for x in decoded_strs if x not in static_strings
            ]
        except Exception as e:
            self.log('error', 'Cannot extract strings from {}'.format(target))
            self.results = None
            return False

        # Populate results[] with found strings
        if len(decoded_strings) or len(stack_strings):
            self.log('info', 'Found stack or decoded strings')
            for k, s in enumerate(decoded_strings):
                buffer = main.sanitize_string_for_printing(s.s)
                skip = False
                for ignore in self.ignored_strings:
                    if str(buffer).find(ignore) >= 0:
                        skip = True
                        break
                if not skip:
                    self.results['decoded_strings'].append(buffer)
            self.search_ioc(self.results['decoded_strings'])

            for k, s in enumerate(stack_strings):
                skip = False
                for ignore in self.ignored_strings:
                    if str(s.s).find(ignore) >= 0:
                        skip = True
                        break
                if not skip:
                    self.results['stack_strings'].append(s.s)
            self.search_ioc(self.results['stack_strings'])

        # Populate results[] with static strings
        self.log('info', 'Found static strings')
        for s in static_strings:
            skip = False
            for ignore in self.ignored_strings:
                if str(s).find(ignore) >= 0:
                    skip = True
                    break
            if not skip:
                self.results['static_strings'].append(s)
        self.search_ioc(self.results['static_strings'])

        # Deduplicate warnings
        self.results['warnings'] = list(dict.fromkeys(
            self.results['warnings']))

        return True
Example #7
def main(argv=None):
    """
    :param argv: optional command line arguments, like sys.argv[1:]
    :return: 0 on success, non-zero on failure
    """
    logging.basicConfig(level=logging.WARNING)

    parser = make_parser()
    if argv is not None:
        options, args = parser.parse_args(argv[1:])
    else:
        options, args = parser.parse_args()

    set_log_config(options.debug, options.verbose)

    if options.list_plugins:
        print_plugin_list()
        return 0

    sample_file_path = parse_sample_file_path(parser, args)
    min_length = parse_min_length_option(options.min_length)

    # expert profile settings
    if options.expert:
        options.save_workspace = True
        options.group_functions = True
        options.quiet = False

    if not is_workspace_file(sample_file_path):
        if not options.no_static_strings and not options.functions:
            floss_logger.info("Extracting static strings...")
            if os.path.getsize(sample_file_path) > sys.maxsize:
                floss_logger.warning("File too large, strings listings may be truncated.")
                floss_logger.warning("FLOSS cannot handle files larger than 4GB on 32bit systems.")

            file_buf = get_file_as_mmap(sample_file_path)
            print_static_strings(file_buf, min_length=min_length, quiet=options.quiet)
            static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length)
            static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length)
            static_strings = chain(static_ascii_strings, static_unicode_strings)
            del file_buf
        else:
            static_strings = []

        if options.no_decoded_strings and options.no_stack_strings and not options.should_show_metainfo:
            if options.json_output_file:
                create_json_output_static_only(options, sample_file_path, static_strings)
            # we are done
            return 0

    if os.path.getsize(sample_file_path) > MAX_FILE_SIZE:
        floss_logger.error("FLOSS cannot extract obfuscated strings or stackstrings from files larger than"
                           " %d bytes" % MAX_FILE_SIZE)
        if options.json_output_file:
            create_json_output_static_only(options, sample_file_path, static_strings)
        return 1

    try:
        vw = load_vw(sample_file_path, options.save_workspace, options.verbose, options.is_shellcode,
                     options.shellcode_entry_point, options.shellcode_base)
    except WorkspaceLoadError:
        if options.json_output_file:
            create_json_output_static_only(options, sample_file_path, static_strings)
        return 1

    try:
        selected_functions = select_functions(vw, options.functions)
    except Exception as e:
        floss_logger.error(str(e))
        return 1

    floss_logger.debug("Selected the following functions: %s", get_str_from_func_list(selected_functions))

    selected_plugin_names = select_plugins(options.plugins)
    floss_logger.debug("Selected the following plugins: %s", ", ".join(map(str, selected_plugin_names)))
    selected_plugins = filter(lambda p: str(p) in selected_plugin_names, get_all_plugins())

    if options.should_show_metainfo:
        meta_functions = None
        if options.functions:
            meta_functions = selected_functions
        print_file_meta_info(vw, meta_functions)

    time0 = time()

    if not options.no_decoded_strings:
        floss_logger.info("Identifying decoding functions...")
        decoding_functions_candidates = im.identify_decoding_functions(vw, selected_plugins, selected_functions)
        if options.expert:
            print_identification_results(sample_file_path, decoding_functions_candidates)

        floss_logger.info("Decoding strings...")
        decoded_strings = decode_strings(vw, decoding_functions_candidates, min_length, options.no_filter,
                                         options.max_instruction_count, options.max_address_revisits + 1)
        # TODO: The de-duplication process isn't perfect as it is done here and in print_decoding_results and
        # TODO: all of them on non-sanitized strings.
        if not options.expert:
            decoded_strings = filter_unique_decoded(decoded_strings)
        print_decoding_results(decoded_strings, options.group_functions, quiet=options.quiet, expert=options.expert)
    else:
        decoded_strings = []

    if not options.no_stack_strings:
        floss_logger.info("Extracting stackstrings...")
        stack_strings = stackstrings.extract_stackstrings(vw, selected_functions, min_length, options.no_filter)
        stack_strings = list(stack_strings)
        if not options.expert:
            # remove duplicate entries
            stack_strings = set(stack_strings)
        print_stack_strings(stack_strings, quiet=options.quiet, expert=options.expert)
    else:
        stack_strings = []

    if options.x64dbg_database_file:
        imagebase = list(vw.filemeta.values())[0]['imagebase']
        floss_logger.info("Creating x64dbg database...")
        create_x64dbg_database(sample_file_path, options.x64dbg_database_file, imagebase, decoded_strings)

    if options.ida_python_file:
        floss_logger.info("Creating IDA script...")
        create_ida_script(sample_file_path, options.ida_python_file, decoded_strings, stack_strings)

    if options.radare2_script_file:
        floss_logger.info("Creating r2script...")
        create_r2_script(sample_file_path, options.radare2_script_file, decoded_strings, stack_strings)

    if options.binja_script_file:
        floss_logger.info("Creating Binary Ninja script...")
        create_binja_script(sample_file_path, options.binja_script_file, decoded_strings, stack_strings)

    time1 = time()
    if not options.quiet:
        print("\nFinished execution after %f seconds" % (time1 - time0))

    if options.json_output_file:
        create_json_output(options, sample_file_path,
                           decoded_strings=decoded_strings,
                           stack_strings=stack_strings,
                           static_strings=static_strings)
        floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file)

    return 0
Example #8
    def run(self):
        """Run Floss on analyzed file.
        @return: Floss results dict.
        """
        self.key = "strings"
        self.floss = self.options.get("floss")
        self.MIN_STRINGLEN = int(self.options.get("min_str_len"))
        self.MAX_STRINGLEN = self.options.get("max_str_len")
        self.MAX_STRINGCNT = self.options.get("max_str_cnt")
        self.MAX_FILESIZE = 16*1024*1024
        
        STRING_TYPES = [
            "decoded",
            "stack",
            "static"
        ]
        
        strings = {}

        if self.task["category"] == "file":
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "Sample file doesn't exist: \"%s\"" % self.file_path
                )

            try:
                f = File(self.file_path)
                filename = os.path.basename(self.task["target"])
                base_name = os.path.splitext(filename)[0]
                ext = filename.split(os.path.extsep)[-1].lower()
                with open(self.file_path, "rb") as fh:
                    data = fh.read(self.MAX_FILESIZE)
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)
            
            # Extract static strings
            static_strings = re.findall("[\x1f-\x7e]{" + str(self.MIN_STRINGLEN) + ",}", data)
            for s in re.findall("(?:[\x1f-\x7e][\x00]){" + str(self.MIN_STRINGLEN) + ",}", data):
                static_strings.append(s.decode("utf-16le"))

            if self.MAX_STRINGLEN != 0:
                for i, s in enumerate(static_strings):
                    static_strings[i] = s[:self.MAX_STRINGLEN]

            if self.MAX_STRINGCNT != 0 and len(static_strings) > self.MAX_STRINGCNT:
                static_strings = static_strings[:self.MAX_STRINGCNT]
                static_strings.append("[snip]")

            package = self.task.get("package")

            if self.floss and (package == "exe" or ext == "exe" or "PE32" in f.get_type()):
                # Disable floss verbose logging
                main.set_logging_levels()
                
                try:
                    # Prepare Floss for extracting hidden & encoded strings
                    vw = vivisect.VivWorkspace()
                    vw.loadFromFile(self.file_path)
                    vw.analyze()

                    selected_functions = main.select_functions(vw, None)
                    decoding_functions_candidates = id_man.identify_decoding_functions(
                        vw, main.get_all_plugins(), selected_functions
                    )
                except Exception as e:
                    raise CuckooProcessingError("Error analyzing file with vivisect: %s" % e)

                try:
                    # Decode & extract hidden & encoded strings
                    decoded_strings = main.decode_strings(
                        vw, decoding_functions_candidates, self.MIN_STRINGLEN
                    )
                    decoded_strs = main.filter_unique_decoded(decoded_strings)

                    stack_strings = stackstrings.extract_stackstrings(
                        vw, selected_functions, self.MIN_STRINGLEN
                    )
                    stack_strings = list(stack_strings)

                    decoded_strings = [x for x in decoded_strs if x not in static_strings]
                except Exception as e:
                    raise CuckooProcessingError("Error extracting strings with floss: %s" % e)

                if len(decoded_strings) or len(stack_strings):
                    # Create annotated scripts
                    if self.options.get("idapro_str_sct"):
                        idapro_sct_name = base_name + ".idb"
                        strings["idapro_sct_name"] = idapro_sct_name

                        main.create_ida_script(
                            self.file_path, os.path.join(self.str_script_path, idapro_sct_name), 
                            decoded_strings, stack_strings
                        )

                    if self.options.get("radare_str_sct"):
                        radare_sct_name = base_name + ".r2"
                        strings["radare_sct_name"] = radare_sct_name

                        main.create_r2_script(
                            self.file_path, os.path.join(self.str_script_path, radare_sct_name), 
                            decoded_strings, stack_strings
                        )

                    if self.options.get("x64dbg_str_sct"):
                        x64dbg_sct_name = base_name + ".json"
                        strings["x64dbg_sct_name"] = x64dbg_sct_name

                        imagebase = list(vw.filemeta.values())[0]['imagebase']
                        main.create_x64dbg_database(
                            self.file_path, os.path.join(self.str_script_path, x64dbg_sct_name),
                            imagebase, decoded_strings
                        )

                # convert Floss strings into regular, readable strings
                for idx, s in enumerate(decoded_strings):
                    decoded_strings[idx] = main.sanitize_string_for_printing(s.s)

                for idx, s in enumerate(stack_strings):
                    stack_strings[idx] = s.s

                results = [decoded_strings, stack_strings, static_strings]

                for idx, str_type in enumerate(STRING_TYPES):
                    strings[str_type] = results[idx]

            else:
                strings["static"] = static_strings

        return strings