import logging
import itertools
import collections
from typing import Any, Dict, Tuple

import tqdm

import capa.engine
import capa.features.common

# NOTE: these import paths assume capa's module layout (RuleSet/Scope in capa.rules,
# FeatureSet/MatchResults in capa.engine, handle types in the base extractor);
# they may differ slightly across capa versions.
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor

logger = logging.getLogger(__name__)


def find_instruction_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
) -> Tuple[FeatureSet, MatchResults]:
    """
    find matches for the given rules for the given instruction.

    returns: tuple containing (features for instruction, match results for instruction)
    """
    # all features found for the instruction.
    features = collections.defaultdict(set)  # type: FeatureSet

    for feature, va in itertools.chain(
        extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
    ):
        features[feature].add(va)

    # matches found at this instruction.
    _, matches = ruleset.match(Scope.INSTRUCTION, features, int(insn))
    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for va, _ in res:
            capa.engine.index_rule_matches(features, rule, [va])

    return features, matches

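# The functions in this module all traffic in two shapes from capa.engine:
# FeatureSet maps a feature to the set of addresses where it was observed, and
# MatchResults maps a rule name to a list of (address, result) pairs.
# A rough, illustrative sketch of those shapes; the concrete feature classes and
# rule name below are examples chosen for illustration, not taken from this module:
#
#   features = {
#       capa.features.insn.API("kernel32.CreateFileA"): {0x401000},
#       capa.features.common.String("config.dat"): {0x401010, 0x401234},
#   }  # type: FeatureSet
#
#   matches = {
#       "create or open file": [(0x401000, result)],  # result: capa.engine.Result
#   }  # type: MatchResults
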
def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet):
    """
    find matches for the given rules at the file scope.

    returns: tuple containing (match results for file, number of file features)
    """
    file_features = collections.defaultdict(set)  # type: FeatureSet

    for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()):
        # not all file features may have virtual addresses.
        # if not, then at least ensure the feature shows up in the index.
        # the set of addresses will still be empty.
        if va:
            file_features[feature].add(va)
        else:
            if feature not in file_features:
                file_features[feature] = set()

    logger.debug("analyzed file and extracted %d features", len(file_features))

    file_features.update(function_features)

    _, matches = ruleset.match(Scope.FILE, file_features, 0x0)
    return matches, len(file_features)

def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle):
    """
    find matches for the given rules within the given function.

    returns: tuple containing (match results for function, match results for basic blocks, number of features)
    """
    # contains features from:
    #  - insns
    #  - function
    function_features = collections.defaultdict(set)  # type: FeatureSet
    bb_matches = collections.defaultdict(list)  # type: MatchResults

    for feature, va in extractor.extract_function_features(f):
        function_features[feature].add(va)

    for bb in extractor.get_basic_blocks(f):
        # contains features from:
        #  - insns
        #  - basic blocks
        bb_features = collections.defaultdict(set)

        for feature, va in extractor.extract_basic_block_features(f, bb):
            bb_features[feature].add(va)
            function_features[feature].add(va)

        for insn in extractor.get_instructions(f, bb):
            for feature, va in extractor.extract_insn_features(f, bb, insn):
                bb_features[feature].add(va)
                function_features[feature].add(va)

        _, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, int(bb))
        for rule_name, res in matches.items():
            bb_matches[rule_name].extend(res)
            for va, _ in res:
                function_features[capa.features.common.MatchedRule(rule_name)].add(va)

    _, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
    return function_matches, bb_matches, len(function_features)

def find_basic_block_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
    """
    find matches for the given rules within the given basic block.

    returns: tuple containing (features for basic block, match results for basic block, match results for instructions)
    """
    # all features found within this basic block,
    # includes features found within instructions.
    features = collections.defaultdict(set)  # type: FeatureSet

    # matches found at the instruction scope.
    # might be found at different instructions, that's ok.
    insn_matches = collections.defaultdict(list)  # type: MatchResults

    for insn in extractor.get_instructions(f, bb):
        ifeatures, imatches = find_instruction_capabilities(ruleset, extractor, f, bb, insn)
        for feature, vas in ifeatures.items():
            features[feature].update(vas)

        for rule_name, res in imatches.items():
            insn_matches[rule_name].extend(res)

    for feature, va in itertools.chain(
        extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
    ):
        features[feature].add(va)

    # matches found within this basic block.
    _, matches = ruleset.match(Scope.BASIC_BLOCK, features, int(bb))
    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for va, _ in res:
            capa.engine.index_rule_matches(features, rule, [va])

    return features, matches, insn_matches

def find_code_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
    """
    find matches for the given rules within the given function.

    returns: tuple containing (match results for function, match results for basic blocks,
             match results for instructions, number of features)
    """
    # all features found within this function,
    # includes features found within basic blocks (and instructions).
    function_features = collections.defaultdict(set)  # type: FeatureSet

    # matches found at the basic block scope.
    # might be found at different basic blocks, that's ok.
    bb_matches = collections.defaultdict(list)  # type: MatchResults

    # matches found at the instruction scope.
    # might be found at different instructions, that's ok.
    insn_matches = collections.defaultdict(list)  # type: MatchResults

    for bb in extractor.get_basic_blocks(f):
        features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, f, bb)
        for feature, vas in features.items():
            function_features[feature].update(vas)

        for rule_name, res in bmatches.items():
            bb_matches[rule_name].extend(res)

        for rule_name, res in imatches.items():
            insn_matches[rule_name].extend(res)

    for feature, va in itertools.chain(extractor.extract_function_features(f), extractor.extract_global_features()):
        function_features[feature].add(va)

    _, function_matches = ruleset.match(Scope.FUNCTION, function_features, int(f))
    return function_matches, bb_matches, insn_matches, len(function_features)

def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle):
    """
    find matches for the given rules within the given function.

    returns: tuple containing (match results for function, match results for basic blocks, number of features)
    """
    # contains features from:
    #  - insns
    #  - function
    function_features = collections.defaultdict(set)  # type: FeatureSet
    bb_matches = collections.defaultdict(list)  # type: MatchResults

    for feature, va in itertools.chain(extractor.extract_function_features(f), extractor.extract_global_features()):
        function_features[feature].add(va)

    for bb in extractor.get_basic_blocks(f):
        # contains features from:
        #  - insns
        #  - basic blocks
        bb_features = collections.defaultdict(set)

        for feature, va in itertools.chain(
            extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
        ):
            bb_features[feature].add(va)
            function_features[feature].add(va)

        for insn in extractor.get_instructions(f, bb):
            for feature, va in itertools.chain(
                extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
            ):
                bb_features[feature].add(va)
                function_features[feature].add(va)

        _, matches = ruleset.match(Scope.BASIC_BLOCK, bb_features, int(bb))
        for rule_name, res in matches.items():
            bb_matches[rule_name].extend(res)
            rule = ruleset[rule_name]
            for va, _ in res:
                capa.engine.index_rule_matches(function_features, rule, [va])

    _, function_matches = ruleset.match(Scope.FUNCTION, function_features, int(f))
    return function_matches, bb_matches, len(function_features)

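# For reference: capa.engine.index_rule_matches is what lets a basic block match
# become visible to function-scope rules (and, above, an instruction match to
# basic-block-scope rules) via the `match: <rule name>` feature. A simplified
# sketch of the idea, under the assumption that it indexes the rule name and its
# namespace prefixes; see capa.engine for the actual implementation:
#
#   def index_rule_matches(features, rule, locations):
#       features[capa.features.common.MatchedRule(rule.name)].update(locations)
#       namespace = rule.meta.get("namespace")
#       while namespace:
#           features[capa.features.common.MatchedRule(namespace)].update(locations)
#           namespace, _, _ = namespace.rpartition("/")
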
def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None) -> Tuple[MatchResults, Any]:
    """
    find matches for the given rules across all functions and the file scope.

    returns: tuple containing (all match results, metadata about the analysis)
    """
    all_function_matches = collections.defaultdict(list)  # type: MatchResults
    all_bb_matches = collections.defaultdict(list)  # type: MatchResults

    meta = {
        "feature_counts": {
            "file": 0,
            "functions": {},
        },
        "library_functions": {},
    }  # type: Dict[str, Any]

    pbar = tqdm.tqdm
    if disable_progress:
        # do not use tqdm to avoid unnecessary side effects when caller intends
        # to disable progress completely
        pbar = lambda s, *args, **kwargs: s

    functions = list(extractor.get_functions())
    n_funcs = len(functions)

    pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
    for f in pb:
        function_address = int(f)

        if extractor.is_library_function(function_address):
            function_name = extractor.get_function_name(function_address)
            logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
            meta["library_functions"][function_address] = function_name
            n_libs = len(meta["library_functions"])
            percentage = 100 * (n_libs / n_funcs)
            if isinstance(pb, tqdm.tqdm):
                pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
            continue

        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
        meta["feature_counts"]["functions"][function_address] = feature_count
        logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)

        for rule_name, res in function_matches.items():
            all_function_matches[rule_name].extend(res)
        for rule_name, res in bb_matches.items():
            all_bb_matches[rule_name].extend(res)

    # collection of features that captures the rule matches within function and BB scopes.
    # mapping from feature (matched rule) to set of addresses at which it matched.
    function_and_lower_features = {
        capa.features.common.MatchedRule(rule_name): set(map(lambda p: p[0], results))
        for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items())
    }  # type: FeatureSet

    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
    meta["feature_counts"]["file"] = feature_count

    matches = {
        rule_name: results
        for rule_name, results in itertools.chain(
            # each rule exists in exactly one scope,
            # so there won't be any overlap among these following MatchResults,
            # and we can merge the dictionaries naively.
            all_bb_matches.items(),
            all_function_matches.items(),
            all_file_matches.items(),
        )
    }

    return matches, meta
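
# A minimal usage sketch of the driver above: given an already-constructed RuleSet
# and FeatureExtractor (how those are built is backend-specific and not shown here),
# call find_capabilities and walk the results. `report_capabilities` is a
# hypothetical helper written for illustration; only the find_capabilities call and
# the shape of its return value come from the code above.
def report_capabilities(ruleset: RuleSet, extractor: FeatureExtractor) -> None:
    matches, meta = find_capabilities(ruleset, extractor, disable_progress=True)

    for rule_name, results in sorted(matches.items()):
        # each entry is a list of (address, result) pairs, one per match location.
        locations = sorted(va for va, _ in results)
        print("%-60s %s" % (rule_name, ", ".join(hex(va) for va in locations)))

    print("file features: %d" % meta["feature_counts"]["file"])
    print("library functions skipped: %d" % len(meta["library_functions"]))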