def equivalent_patterns(pattern1, pattern2, stix_version=stix2.DEFAULT_VERSION):
    """
    Check two STIX patterns for semantic equivalence.

    Both patterns are parsed into ASTs, run through the pattern
    canonicalizer, and the canonical forms are then compared with
    observation_expression_cmp().

    Args:
        pattern1: The first STIX pattern
        pattern2: The second STIX pattern
        stix_version: The STIX version to use for pattern parsing, as a
            string ("2.0", "2.1", etc).  Defaults to library-wide default
            version.

    Returns:
        True if the canonical forms compare equal (i.e. the patterns are
        semantically equivalent); False otherwise
    """
    # Parse both inputs up front, so a malformed pattern is reported before
    # any canonicalization work is done.
    parsed_asts = [
        stix2.pattern_visitor.create_pattern_object(
            pattern_text, version=stix_version,
        )
        for pattern_text in (pattern1, pattern2)
    ]

    canonicalizer = _get_pattern_canonicalizer()

    canonical_asts = []
    for parsed_ast in parsed_asts:
        # transform() returns a pair; only the transformed AST is needed.
        canonical_ast, _ = canonicalizer.transform(parsed_ast)
        canonical_asts.append(canonical_ast)

    first_canonical, second_canonical = canonical_asts

    return observation_expression_cmp(first_canonical, second_canonical) == 0
def __is_contained_followedby(self, exprs_containee, exprs_container):
    """
    Determine whether the "containee" expressions are contained in the
    "container" expressions, with FOLLOWEDBY semantics (order-sensitive and
    need distinct bindings).  For example (with containee on left and
    container on right):

        (A followedby B) or (B followedby A)

    In the above, all of the lhs vars have a counterpart in the rhs, but
    the vars on the right are not in the same order.  Therefore, the right
    does not "contain" the left.  The container vars don't have to be
    contiguous though.  E.g. in:

        (A followedby B) or (D followedby A followedby C followedby B)

    in the container (rhs), B follows A, so it "contains" the lhs even
    though there is other stuff mixed in.

    In other words, this is an ordered-subsequence test where element
    equality is decided by observation_expression_cmp(...) == 0.

    Args:
        exprs_containee: The expressions we want to check for containment
        exprs_container: The expressions acting as the "container"

    Returns:
        True if the containee is contained in the container; False if not.
        An empty containee is contained in any container.
    """
    # One iterator over the container is shared by all containee
    # expressions and is never rewound.  Each search therefore resumes
    # right after the previous match, which is what enforces both the
    # ordering and the "distinct bindings" requirements.
    container_iter = iter(exprs_container)

    for containee_expr in exprs_containee:
        for container_expr in container_iter:
            if observation_expression_cmp(
                containee_expr, container_expr,
            ) == 0:
                break
        else:
            # The container ran out before a match was found.  Exhaustion
            # is detected by the loop itself rather than by testing the
            # truthiness of a None sentinel, so an expression object which
            # happens to evaluate as false can't be mistaken for the end of
            # the sequence.
            return False

    return True
def find_equivalent_patterns(
    search_pattern, patterns, stix_version=stix2.DEFAULT_VERSION,
):
    """
    Yield those patterns from an iterable which are semantically equivalent
    to a search pattern.

    The search pattern is parsed and canonicalized only once, which makes
    this cheaper than calling equivalent_patterns() repeatedly with the same
    first argument.  Since this is a generator, candidate patterns are
    consumed lazily from the input iterable and matches are produced as they
    are found, so patterns can be "streamed" through it.

    :param search_pattern: A search pattern as a string
    :param patterns: An iterable over patterns as strings
    :param stix_version: The STIX version to use for pattern parsing, as a
        string ("2.0", "2.1", etc).  Defaults to library-wide default
        version.
    :return: A generator iterator producing the semantically equivalent
        patterns (the original strings, not their canonical forms)
    """
    reference_ast = stix2.pattern_visitor.create_pattern_object(
        search_pattern, version=stix_version,
    )

    canonicalizer = _get_pattern_canonicalizer()

    # transform() returns a pair; only the transformed AST is needed.
    canonical_reference, _ = canonicalizer.transform(reference_ast)

    for candidate in patterns:
        candidate_ast = stix2.pattern_visitor.create_pattern_object(
            candidate, version=stix_version,
        )
        canonical_candidate, _ = canonicalizer.transform(candidate_ast)

        comparison = observation_expression_cmp(
            canonical_reference, canonical_candidate,
        )
        if comparison != 0:
            continue

        yield candidate
def __is_contained_and(self, exprs_containee, exprs_container):
    """
    Determine whether the "containee" expressions are contained in the
    "container" expressions, with AND semantics (order-independent but need
    distinct bindings).  For example (with containee on left and container
    on right):

        (A and A and B) or (A and B and C)

    Every lhs var has a counterpart in the rhs, but the lhs has two A's
    whereas the rhs has only one.  Each container expression may be paired
    with at most one containee expression, so the rhs does not "contain"
    the lhs; it would need a second A.

    Args:
        exprs_containee: The expressions we want to check for containment
        exprs_container: The expressions acting as the "container"

    Returns:
        True if the containee is contained in the container; False if not
    """
    # Private working copy: matched entries are removed from it, and the
    # caller's sequence must be left untouched.
    unmatched = list(exprs_container)

    for wanted in exprs_containee:
        # Index of the first not-yet-used container expression which
        # compares equal to the wanted one, or None if there isn't one.
        match_index = next(
            (
                idx for idx, candidate in enumerate(unmatched)
                if observation_expression_cmp(wanted, candidate) == 0
            ),
            None,
        )

        if match_index is None:
            return False

        # Consume the match so it can never be paired with a second
        # containee expression.
        unmatched.pop(match_index)

    return True