def _check_value_type(value, expression, mapped_fields_array):
    """
    Trim the mapped fields of a network reference to the address kind in ``value``.

    The function only acts when the object or field part of
    ``expression.object_path`` is ``src_ref.value`` or ``dst_ref.value``.
    Nothing is returned: ``mapped_fields_array`` is modified in place.

    :param value: comparison value; a list is stringified element by element,
        anything else is stringified as a whole
    :param expression: comparison expression whose ``object_path`` has the
        form ``<object>:<field>``
    :param mapped_fields_array: list of mapped data source fields; index 0 is
        dropped for MAC values and index 1 for IP values
    :raises SearchFeatureNotSupportedError: when MAC and IP values are mixed,
        or when no candidate matches either pattern
    """
    stix_object, stix_field = expression.object_path.split(':')
    mac_pattern = re.compile(MAC)
    ip_pattern = re.compile(IP_ADDRESS)

    if isinstance(value, list):
        text_values = [str(item) for item in value]
    else:
        text_values = [str(value)]
    # Only the first entry is inspected; it is treated as a bracketed,
    # ", "-separated listing of values.
    candidates = text_values[0].strip('][').split(', ')

    reference_paths = ('src_ref.value', 'dst_ref.value')
    if stix_object not in reference_paths and stix_field not in reference_paths:
        return

    has_mac = any(mac_pattern.search(item) for item in candidates)
    has_ip = any(ip_pattern.search(item) for item in candidates)

    if has_mac and has_ip:
        raise SearchFeatureNotSupportedError(
            "Search not supported for a combination of ipaddress and "
            "macaddress values")
    if has_mac:
        # condition to remove the IP related fields
        del mapped_fields_array[0]
    elif has_ip:
        # condition to remove the Mac related fields
        del mapped_fields_array[1]
    else:
        raise SearchFeatureNotSupportedError(
            "Invalid format not supported for this operation")
def _parse_mapped_fields(self, expression, value, comparator, mapped_fields_array):
    """
    Build the OR-joined comparison clause for every mapped data source field.

    ``self._check_value_type`` runs first and may shrink
    ``mapped_fields_array`` in place. The value is then quoted: double quotes
    for the IsSubSet comparator, single quotes otherwise.

    :param expression: comparison expression being translated
    :param value: already formatted comparison value
    :param comparator: native comparator string for the expression
    :param mapped_fields_array: data source fields mapped to the STIX attribute
    :return: the per-field clauses joined with " OR "
    :raises SearchFeatureNotSupportedError: for comparator/field/value
        combinations rejected below
    """
    self._check_value_type(value, expression, mapped_fields_array)

    # issubnet operator expects string literal in data source
    if expression.comparator in (ComparisonComparators.IsSubSet,):
        value = self._format_double_quotes(value)
    else:
        value = self._format_single_quotes(value)

    clauses = []
    for mapped_field in mapped_fields_array:
        if 'fulltextSearch' in mapped_field:
            # Full text search fields take the bare value and only accept
            # Equal with a non-empty value.
            if expression.comparator not in (ComparisonComparators.Equal,) or not value:
                raise SearchFeatureNotSupportedError(
                    "Only 'Equals' Operator is supported for non-indexed fields"
                )
            clauses.append("{value}".format(value=value))
        elif not value:
            # Empty value: only Equal / NotEqual are translated, using the
            # dedicated operator lookup and no right-hand side.
            if expression.comparator not in (ComparisonComparators.Equal,
                                             ComparisonComparators.NotEqual):
                raise SearchFeatureNotSupportedError(
                    "Empty string search is not supported for this operation"
                )
            comparator = self.arcsight_operator_lookup.get(
                expression.comparator)
            clauses.append("{mapped_field} {comparator}".format(
                mapped_field=mapped_field, comparator=comparator))
        elif any(mapped_field in match for match in INTEGER_LOOKUP_FIELDS) \
                and expression.comparator in (ComparisonComparators.Like,
                                              ComparisonComparators.Matches):
            # check for excluding integer type attributes in the LIKE/MATCHES search query
            raise SearchFeatureNotSupportedError(
                "'LIKE / MATCHES' Operator is not supported for integer fields"
            )
        else:
            clauses.append("{mapped_field} {comparator} {value}".format(
                mapped_field=mapped_field, comparator=comparator, value=value))

    return " OR ".join(clauses)
def _parse_expression(self, expression, qualifier=None) -> str:
    """
    Recursively translate a parsed STIX pattern node into a query fragment.

    :param expression: pattern node (comparison, combined comparison,
        observation, combined observation, start/stop qualifier or pattern)
    :param qualifier: qualifier forwarded to ``self._parse_time_range``
    :return: the query fragment; ``None`` on the branches that only record
        state on ``self`` (see the combined observation branch)
    :raises SearchFeatureNotSupportedError: for unsupported LIKE / NOT usage
    :raises RuntimeError: when the node type is not recognised
    """
    if isinstance(expression, ComparisonExpression):  # Base Case
        # Resolve STIX Object Path to a field in the target Data Model
        stix_object, stix_field = expression.object_path.split(':')
        # Multiple data source fields may map to the same STIX Object
        mapped_fields_array = self.dmm.map_field(stix_object, stix_field)
        # Resolve the comparison symbol to use in the query string
        comparator = self._lookup_comparison_operator(
            self, expression.comparator)

        stix_comparator = expression.comparator
        # Some values are formatted differently based on how they're being compared
        if stix_comparator in (ComparisonComparators.Matches,
                               ComparisonComparators.Equal,
                               ComparisonComparators.NotEqual):
            # Should be in single-quotes
            value = self._format_equality(expression.value)
        elif stix_comparator == ComparisonComparators.In:
            value = self._format_set(expression.value)
        elif stix_comparator == ComparisonComparators.Like:
            # '%' -> '*' wildcard
            if '_' in expression.value:
                raise SearchFeatureNotSupportedError(
                    "'LIKE' Operator is not supported for '_' wildcard character"
                )
            value = self._format_like(expression.value)
        else:
            value = self._escape_value(expression.value)

        # format path like values to suit arcsight supported formats
        if stix_comparator != ComparisonComparators.In:
            value = self._escape_value(value)

        comparison_string = self._parse_mapped_fields(
            self, expression, value, comparator, mapped_fields_array)

        needs_grouping = (len(mapped_fields_array) > 1
                          and not self._is_reference_value(stix_field))
        if needs_grouping:
            # More than one data source field maps to the STIX attribute,
            # so group comparisons together.
            comparison_string = "(" + comparison_string + ")"

        if expression.negated:
            # exclusion conditions for negation operator
            if stix_comparator == ComparisonComparators.Like:
                raise SearchFeatureNotSupportedError(
                    "'NOT' Operator is not supported for LIKE")
            if 'fulltextSearch' in mapped_fields_array:
                raise SearchFeatureNotSupportedError(
                    "'NOT' Operator is not supported for non-indexed fields"
                )
            comparison_string = self._negate_comparison(comparison_string)
        return "{}".format(comparison_string)

    if isinstance(expression, CombinedComparisonExpression):
        operator = self._lookup_comparison_operator(
            self, expression.operator)
        left_query = self._parse_expression(expression.expr1)
        right_query = self._parse_expression(expression.expr2)
        if not (left_query and right_query):
            return ''
        combined = "{} {} {}".format(left_query, operator, right_query)
        return "{}".format(combined)

    if isinstance(expression, ObservationExpression):
        self._parse_time_range(qualifier, self._time_range)
        return self._parse_expression(expression.comparison_expression,
                                      qualifier)

    if isinstance(expression, CombinedObservationExpression):
        operator = self._lookup_comparison_operator(
            self, expression.operator)
        left_query = self._parse_expression(expression.expr1, qualifier)
        right_query = self._parse_expression(expression.expr2, qualifier)
        # condition to pop the duplicate time qualifiers for combined queries
        if self.qualifier_list[-2] == self.qualifier_list[-1]:
            self.qualifier_list.pop(-1)
            if left_query and right_query:
                return "({}) {} ({})".format(left_query, operator,
                                             right_query)
            return ''
        # Different qualifiers: each non-empty side is recorded as its own
        # qualified query and nothing is returned.
        for side_query in (left_query, right_query):
            if side_query:
                self.qualified_queries.append([side_query])
        return None

    if isinstance(expression, StartStopQualifier):
        if hasattr(expression, 'observation_expression'):
            return self._parse_expression(
                getattr(expression, 'observation_expression'),
                expression.qualifier)
        return None

    if isinstance(expression, Pattern):
        return "{expr}".format(
            expr=self._parse_expression(expression.expression))

    raise RuntimeError(
        "Unknown Recursion Case for expression={}, type(expression)={}"
        .format(expression, type(expression)))
def _parse_expression(self, expression, qualifier=None) -> str:
    """
    Recursively translate a parsed STIX pattern node into native query text.

    Observation-level nodes do not return a query: they append
    "(<comparison>) and (<time range>)" strings to ``self.final_query_list``.

    :param expression: parsed pattern node
    :param qualifier: qualifier forwarded to ``self._parse_time_range``
    :return: query text for comparison, combined comparison and pattern
        nodes; ``None`` for the observation-level branches
    :raises SearchFeatureNotSupportedError: for unsupported NOT / LIKE /
        MATCHES combinations
    :raises RuntimeError: when the node type is not recognised
    """
    if isinstance(expression, ComparisonExpression):  # Base Case
        # Resolve STIX Object Path to a field in the target Data Model
        stix_object, stix_field = expression.object_path.split(':')
        # Multiple data source fields may map to the same STIX Object
        mapped_fields_array = self.dmm.map_field(stix_object, stix_field)
        # Resolve the comparison symbol to use in the query string
        comparator = self._lookup_comparison_operator(
            expression.comparator)

        stix_comparator = expression.comparator
        pattern_comparators = (ComparisonComparators.Like,
                               ComparisonComparators.Matches)
        quoted_comparators = (ComparisonComparators.Equal,
                              ComparisonComparators.NotEqual,
                              ComparisonComparators.GreaterThan,
                              ComparisonComparators.LessThan,
                              ComparisonComparators.GreaterThanOrEqual,
                              ComparisonComparators.LessThanOrEqual)

        # Some values are formatted differently based on how they're being compared
        if stix_comparator == ComparisonComparators.Matches:
            # needs forward slashes
            value = self._format_match(expression.value)
        elif stix_comparator == ComparisonComparators.In:
            # should be (x, y, z, ...)
            value = self._format_set(expression.value)
        elif stix_comparator in quoted_comparators:
            # Should be in single-quotes
            value = self._format_equality(expression.value)
        elif stix_comparator == ComparisonComparators.Like:
            # '%' -> '*' wildcard, '_' -> '?' single wildcard
            value = self._format_like(expression.value)
        else:
            value = self._escape_value(expression.value)

        if expression.negated:
            if stix_comparator in pattern_comparators:
                raise SearchFeatureNotSupportedError(
                    "'NOT' Operator is not supported for LIKE and MATCHES")
            if stix_object in ('ipv4-addr', 'ipv6-addr') \
                    or stix_field in ('src_ref.value', 'dst_ref.value'):
                raise SearchFeatureNotSupportedError(
                    "'NOT' Operator is not supported for IPV4 or IPV6 address"
                )
            comparator = self.negated_comparator_lookup.get(
                expression.comparator)

        # to remove single quotes in specific field value
        if stix_field in ('pid', 'parent_ref.pid', 'account_last_login'):
            if stix_comparator in pattern_comparators:
                raise SearchFeatureNotSupportedError(
                    '"{operator}" operator is not supported for '
                    '"{stix_field}" attribute'.format(
                        operator=expression.comparator.name.upper(),
                        stix_field=stix_field))
            value = self._format_value_without_quotes(value)

        if stix_field not in ('provider', 'vendor'):
            value = self._format_value_to_lower_case(value)

        # COUNTER is used to form sequential lambda function names for
        # OData4 queries per comparison observation, e.g.
        #   processes/any(query1:contains(tolower(query1/path), 'c:\\windows\\system32'))
        #   and processes/any(query2:contains(tolower(query2/name), 'exe'))
        self.COUNTER += 1
        comparison_string = self._parse_mapped_fields(
            expression, value, comparator, stix_field, mapped_fields_array,
            self.COUNTER)

        if len(mapped_fields_array) > 1:
            # More than one data source field maps to the STIX attribute,
            # so group comparisons together.
            comparison_string = "(" + comparison_string + ")"
        return "{}".format(comparison_string)

    if isinstance(expression, CombinedComparisonExpression):
        operator = self._lookup_comparison_operator(expression.operator)
        left_query = self._parse_expression(expression.expr1)
        right_query = self._parse_expression(expression.expr2)
        if not (left_query and right_query):
            return ''
        # Nested combined comparisons are parenthesised to keep precedence.
        if isinstance(expression.expr1, CombinedComparisonExpression):
            left_query = "({})".format(left_query)
        if isinstance(expression.expr2, CombinedComparisonExpression):
            right_query = "({})".format(right_query)
        combined = "{} {} {}".format(left_query, operator, right_query)
        return "{}".format(combined)

    if isinstance(expression, ObservationExpression):
        parse_string = self._parse_expression(
            expression.comparison_expression)
        time_string = self._parse_time_range(qualifier, self._time_range)
        self.final_query_list.append(
            "({}) and ({})".format(parse_string, time_string))
        return None

    if hasattr(expression, 'qualifier') and hasattr(
            expression, 'observation_expression'):
        observation = expression.observation_expression
        if isinstance(observation, CombinedObservationExpression):
            self._parse_expression(observation.expr1, expression.qualifier)
            self._parse_expression(observation.expr2, expression.qualifier)
        else:
            parse_string = self._parse_expression(
                observation.comparison_expression, expression.qualifier)
            time_string = self._parse_time_range(expression.qualifier,
                                                 self._time_range)
            self.final_query_list.append(
                "({}) and ({})".format(parse_string, time_string))
        return None

    if isinstance(expression, CombinedObservationExpression):
        self._parse_expression(expression.expr1, qualifier)
        self._parse_expression(expression.expr2, qualifier)
        return None

    if isinstance(expression, Pattern):
        return "{expr}".format(
            expr=self._parse_expression(expression.expression))

    raise RuntimeError(
        "Unknown Recursion Case for expression={}, type(expression)={}"
        .format(expression, type(expression)))
def format_comparision_string(comparison_string, mapped_field, lambda_func):
    """
    Append the filter clause for ``mapped_field`` to ``comparison_string``.

    Besides its parameters this function reads ``stix_field``, ``value``,
    ``values``, ``comparator`` and ``expression`` from the enclosing scope.

    :param comparison_string: query text built so far
    :param mapped_field: data source field, optionally dotted
        (``collection.attribute[.nested]``)
    :param lambda_func: lambda variable name used inside ``any(...)``
    :return: ``comparison_string`` with the new clause appended
    :raises SearchFeatureNotSupportedError: when ``values`` is a list for the
        ``vendorInformation`` fields
    """
    is_contains = comparator == 'contains'

    if '.' not in mapped_field:
        # mapped field that does not have '.' character -> example [azureTenantId,title]
        if is_contains:
            template = "{comparator}(tolower({mapped_field}), {value})"
        else:
            template = "tolower({mapped_field}) {comparator} {value}"
        return comparison_string + template.format(
            mapped_field=mapped_field, comparator=comparator, value=value)

    # mapped_field that has '.' character -> example [fileStates.name,processes.name]
    collection_attribute_array = mapped_field.split('.')
    collection_name = collection_attribute_array[0]
    attribute_nested_level = '/'.join(collection_attribute_array[1:])

    case_preserved_fields = ('pid', 'parent_ref.pid', 'account_last_login',
                             'provider', 'vendor', 'protocols[*]')
    if stix_field in case_preserved_fields:
        attribute_expression = '{fn}/'.format(
            fn=lambda_func) + attribute_nested_level
    else:
        attribute_expression = 'tolower({fn}/'.format(
            fn=lambda_func) + attribute_nested_level + ')'

    # Arguments shared by every "collection/any(...)" template below.
    any_args = {
        'collection_name': collection_name,
        'fn': lambda_func,
        'attribute_expression': attribute_expression,
        'comparator': comparator,
        'value': value,
    }

    # ip address in data source is like "sourceAddress": "IP: 92.63.194.101 [2]\r"
    # to get ip address from data source using contains keyword ODATA query
    if mapped_field in ('networkConnections.sourceAddress',
                        'networkConnections.destinationAddress',
                        'fileStates.path', 'processes.path'):
        template = "{collection_name}/any({fn}:contains({attribute_expression}, {value}))"
        return comparison_string + template.format(**any_args)

    if mapped_field in ('fileStates.fileHash.hashValue',
                        'processes.fileHash.hashValue'):
        hash_string = 'fileHash/hashType'
        stix_parts = stix_field.split('.')
        if mapped_field == 'fileStates.fileHash.hashValue':
            hash_type = stix_parts[1]
        else:
            hash_type = stix_parts[2]
        # First pin the hash type, then compare the hash value itself.
        comparison_string += "({collection_name}/any({fn}:{fn}/{hash_string} {comparator} '{value}')".format(
            collection_name=collection_name,
            fn=lambda_func,
            hash_string=hash_string,
            comparator='eq',
            value=hash_type.lower().replace('-', ''))
        if is_contains:
            template = " and {collection_name}/any({fn}:{comparator}({attribute_expression}, {value})))"
        else:
            template = " and {collection_name}/any({fn}:{attribute_expression} {comparator} {value}))"
        return comparison_string + template.format(**any_args)

    if mapped_field in ('vendorInformation.provider',
                        'vendorInformation.vendor'):
        if isinstance(values, list):
            raise SearchFeatureNotSupportedError(
                '"{operator}" operator is not supported for "'
                '{attribute}" attribute'.format(
                    operator=expression.comparator.name.upper(),
                    attribute=mapped_field.split('.')[1]))
        if is_contains:
            template = "{comparator}({object}, {value})"
        else:
            template = "{object} {comparator} {value}"
        return comparison_string + template.format(
            object='/'.join(collection_attribute_array),
            comparator=comparator,
            value=value)

    if is_contains:
        template = "{collection_name}/any({fn}:{comparator}({attribute_expression}, {value}))"
    else:
        template = "{collection_name}/any({fn}:{attribute_expression} {comparator} {value})"
    return comparison_string + template.format(**any_args)
def _parse_expression(self, expression) -> str:
    """
    Recursively translate a parsed STIX pattern node into a query string.

    A comparison becomes "<object scope> AND <field><comparator><value>",
    where the object scope is ``self.object_prefix`` plus the mapped object
    and the field is ``self.fields_prefix`` plus the mapped field.

    :param expression: parsed pattern node
    :return: query string for the node
    :raises SearchFeatureNotSupportedError: for the FollowedBy observation
        operator
    :raises RuntimeError: when the node type is not recognised
    """
    if isinstance(expression, ComparisonExpression):  # Base Case
        # Resolve STIX Object Path to a field in the target Data Model
        stix_object, stix_field = expression.object_path.split(':')
        mapped_object = self.dmm.map_object(stix_object)
        mapped_field = "{}{}".format(
            self.fields_prefix, self.dmm.map_field(stix_object, stix_field))
        scope_to_object = "{}:{}".format(self.object_prefix, mapped_object)

        # Resolve the comparison symbol to use in the query string
        comparator = self.comparator_lookup[expression.comparator]

        stix_comparator = expression.comparator
        # Some values are formatted differently based on how they're being compared
        if stix_comparator == ComparisonComparators.Matches:
            # needs forward slashes
            value = self._format_match(expression.value)
        elif stix_comparator == ComparisonComparators.In:
            # should be (x, y, z, ...)
            value = self._format_set(expression.value)
        elif stix_comparator in (ComparisonComparators.Equal,
                                 ComparisonComparators.NotEqual):
            # Should be in double-quotes
            value = self._format_equality(expression.value)
        elif stix_comparator == ComparisonComparators.Like:
            # '%' -> '*' wildcard, '_' -> '?' single wildcard
            value = self._format_like(expression.value)
        else:
            value = self._escape_value(expression.value)

        comparison_string = "{mapped_field}{comparator}{value}".format(
            mapped_field=mapped_field, comparator=comparator, value=value)

        # NotEqual is expressed by negating the comparison; an explicit NOT
        # on the expression negates it once more.
        if stix_comparator == ComparisonComparators.NotEqual:
            comparison_string = self._negate_comparison(comparison_string)
        if expression.negated:
            comparison_string = self._negate_comparison(comparison_string)

        return "{scope_to_object} AND {comparison}".format(
            scope_to_object=scope_to_object, comparison=comparison_string)

    if isinstance(expression, CombinedComparisonExpression):
        left_query = self._parse_expression(expression.expr1)
        operator = self.comparator_lookup[expression.operator]
        right_query = self._parse_expression(expression.expr2)
        return "({}) {} ({})".format(left_query, operator, right_query)

    if isinstance(expression, ObservationExpression):
        return self._parse_expression(expression.comparison_expression)

    if isinstance(expression, CombinedObservationExpression):
        if expression.operator == ObservationOperators.FollowedBy:
            raise SearchFeatureNotSupportedError(
                "{feature} on {platform}".format(
                    feature=expression.operator, platform="Elasticsearch"))
        operator = self.comparator_lookup[expression.operator]
        left_query = self._parse_expression(expression.expr1)
        right_query = self._parse_expression(expression.expr2)
        return "({expr1}) {operator} ({expr2})".format(
            expr1=left_query, operator=operator, expr2=right_query)

    if isinstance(expression, Pattern):
        return self._parse_expression(expression.expression)

    raise RuntimeError(
        "Unknown Recursion Case for expression={}, type(expression)={}"
        .format(expression, type(expression)))