def apply_template(self, test_event):
    """Apply default values to the given test event

    Args:
        test_event (dict): The loaded test event
    """
    event_log = self.cli_config['logs'].get(test_event['log'])
    parser = event_log['parser']
    schema = event_log['schema']
    configuration = event_log.get('configuration', {})

    # Add envelope keys
    schema.update(configuration.get('envelope_keys', {}))

    # Setup the parser to access default optional values
    self.parsers[parser] = self.parsers.get(parser, get_parser(parser))

    # Apply default values based on the declared schema
    default_test_event = {
        key: self.parsers[parser].default_optional_values(value)
        for key, value in schema.iteritems()
    }

    # Fill in the fields left out of the 'override_record' field,
    # and update the test event with a full 'data' key
    default_test_event.update(test_event['override_record'])
    test_event['data'] = default_test_event
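
# Hedged usage sketch (not from the codebase): illustrates the merge order
# apply_template() relies on - defaults are generated from the schema first,
# then any keys present in 'override_record' win. The schema values, field
# names, and event contents below are hypothetical.
def _example_apply_template_merge():
    schema_defaults = {'name': '', 'port': 0, 'source': ''}  # assumed per-type defaults
    test_event = {'override_record': {'name': 'ssh-login', 'port': 22}}

    default_test_event = dict(schema_defaults)
    default_test_event.update(test_event['override_record'])
    test_event['data'] = default_test_event
    # test_event['data'] == {'name': 'ssh-login', 'port': 22, 'source': ''}
    return test_event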
def _process_log_schemas(self, payload):
    """Get any log schemas that matched this log format

    Args:
        payload: A StreamAlert payload object

    Returns:
        list: Contains any schemas that matched this log format
            Each list entry contains the namedtuple of 'SchemaMatch' with
            values of log_name, root_schema, parser, and parsed_data
    """
    schema_match = namedtuple(
        'SchemaMatch', 'log_name, root_schema, parser, parsed_data')
    schema_matches = []
    log_info = self.get_log_info_for_source()

    # Loop over all logs declared in logs.json
    for log_name, attributes in log_info.iteritems():
        # Get the parser type to use for this log
        parser_name = payload.type or attributes['parser']

        schema = attributes['schema']
        options = attributes.get('configuration', {})

        # Setup the parser class
        parser_class = get_parser(parser_name)
        parser = parser_class(options)

        # Get a list of parsed records
        LOGGER.debug('Trying schema: %s', log_name)
        parsed_data = parser.parse(schema, payload.pre_parsed_record)
        if not parsed_data:
            continue

        LOGGER.debug('Parsed %d records with schema %s',
                     len(parsed_data), log_name)

        if SUPPORT_MULTIPLE_SCHEMA_MATCHING:
            schema_matches.append(
                schema_match(log_name, schema, parser, parsed_data))
            continue

        log_patterns = parser.options.get('log_patterns')
        if all(parser.matched_log_pattern(rec, log_patterns)
               for rec in parsed_data):
            return [schema_match(log_name, schema, parser, parsed_data)]

    return schema_matches
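
# Hedged sketch (not from the codebase): the namedtuple shape that
# _process_log_schemas() returns, populated with hypothetical values so the
# field layout is easy to see.
from collections import namedtuple

SchemaMatch = namedtuple('SchemaMatch', 'log_name, root_schema, parser, parsed_data')

example_match = SchemaMatch(
    log_name='cloudwatch:flow_logs',  # hypothetical log name
    root_schema={'srcaddr': 'string', 'bytes': 'integer'},
    parser=None,  # the parser instance that produced the records
    parsed_data=[{'srcaddr': '10.0.0.1', 'bytes': 120}])

# Callers typically take the first (or only) match and read its fields:
# example_match.log_name, example_match.parsed_data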
def _convert_type(self, parsed_data, schema, options):
    """Convert a parsed payload's values into their declared types.

    If the schema is incorrectly defined for a particular field, this
    function will return False, which will make the payload invalid.

    Args:
        parsed_data: Parsed payload dict
        schema: Data schema for a specific log source
        options: Parser options dict

    Returns:
        dict: The parsed payload with typed values, or False if a value
            cannot be converted to its declared type
    """
    # check for list types here
    payload = parsed_data

    for key, value in schema.iteritems():
        key = str(key)

        # if the schema value is declared as string
        if value == 'string':
            payload[key] = str(payload[key])
        # if the schema value is declared as integer
        elif value == 'integer':
            try:
                payload[key] = int(payload[key])
            except ValueError:
                logger.error('Invalid schema - %s is not an int', key)
                return False
        elif isinstance(value, OrderedDict):
            # An empty nested schema requires no conversion
            if len(value) == 0:
                pass
            else:
                # handle nested csv
                if isinstance(payload[key], str):
                    options['hints'] = options['hints'][key]
                    parse_csv = get_parser('csv')
                    parsed_nested_key = parse_csv(
                        payload[key], schema[key], options).parse()
                    # Take the first element since a list is returned
                    payload[key] = parsed_nested_key[0]

                self._convert_type(payload[key], schema[key], options)
        else:
            logger.error('Invalid declared type - %s', value)

    return payload
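
# Hedged sketch (not from the codebase): a stripped-down version of the
# type-conversion idea in _convert_type(), handling a flat schema only.
# The record and schema below are hypothetical.
def _example_convert_flat_types(record, schema):
    for key, declared in schema.items():
        if declared == 'string':
            record[key] = str(record[key])
        elif declared == 'integer':
            try:
                record[key] = int(record[key])
            except ValueError:
                return False  # value cannot satisfy the declared type
    return record

# _example_convert_flat_types({'port': '443', 'host': 'example.com'},
#                             {'port': 'integer', 'host': 'string'})
# -> {'port': 443, 'host': 'example.com'}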
def _process_log_schemas(self, payload, data):
    """Get any log schemas that matched this log format

    Args:
        payload: A StreamAlert payload object
        data: Pre-parsed data string from a raw_event to be parsed

    Returns:
        list: Contains any schemas that matched this log format
            Each list entry contains the namedtuple of 'ClassifiedLog' with
            values of log_name, root_schema, parser, and parsed_data
    """
    classified_log = namedtuple(
        'ClassifiedLog', 'log_name, root_schema, parser, parsed_data')
    log_metadata = self._log_metadata()
    valid_parses = []

    # Loop over all logs declared in logs.json
    for log_name, attributes in log_metadata.iteritems():
        # Get the parser type to use for this log
        parser_name = payload.type or attributes['parser']

        schema = attributes['schema']
        options = attributes.get('configuration', {})

        # Setup the parser class
        parser_class = get_parser(parser_name)
        parser = parser_class(options)

        # Get a list of parsed records
        parsed_data = parser.parse(schema, data)

        LOGGER.debug('schema: %s', schema)
        if not parsed_data:
            continue

        if SUPPORT_MULTIPLE_SCHEMA_MATCHING:
            valid_parses.append(
                classified_log(log_name, schema, parser, parsed_data))
            continue

        log_patterns = parser.options.get('log_patterns')
        if all(parser.matched_log_pattern(rec, log_patterns)
               for rec in parsed_data):
            return [classified_log(log_name, schema, parser, parsed_data)]

    return valid_parses
def test_process_subkeys_nested_records(self):
    """Rules Engine - Required Subkeys with Nested Records"""
    def cloudtrail_us_east_logs(rec):
        return (
            'us-east' in rec['awsRegion'] and
            'AWS' in rec['requestParameters']['program']
        )

    rule_attrs = RuleAttributes(
        rule_name='cloudtrail_us_east_logs',
        rule_function=cloudtrail_us_east_logs,
        matchers=[],
        datatypes=[],
        logs=['test_log_type_json_nested'],
        outputs=['s3:sample_bucket'],
        req_subkeys={'requestParameters': ['program']})

    data = json.dumps({
        'Records': [
            {
                'eventVersion': '1.05',
                'eventID': '2',
                'eventTime': '3',
                'requestParameters': {
                    'program': 'AWS CLI'
                },
                'eventType': 'CreateSomeResource',
                'responseElements': 'Response',
                'awsRegion': 'us-east-1',
                'eventName': 'CreateResource',
                'userIdentity': {
                    'name': 'john',
                    'key': 'AVC124313414'
                },
                'eventSource': 'Kinesis',
                'requestID': '12345',
                'userAgent': 'AWS CLI v1.3109',
                'sourceIPAddress': '127.0.0.1',
                'recipientAccountId': '123456123456'
            },
            {
                'eventVersion': '1.05',
                'eventID': '2',
                'eventTime': '3',
                'requestParameters': {
                    'program': 'AWS UI'
                },
                'eventType': 'CreateSomeOtherResource',
                'responseElements': 'Response',
                'awsRegion': 'us-east-2',
                'eventName': 'CreateResource',
                'userIdentity': {
                    'name': 'ann',
                    'key': 'AD114313414'
                },
                'eventSource': 'Lambda',
                'requestID': '12345',
                'userAgent': 'Google Chrome 42',
                'sourceIPAddress': '127.0.0.2',
                'recipientAccountId': '123456123456'
            },
            {
                'eventVersion': '1.05',
                'eventID': '2',
                'eventTime': '3',
                # Translates from null in JSON to None in Python
                'requestParameters': None,
                'eventType': 'CreateSomeResource',
                'responseElements': 'Response',
                'awsRegion': 'us-east-1',
                'eventName': 'CreateResource',
                'userIdentity': {
                    'name': 'john',
                    'key': 'AVC124313414'
                },
                'eventSource': 'Kinesis',
                'requestID': '12345',
                'userAgent': 'AWS CLI',
                'sourceIPAddress': '127.0.0.1',
                'recipientAccountId': '123456123456'
            }
        ]
    })

    schema = self.config['logs']['test_cloudtrail']['schema']
    options = self.config['logs']['test_cloudtrail']['configuration']

    parser_class = get_parser('json')
    parser = parser_class(options)
    parsed_result = parser.parse(schema, data)

    valid_record = [
        rec for rec in parsed_result
        if rec['requestParameters'] is not None
    ][0]
    valid_subkey_check = StreamRules.process_subkeys(
        valid_record, 'json', rule_attrs)
    assert_true(valid_subkey_check)

    invalid_record = [
        rec for rec in parsed_result
        if rec['requestParameters'] is None
    ][0]
    invalid_subkey_check = StreamRules.process_subkeys(
        invalid_record, 'json', rule_attrs)
    assert_false(invalid_subkey_check)
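
# Hedged sketch (not the StreamAlert implementation): the req_subkeys check
# the test above exercises amounts to verifying that each declared parent key
# holds a dict containing all of its required subkeys. Names are illustrative.
def _example_has_required_subkeys(record, req_subkeys):
    for parent, subkeys in req_subkeys.items():
        value = record.get(parent)
        if not isinstance(value, dict):
            return False  # e.g. 'requestParameters' was null/None
        if not all(subkey in value for subkey in subkeys):
            return False
    return True

# _example_has_required_subkeys(
#     {'requestParameters': {'program': 'AWS CLI'}},
#     {'requestParameters': ['program']})  -> True
# _example_has_required_subkeys(
#     {'requestParameters': None},
#     {'requestParameters': ['program']})  -> False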
def setup(self):
    """Setup before each method"""
    # load config
    self.config = load_config('test/unit/conf')
    # load the gzip-json parser class
    self.parser_class = get_parser('gzip-json')
def setup_class(cls):
    """Setup the class before any methods"""
    # load config
    cls.config = load_config('tests/unit/conf')
    # load the parser class
    cls.parser_class = get_parser(cls._parser_type())
def setup_class(cls):
    """Setup the class before any methods"""
    # load config
    cls.config = load_config('test/unit/conf')
    # load the kv parser class
    cls.parser_class = get_parser('kv')
def _parse(self, payload, data):
    """Parse a record into a declared type.

    Args:
        payload: A StreamAlert payload object
        data: Pre-parsed data string from a raw_event to be parsed

    Sets:
        payload.log_source: The detected log name from the data_sources config.
        payload.type: The record's type.
        payload.records: The parsed record.

    Returns:
        A boolean representing the success of the parse.
    """
    log_metadata = self.log_metadata(payload)

    # TODO(jack) make this process more efficient.
    # Separate out parsing from key matching.
    # Right now, if the keys match but the type/parser is incorrect,
    # it has to start over
    for log_name, attributes in log_metadata.iteritems():
        # short circuit parser determination
        if not payload.type:
            parser_name = attributes['parser']
        else:
            parser_name = payload.type

        options = {
            'hints': attributes.get('hints'),
            'delimiter': attributes.get('delimiter'),
            'separator': attributes.get('separator'),
            'parser': parser_name,
            'service': payload.service
        }
        schema = attributes['schema']

        # Setup the parser
        parser_class = get_parser(parser_name)
        parser = parser_class(data, schema, options)
        options['nested_keys'] = parser.__dict__.get('nested_keys')

        # A list of parsed records
        parsed_data = parser.parse()

        # Used for short circuiting parser determination
        if parser.payload_type:
            payload.type = parser.payload_type

        if parsed_data:
            logger.debug('log name: %s', log_name)
            logger.debug('parsed_data: %s', parsed_data)
            typed_data = []
            for parsed_rec in parsed_data:
                # convert data types per the schema
                typed_data.append(self._convert_type(parsed_rec, schema, options))

            if typed_data:
                payload.log_source = log_name
                payload.type = parser_name
                payload.records = typed_data
                return True

    return False
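
# Hedged sketch (not the StreamAlert implementation): a condensed view of the
# control flow in _parse() - try each configured log schema, convert types on
# the first successful parse, and report the winning log source. The
# parse_fn/convert_fn callables here are hypothetical stand-ins for the
# parser classes and _convert_type().
def _example_classify(data, log_metadata, parse_fn, convert_fn):
    for log_name, attributes in log_metadata.items():
        parsed = parse_fn(attributes['parser'], attributes['schema'], data)
        if not parsed:
            continue  # this schema did not match; try the next one
        typed = [convert_fn(rec, attributes['schema']) for rec in parsed]
        return log_name, typed  # first matching schema wins
    return None, []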