def _extract_records(self, json_payload, envelope):
    """Extract records from the original json payload using the JSON configuration

    Records are pulled out with the 'json_path' option when it is set;
    otherwise the 'json_regex_key' option is tried. When neither option
    applies, an empty list is returned.

    Args:
        json_payload (dict): The parsed json data
        envelope: When truthy, this value is stored under ENVELOPE_KEY on
            every extracted record (presumably a dict of envelope values --
            confirm against the caller)

    Returns:
        list: A list of JSON records extracted via JSON path or regex
        bool: False if the JSONPath expression finds no matches, the regex
            does not match, or the regex match is not a valid JSON object
    """
    json_records = []
    json_path_expression = self.options.get('json_path')
    json_regex_key = self.options.get('json_regex_key')

    # Handle jsonpath extraction of records
    if json_path_expression:
        LOGGER.debug('Parsing records with JSONPath')
        records_jsonpath = jsonpath_rw.parse(json_path_expression)
        matches = records_jsonpath.find(json_payload)
        if not matches:
            return False

        # This option does not change between matches, so read it once
        embedded_json = self.options.get('embedded_json')

        for match in matches:
            record = match.value
            if embedded_json:
                try:
                    record = json.loads(record)
                except (ValueError, TypeError):
                    # ValueError: the string is not valid JSON
                    # TypeError: the matched value is not a string at all
                    LOGGER.warning('Embedded json is invalid')
                    continue

            if envelope:
                # Only a dict can carry the envelope; skip anything else
                # instead of failing on a missing update()/item assignment
                if not isinstance(record, dict):
                    LOGGER.warning('Record is not a dict: %s', record)
                    continue
                record[ENVELOPE_KEY] = envelope

            json_records.append(record)

    # Handle nested json object regex matching
    elif json_regex_key and json_payload.get(json_regex_key):
        LOGGER.debug('Parsing records with JSON Regex Key')
        match = self.__regex.search(str(json_payload[json_regex_key]))
        if not match:
            return False

        # NOTE(review): groups('json_blob') uses the string 'json_blob' as the
        # default for groups that did not participate, and [0] takes the first
        # group. match.group('json_blob') may have been intended -- confirm
        # against the regex definition before changing.
        match_str = match.groups('json_blob')[0]
        try:
            new_record = json.loads(match_str)
        except ValueError:
            LOGGER.debug('Matched regex string is not valid JSON: %s', match_str)
            return False

        # Make sure the new_record is a dictionary and not a list.
        # Valid JSON can be either
        if not isinstance(new_record, dict):
            return False

        if envelope:
            new_record[ENVELOPE_KEY] = envelope

        json_records.append(new_record)

    return json_records
def _extract_records(self, json_payload):
    """Pull the individual records out of a parsed JSON payload

    The JSON path results from self._extract_json_path() take priority.
    Only when that helper yields nothing is the 'json_regex_key' option
    consulted to find a JSON object nested inside a string value.

    Args:
        json_payload (dict): The parsed json data

    Returns:
        list: The records extracted via JSON path or regex (may be empty)
        bool: False if the JSON path helper returned False, the regex did
            not match, or the regex match is not a valid JSON object
    """
    path_results = self._extract_json_path(json_payload)
    if path_results is False:
        return False

    if path_results:
        # Without embedded JSON the helper's results are returned untouched
        if not self.options.get('embedded_json'):
            return path_results

        decoded = []
        for raw_value in path_results:
            try:
                candidate = json.loads(raw_value)
            except (ValueError, TypeError):
                LOGGER.debug('Embedded json is invalid')
                continue

            if isinstance(candidate, dict):
                decoded.append(candidate)
            else:
                LOGGER.warning('Record is not a dict: %s', candidate)

        return decoded

    # Fall back to matching a nested json object with the regex
    regex_key = self.options.get('json_regex_key')
    if not (regex_key and json_payload.get(regex_key)):
        return []

    LOGGER.debug('Parsing records with JSON Regex Key')
    found = self.__regex.search(str(json_payload[regex_key]))
    if not found:
        return False

    # NOTE(review): groups('json_blob') uses the string 'json_blob' as the
    # default for groups that did not participate, and [0] takes the first
    # group. match.group('json_blob') may have been intended -- confirm
    # against the regex definition before changing.
    blob = found.groups('json_blob')[0]
    try:
        parsed = json.loads(blob)
    except ValueError:
        LOGGER.debug('Matched regex string is not valid JSON: %s', blob)
        return False

    # Valid JSON can be a list as well; only a dictionary is accepted here
    if not isinstance(parsed, dict):
        return False

    return [parsed]