def parse_scalyr_request(payload):
    """Parses a payload encoded with the Scalyr-specific JSON optimizations.

    The only place these optimizations are used is when creating `AddEvent` requests.

    NOTE:  This method is fragile and just does enough conversion to support the tests.  It could lead to
    erroneous results if colons are used in string content, because the key-quoting pass below runs over the
    whole rewritten payload (string content included).  It is also not optimized for performance.

    :param payload: The request payload
    :type payload: bytes

    :return: The parsed request body
    :rtype: dict

    :raise ValueError: If a length prefix declares a negative string length.
    """
    # Our general strategy is to rewrite the payload to be standard JSON and then use the
    # standard JSON libraries to parse it.  There are two main optimizations we need to undo
    # here: length-prefixed strings and not using quotes around key names in JSON objects.

    # First, look for the length-prefixed strings.  These are marked by "`sXXXX" where XXXX is a four
    # byte integer holding the number of bytes in the string.  This precedes the string.  So we find
    # all of those and replace them with quoted, escaped JSON strings.
    # NOTE: It is very important all of our regex work against byte strings because our input is in bytes.
    length_prefix_re = re.compile(b"`s(....)", re.DOTALL)

    # Pieces of the rewritten payload.  They are joined once at the end rather than repeatedly
    # concatenating bytes objects.
    pieces = []
    # The index of the first byte of `payload` that has not been copied into `pieces` yet.
    cursor = 0

    while True:
        # Resume the search right after the previous string's content.  Scanning the whole payload in one
        # pass would also match "`s" sequences that merely appear inside a string's content and
        # misinterpret them as length prefixes.
        match = length_prefix_re.search(payload, cursor)
        if match is None:
            break

        # First add in the bytes between the last processed position and the start of this match.
        pieces.append(payload[cursor : match.start(0)])

        # Read the 4 bytes that describe the length, which is stored in regex group 1.
        # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
        length = compat.struct_unpack_unicode(">i", match.group(1))[0]
        if length < 0:
            # A negative length would move the cursor backwards and the search would never terminate.
            raise ValueError(
                "Negative length %d in length-prefixed string at offset %d"
                % (length, match.start(0))
            )

        # Grab the string content as raw bytes and re-emit it as a regular JSON string.
        content_start = match.end(1)
        content_end = content_start + length
        raw_string = payload[content_start:content_end]
        text_string = raw_string.decode("utf-8", "replace")
        pieces.append(scalyr_util.json_encode(text_string, binary=True))

        cursor = content_end

    # Copy whatever follows the last length-prefixed string.
    pieces.append(payload[cursor:])
    rewritten_payload = b"".join(pieces)

    # Now convert all places where we do not have quotes around key names to have quotes.
    # This is pretty fragile.. we look for anything like
    #      foo:
    #  and convert it to
    #      "foo":
    rewritten_payload = re.sub(b"([\\w\\-]+):", b'"\\1":', rewritten_payload)

    # NOTE: Special case for Windows where path is C:\ which we don't want to convert
    rewritten_payload = rewritten_payload.replace(b'"C":\\', b"C:\\")
    # do the same for the low-case.
    rewritten_payload = rewritten_payload.replace(b'"c":\\', b"c:\\")

    return scalyr_util.json_decode(rewritten_payload.decode("utf-8", "replace"))
def _parse_sent_bytes(self, content):
    """Parses the stream id and the actual content from the encoded content string sent by the server.

    The first four bytes of `content` are unpacked (struct format "i") into an integer code.  The lowest bit
    of that code is the stream id and the code shifted right by one bit is the number of content bytes that
    follow the prefix.  The method asserts that this count matches the actual length of `content`.

    @param content: The string sent by the server.
    @type content: six.binary_type

    @return: A tuple of the stream_id and the actual content encoded in the sent string.
    @rtype: (int,six.text_type)
    """
    prefix_code = content[0:4]
    # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
    code = compat.struct_unpack_unicode("i", prefix_code)[0]
    stream_id = code % 2
    num_bytes = code >> 1

    # The declared byte count must account for everything after the 4 byte prefix.
    # NOTE: `assertEqual` is used rather than the deprecated `assertEquals` alias, which was removed from
    # unittest in Python 3.12.
    self.assertEqual(len(content), num_bytes + 4)
    decoded_str = content[4:].decode("utf-8")

    return stream_id, decoded_str
def parse_request(self, input_buffer, num_bytes):
    """Returns the next complete request from 'input_buffer'.

    A request is complete once its length prefix and the number of payload bytes declared by that prefix
    are both available.  When that is the case, the prefix and the payload are consumed from
    'input_buffer' and only the payload is returned (the length field itself is not included).

    When no complete request is available, None is returned and 'input_buffer' is left at the position it
    had on entry.  The position is also restored when RequestSizeExceeded is raised.

    @param input_buffer: The bytes to read.
    @param num_bytes: The number of bytes available in 'input_buffer'.

    @return: The payload of the next complete request read from 'input_buffer', or None if there is none.

    @raise RequestSizeExceeded: If the length prefix declares a request larger than the maximum request
        size.
    """
    # Position to seek back to on exit.  It is cleared once we commit to consuming a whole request.
    rewind_to = None
    try:
        rewind_to = input_buffer.tell()

        # Nothing can be parsed unless more bytes than the length prefix alone are available.
        if num_bytes <= self.__prefix_length:
            return None

        # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
        prefix_format = six.ensure_str(self.__format)
        prefix_bytes = input_buffer.read(self.__prefix_length)
        (length,) = compat.struct_unpack_unicode(prefix_format, prefix_bytes)

        if length > self.__max_request_size:
            raise RequestSizeExceeded(length, self.__max_request_size)

        # The payload has not fully arrived yet; the finally clause rewinds past the prefix we just read.
        if num_bytes < length + self.__prefix_length:
            return None

        # A whole request is available, so keep the bytes consumed instead of rewinding.
        rewind_to = None
        return input_buffer.read(length)
    finally:
        if rewind_to is not None:
            input_buffer.seek(rewind_to)