def _FormatMessage(item, lang):
  """Render one <message> element as a C `case` label returning its text.

  Args:
    item: Node providing `ws_at_start`, `ws_at_end`, `Translate(lang)` and
      `GetTextualIds()`.
    lang: Language identifier passed through to `item.Translate`.

  Returns:
    A string holding a `case <id>:` label followed by a `return "<text>";`
    statement, where <id> is the first textual id of `item`.
  """
  leading = item.ws_at_start
  translated = item.Translate(lang)
  trailing = item.ws_at_end
  text = leading + translated + trailing

  # Non-ASCII characters must end up as octal escapes: C lets a hexadecimal
  # escape run on indefinitely, whereas an octal escape always has the fixed
  # form \OOO. Python 3 has no 'string-escape' codec, so go through bytes:
  # encode to UTF-8, let codecs.escape_encode produce \x## escapes, then
  # decode the resulting ASCII-only bytes back into a str.
  utf8_bytes = text.encode('utf-8')
  escaped_bytes = codecs.escape_encode(utf8_bytes)[0]
  text = escaped_bytes.decode('utf-8')

  # A real escaped char is one or more \xHH groups whose leading backslash is
  # not itself escaped, i.e. it is preceded by start-of-string or a
  # non-backslash char, followed by any number of backslash pairs.
  unescaped_prefix = r"(^|[^\\])"
  backslash_pairs = unescaped_prefix + r"(\\\\)*"
  hex_run = r"((\\x)[0-9a-f]{2})+"
  hex_escape_re = re.compile(
      r"(?P<escaped_backslashes>%s)(?P<hex>%s)" % (backslash_pairs, hex_run))
  text = hex_escape_re.sub(_HexToOct, text)

  # Collapse doubled backslashes back to single ones, then protect double
  # quotes so the text can sit inside a C string literal.
  text = text.replace('\\\\', '\\').replace('"', '\\"')
  # Whatever util.LINEBREAKS matches becomes a literal backslash-n sequence.
  text = util.LINEBREAKS.sub(r'\\n', text)

  textual_id = item.GetTextualIds()[0]
  return '\n case %s:\n return "%s";' % (textual_id, text)
def _FormatMessage(item, lang):
  """Format a single <message> element.

  Args:
    item: Node providing `ws_at_start`, `ws_at_end`, `Translate(lang)` and
      `GetTextualIds()`.
    lang: Language identifier passed through to `item.Translate`.

  Returns:
    A string holding a `case <id>:` label followed by a `return "<text>";`
    statement, where <id> is the first textual id of `item`.
  """
  # Imported locally so this function does not depend on a module-level
  # import being present.
  import codecs

  message = item.ws_at_start + item.Translate(lang) + item.ws_at_end

  # Output message with non-ascii chars escaped as octal numbers.
  # C's grammar allows escaped hexadecimal numbers to be infinite,
  # but octal is always of the form \OOO.
  #
  # The 'string_escape' codec and calling .encode() on an already-encoded
  # value only work on Python 2. codecs.escape_encode performs the same
  # escaping on the UTF-8 bytes (non-ASCII bytes become \x## escapes), and
  # the ASCII-only result is decoded back into a string.
  message = codecs.escape_encode(message.encode('utf-8'))[0].decode('utf-8')

  # An escaped char is (\xHH)+ but only if the initial backslash is not
  # itself escaped.
  not_a_backslash = r"(^|[^\\])"  # beginning of line or a non-backslash char
  escaped_backslashes = not_a_backslash + r"(\\\\)*"
  hex_digits = r"((\\x)[0-9a-f]{2})+"
  two_digit_hex_num = re.compile(
      r"(?P<escaped_backslashes>%s)(?P<hex>%s)"
      % (escaped_backslashes, hex_digits))
  message = two_digit_hex_num.sub(_HexToOct, message)

  # Unescape \ (convert \\ back to \), then escape double quotes so the text
  # can be placed inside a C string literal.
  message = message.replace('\\\\', '\\')
  message = message.replace('"', '\\"')
  # Whatever util.LINEBREAKS matches becomes a literal backslash-n sequence.
  message = util.LINEBREAKS.sub(r'\\n', message)

  name_attr = item.GetTextualIds()[0]
  return '\n case %s:\n return "%s";' % (name_attr, message)