def new_general_entity(self, name, value):
    """See `xmldtd.WFCDTD`.

    Record the entity `name` with text `value` as a new entry in
    `self.messages`.  Nothing is recorded while `self.started` is falsy.
    """
    if not self.started:
        return

    entity = TranslationMessageData()
    entity.msgid_singular = name

    # CarlosPerelloMarin 20070326: xmldtd parser does an inline
    # parsing which means that the content is all in a single line so we
    # don't have a way to show the line number with the source reference.
    source_reference = "%s(%s)" % (self.filename, name)
    entity.file_references_list = [source_reference]

    # The entity text is stored both as the singular translation and as
    # the singular text of the message.
    entity.addTranslation(TranslationConstants.SINGULAR_FORM, value)
    entity.singular_text = value

    entity.context = self.chrome_path
    entity.source_comment = self.last_comment

    self.messages.append(entity)
    self.started += 1
    # The pending comment has been attached to this entity; clear it so it
    # is not reused for the next one.
    self.last_comment = None
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    :param content: Raw content of the file as a byte string; it is
        decoded here as UTF-8.
    :raises TranslationFormatInvalidInputError: if `content` is not valid
        UTF-8, or if a line holds an invalid unicode escape sequence.
    :raises TranslationFormatSyntaxError: if a `key=value` line has a key
        that `valid_property_msgid` rejects.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        raise TranslationFormatInvalidInputError(
            'Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Count the line before touching it, so that every error raised
        # for this line reports the same 1-based line number.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # Drop the complete comment marker: '//' is two characters
                # long, '#' only one.
                marker_length = 2 if line.startswith(u'//') else 1
                line = line[marker_length:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False
                    elif last_comment is not None:
                        # Comments must end always with a new line.
                        # An ignored or empty comment leaves nothing to
                        # terminate, so there is no text to append to.
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        # First character taken from a new source line:
                        # keep the line break inside the comment text.
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                        last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permissive with what you
                # import' policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    :param content: Raw content of the file as a byte string; it is
        decoded here as UTF-8.
    :raises TranslationFormatInvalidInputError: if `content` is not valid
        UTF-8, or if a line holds an invalid unicode escape sequence.
    :raises TranslationFormatSyntaxError: if a `key=value` line has a key
        that `valid_property_msgid` rejects.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise TranslationFormatInvalidInputError(
            'Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Count the line before touching it, so that every error raised
        # for this line reports the same 1-based line number.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # Drop the complete comment marker: '//' is two characters
                # long, '#' only one.
                marker_length = 2 if line.startswith(u'//') else 1
                line = line[marker_length:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False
                    elif last_comment is not None:
                        # Comments must end always with a new line.
                        # An ignored or empty comment leaves nothing to
                        # terminate, so there is no text to append to.
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        # First character taken from a new source line:
                        # keep the line break inside the comment text.
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                        last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permissive with what you
                # import' policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''