Пример #1
0
    def testRepresentInvalidInputError(self):
        """str() of an input error combines its location and its message."""
        # Each case pairs the constructor's keyword arguments with the
        # string representation we expect.  The first four rely on the
        # default message, the last four pass an explicit one.
        cases = [
            ({}, "Invalid input"),
            ({'filename': "foo"}, "foo: Invalid input"),
            ({'line_number': 9}, "Line 9: Invalid input"),
            ({'filename': "foo", 'line_number': 9},
             "foo, line 9: Invalid input"),
            ({'message': "message"}, "message"),
            ({'filename': "foo", 'message': "message"}, "foo: message"),
            ({'line_number': 9, 'message': "message"}, "Line 9: message"),
            ({'filename': "foo", 'line_number': 9, 'message': "message"},
             "foo, line 9: message"),
        ]
        for kwargs, expected in cases:
            exception = TranslationFormatInvalidInputError(**kwargs)
            self.assertEqual(str(exception), expected)
Пример #2
0
    def testNonAsciiInvalidInputError(self):
        """Non-ASCII characters in an input error come out escaped."""
        # A Thai "r" character in the message.
        thai_error = TranslationFormatInvalidInputError(
            filename=u"ror-rua", line_number=2, message=u"r\u0e23")
        self.assertEqual(str(thai_error), "ror-rua, line 2: r\\u0e23")

        # The Khmer equivalent, this time in the filename.
        khmer_error = TranslationFormatInvalidInputError(
            filename=u"ro-\u179a", message=u"hok baay heuy?")
        self.assertEqual(str(khmer_error), "ro-\\u179a: hok baay heuy?")
Пример #3
0
    def _storeCurrentMessage(self):
        if self._message is not None:
            msgkey = self._message.msgid_singular
            if self._message.context is not None:
                msgkey = '%s\2%s' % (self._message.context, msgkey)
            if msgkey in self._messageids:
                # We use '%r' instead of '%d' because there are situations
                # when it returns an "<unprintable instance object>". You can
                # see more details on bug #2896
                raise TranslationFormatInvalidInputError(
                    message='PO file: duplicate msgid ending on line %r' %
                    (self._message_lineno))

            number_plural_forms = (
                self._translation_file.header.number_plural_forms)
            if (self._message.msgid_plural
                    and len(self._message.translations) < number_plural_forms):
                # Has plural forms but the number of translations is lower.
                # Fill the others with an empty string.
                for index in range(len(self._message.translations),
                                   number_plural_forms):
                    self._message.addTranslation(index, u'')

            self._translation_file.messages.append(self._message)
            self._messageids.add(msgkey)
            self._message = None
Пример #4
0
    def _decode(self):
        # is there anything to convert?
        if not self._pending_chars:
            return

        # if the PO header hasn't been parsed, then we don't know the
        # encoding yet
        if self._translation_file.header is None:
            return

        charset = self._translation_file.header.charset
        decode = codecs.getdecoder(charset)
        # decode as many characters as we can:
        try:
            newchars, length = decode(self._pending_chars, 'strict')
        except UnicodeDecodeError as exc:
            # XXX: James Henstridge 2006-03-16:
            # If the number of unconvertable chars is longer than a
            # multibyte sequence to be, the UnicodeDecodeError indicates
            # a real error, rather than a partial read.
            # I don't know what the longest multibyte sequence in the
            # encodings we need to support, but it shouldn't be more
            # than 10 bytes ...
            if len(self._pending_chars) - exc.start > 10:
                raise TranslationFormatInvalidInputError(
                    line_number=self._lineno,
                    message="Could not decode input from %s" % charset)
            newchars, length = decode(self._pending_chars[:exc.start],
                                      'strict')
        self._pending_unichars += newchars
        self._pending_chars = self._pending_chars[length:]
Пример #5
0
    def __init__(self, filename, archive, xpi_path=None, manifest=None):
        """Open zip (or XPI, or jar) file and scan its contents.

        :param filename: Name of this zip (XPI/jar) archive.
        :param archive: File-like object containing this zip archive.
        :param xpi_path: Full path of this file inside the XPI archive.
            Leave out for the XPI archive itself.
        :param manifest: `XpiManifest` representing the XPI archive's
            manifest file, if any.
        :raises TranslationFormatInvalidInputError: if `archive` cannot be
            opened as a zip file.
        """
        self.filename = filename
        self.header = None
        self.last_translator = None
        self.manifest = manifest
        try:
            self.archive = ZipFile(archive, 'r')
        except BadZipfile as exception:
            raise TranslationFormatInvalidInputError(
                filename=filename, message=str(exception))

        if xpi_path is None:
            # This is the main XPI file: it may carry its own manifest and
            # an install.rdf header.
            xpi_path = ''
            contained_files = set(self.archive.namelist())
            if manifest is None:
                # Look for a manifest; the first candidate present wins.
                candidates = ['chrome.manifest', 'en-US.manifest']
                present = [
                    name for name in candidates if name in contained_files]
                if present:
                    manifest_content = self.archive.read(present[0])
                    self.manifest = XpiManifest(manifest_content)
            if 'install.rdf' in contained_files:
                self.header = XpiHeader(self.archive.read('install.rdf'))

        # Strip trailing slash to avoid doubling it.
        xpi_path = xpi_path.rstrip('/')

        self._begin()

        # Process zipped files.  Sort by path to keep ordering deterministic.
        # Ordering matters in sequence numbering (which in turn shows up in
        # the UI), but also for consistency in duplicates resolution and for
        # automated testing.
        for member in sorted(self.archive.namelist()):
            self._processEntry(member, xpi_path)

        self._finish()
Пример #6
0
    def _decode(self, text):
        if text is None or isinstance(text, unicode):
            # There is noo need to do anything.
            return text
        charset = self.charset
        try:
            text = unicode(text, charset)
        except UnicodeError:
            self._emitSyntaxWarning('String is not in declared charset %r' %
                                    charset)
            text = unicode(text, charset, 'replace')
        except LookupError:
            raise TranslationFormatInvalidInputError(
                message='Unknown charset %r' % charset)

        return text
    def parse(self, translation_import_queue_entry):
        """See `ITranslationFormatImporter`.

        Fetches the queue entry's content from the librarian, parses it as
        an XPI archive and returns the resulting `TranslationFileData`.

        :raises TranslationFormatInvalidInputError: if the parsed archive
            has no header (no install.rdf was found).
        """
        self._translation_file = TranslationFileData()
        self.basepath = translation_import_queue_entry.path
        self.productseries = translation_import_queue_entry.productseries
        self.distroseries = translation_import_queue_entry.distroseries
        self.sourcepackagename = (
            translation_import_queue_entry.sourcepackagename)
        self.by_maintainer = translation_import_queue_entry.by_maintainer

        librarian_client = getUtility(ILibrarianClient)
        content = librarian_client.getFileByAlias(
            translation_import_queue_entry.content.id).read()

        parser = MozillaZipImportParser(self.basepath, StringIO(content))
        if parser.header is None:
            # Pass the text by keyword, like every other raise site, so it
            # is reported as the error message and not bound to whatever
            # the exception's first positional parameter happens to be.
            raise TranslationFormatInvalidInputError(
                message="No install.rdf found")

        self._translation_file.header = parser.header
        self._translation_file.messages = parser.messages

        return self._translation_file
Пример #8
0
    def __init__(self, filename, chrome_path, content):
        """Parse the given DTD content, collecting its messages.

        :param filename: Name of the .dtd file; handed to the error handler
            and to the DTD consumer.
        :param chrome_path: Chrome path, handed to the DTD consumer.
        :param content: Encoded contents of the .dtd file.
        :raises TranslationFormatInvalidInputError: if `content` is not
            valid UTF-8.
        """
        self.messages = []
        self.filename = filename
        self.chrome_path = chrome_path

        # .dtd files are supposed to be using UTF-8 encoding, if the file is
        # using another encoding, it's against the standard so we reject it
        try:
            content = content.decode('utf-8')
        except UnicodeDecodeError:
            # Pass the text by keyword so that it is reported as the error
            # message rather than bound to the exception's first positional
            # parameter.
            raise TranslationFormatInvalidInputError(
                message='Content is not valid UTF-8 text')

        error_handler = DtdErrorHandler()
        error_handler.filename = filename

        parser = dtdparser.DTDParser()
        parser.set_error_handler(error_handler)
        parser.set_inputsource_factory(DtdInputSourceFactoryStub())
        # The consumer appends what it finds to self.messages.
        dtd = MozillaDtdConsumer(parser, filename, chrome_path, self.messages)
        parser.set_dtd_consumer(dtd)
        parser.parse_string(content)
Пример #9
0
    def __init__(self, header_content):
        """Set up header defaults and store the header text as unicode.

        :param header_content: Header text, either as a `str` encoded in
            UTF-8 or as a `unicode` object.
        :raises TranslationFormatInvalidInputError: if `header_content` is
            a `str` that cannot be decoded as UTF-8.
        """
        self._raw_content = header_content
        self.is_fuzzy = False
        self.template_creation_date = None
        self.translation_revision_date = None
        self.language_team = None
        self.has_plural_forms = False
        self.number_plural_forms = 0
        self.plural_form_expression = None
        self.charset = 'UTF-8'
        self.launchpad_export_date = None
        self.comment = None

        if isinstance(header_content, str):
            try:
                self._text = header_content.decode(self.charset)
            except UnicodeDecodeError:
                # Pass the text by keyword so that it is reported as the
                # error message rather than bound to the exception's first
                # positional parameter.
                raise TranslationFormatInvalidInputError(
                    message="XPI header is not encoded in %s." % self.charset)
        else:
            assert isinstance(
                header_content,
                unicode), ("XPI header text is neither str nor unicode.")
            self._text = header_content
    def parse(self, content):
        """Parse given content as a property file.

        Once the parse is done, self.messages has a list of the available
        `ITranslationMessageData`s.

        :param content: Encoded contents of the .properties file.
        :raises TranslationFormatInvalidInputError: if `content` is not
            valid UTF-8, or if a line cannot be unicode-unescaped.
        :raises TranslationFormatSyntaxError: if a line looks like a message
            but its key is rejected by `valid_property_msgid`.
        """

        # .properties files are supposed to be unicode-escaped, but we know
        # that there are some .xpi language packs that instead, use UTF-8.
        # That's against the specification, but Mozilla applications accept
        # it anyway, so we try to support it too.
        # To do this support, we read the text as being in UTF-8
        # because unicode-escaped looks like ASCII files.
        try:
            content = content.decode('utf-8')
        except UnicodeDecodeError:
            # Pass the text by keyword so that it is reported as the error
            # message rather than bound to the exception's first positional
            # parameter.
            raise TranslationFormatInvalidInputError(
                message='Content is not valid unicode-escaped text')

        is_multi_line_comment = False
        last_comment = None
        last_comment_line_num = 0
        ignore_comment = False
        is_message = False
        translation = u''
        # Lines are numbered from 1.  The number is assigned before the line
        # is unescaped so that an unescaping error reports the line that
        # actually failed, not the one before it.
        for line_num, line in enumerate(content.splitlines(), 1):
            # Now, to "normalize" all to the same encoding, we encode to
            # unicode-escape first, and then decode it to unicode
            # XXX: Danilo 2006-08-01: we _might_ get performance
            # improvements if we reimplement this to work directly,
            # though, it will be hard to beat C-based de/encoder.
            # This call unescapes everything so we don't need to care about
            # quotes escaping.
            try:
                string = line.encode('raw-unicode_escape')
                line = string.decode('unicode_escape')
            except UnicodeDecodeError as exception:
                raise TranslationFormatInvalidInputError(
                    filename=self.filename,
                    line_number=line_num,
                    message=str(exception))

            if not is_multi_line_comment:
                # Remove any white space before the useful data, like
                # ' # foo'.
                line = line.lstrip()
                if not line:
                    # It's an empty line. Reset any previous comment we have.
                    last_comment = None
                    last_comment_line_num = 0
                    ignore_comment = False
                elif line.startswith(u'#') or line.startswith(u'//'):
                    # It's a whole line comment.
                    ignore_comment = False
                    # Drop the whole comment marker: one character for '#',
                    # two for '//'.
                    marker_length = 2 if line.startswith(u'//') else 1
                    line = line[marker_length:].strip()
                    if last_comment:
                        last_comment += line
                    elif len(line) > 0:
                        last_comment = line

                    if last_comment and not last_comment.endswith('\n'):
                        # Comments must end always with a new line.
                        last_comment += '\n'

                    last_comment_line_num = line_num
                    continue

            # Unescaped URLs are a common mistake: the "//" starts an
            # end-of-line comment.  To work around that, treat "://" as
            # a special case.
            just_saw_colon = False

            while line:
                if is_multi_line_comment:
                    if line.startswith(u'*/'):
                        # The comment ended, we jump the closing tag and
                        # continue with the parsing.
                        line = line[2:]
                        is_multi_line_comment = False
                        last_comment_line_num = line_num
                        if ignore_comment:
                            last_comment = None
                            ignore_comment = False
                        elif last_comment is not None:
                            # Comments must end always with a new line.
                            # There is nothing to terminate when the comment
                            # was ignored or was empty.
                            last_comment += '\n'
                    elif line.startswith(self.license_block_text):
                        # It's a comment with a licence notice, this
                        # comment can be ignored.
                        ignore_comment = True
                        # Jump the whole tag
                        line = line[len(self.license_block_text):]
                    else:
                        # Store the character.
                        if last_comment is None:
                            last_comment = line[0]
                        elif last_comment_line_num == line_num:
                            last_comment += line[0]
                        else:
                            last_comment = u'%s\n%s' % (last_comment, line[0])
                            last_comment_line_num = line_num
                        # Jump the processed char.
                        line = line[1:]
                    continue
                elif line.startswith(u'/*'):
                    # It's a multi line comment
                    is_multi_line_comment = True
                    ignore_comment = False
                    last_comment_line_num = line_num
                    # Jump the comment starting tag
                    line = line[2:]
                    continue
                elif line.startswith(u'//') and not just_saw_colon:
                    # End-of-line comment.
                    last_comment = '%s\n' % line[2:].strip()
                    last_comment_line_num = line_num
                    # On to next line.
                    break
                elif is_message:
                    # Store the char and continue.
                    head_char = line[0]
                    translation += head_char
                    line = line[1:]
                    just_saw_colon = (head_char == ':')
                    continue
                elif u'=' in line:
                    # Looks like a message string.
                    (key, value) = line.split('=', 1)
                    # Remove leading and trailing white spaces.
                    key = key.strip()

                    if valid_property_msgid(key):
                        is_message = True
                        # Jump the msgid, control chars and leading white
                        # space.
                        line = value.lstrip()
                        continue
                    else:
                        raise TranslationFormatSyntaxError(
                            line_number=line_num,
                            message=u"invalid msgid: '%s'" % key)
                else:
                    # Got a line that is not a valid message nor a valid
                    # comment. Ignore it because main en-US.xpi catalog from
                    # Firefox has such line/error. We follow the 'be strict
                    # with what you export, be permisive with what you import'
                    # policy.
                    break
            if is_message:
                # We just parsed a message, so we need to add it to the list
                # of messages.
                if ignore_comment or last_comment_line_num < line_num - 1:
                    # We must ignore the comment or either the comment is not
                    # the last thing before this message or is not in the same
                    # line as this message.
                    last_comment = None
                    ignore_comment = False

                message = TranslationMessageData()
                message.msgid_singular = key
                message.context = self.chrome_path
                message.file_references_list = [
                    "%s:%d(%s)" % (self.filename, line_num, key)
                ]
                value = translation.strip()
                message.addTranslation(TranslationConstants.SINGULAR_FORM,
                                       value)
                message.singular_text = value
                message.source_comment = last_comment
                self.messages.append(message)

                # Reset status vars.
                last_comment = None
                last_comment_line_num = 0
                is_message = False
                translation = u''
Пример #11
0
    def _unescapeNumericCharSequence(self, string):
        """Unescape leading sequence of escaped numeric character codes.

        This is for characters given in hexadecimal or octal escape notation.

        :return: a tuple: first, any leading part of `string` as an unescaped
            string (empty if `string` did not start with a numeric escape
            sequence), and second, the remainder of `string` after the leading
            numeric escape sequences have been parsed.
        """
        escaped_string = ''
        position = 0
        length = len(string)
        while position + 1 < length and string[position] == '\\':
            # Handle escaped characters given as numeric character codes.
            # These will still be in the original encoding.  We extract the
            # whole sequence of escaped chars to recode them later into
            # Unicode in a single call.
            lead_char = string[position + 1]
            if lead_char == 'x':
                # Hexadecimal escape.
                position += 4
            elif lead_char.isdigit():
                # Octal escape.
                position += 2
                # Up to two more octal digits.
                for i in range(2):
                    if string[position].isdigit():
                        position += 1
                    else:
                        break
            elif lead_char in ESCAPE_MAP:
                # It's part of our mapping table, we ignore it here.
                break
            else:
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message=("Unknown escape sequence %s" %
                             string[position:position + 2]))

        if position == 0:
            # No escaping to be done.
            return '', string

        # We found some text escaped that should be recoded to Unicode.
        # First, we unescape it.
        escaped_string, string = string[:position], string[position:]
        unescaped_string = escaped_string.decode('string-escape')

        if (self._translation_file is not None
                and self._translation_file.header is not None):
            # There is a header, so we know the original encoding for
            # the given string.
            charset = self._translation_file.header.charset
            know_charset = True
        else:
            # We don't know the original encoding of the imported file so we
            # cannot get the right values.  We try ASCII.
            # XXX JeroenVermeulen 2008-02-08: might as well try UTF-8 here.
            # It's a superset, and anything that's not UTF-8 is very unlikely
            # to validate as UTF-8.
            charset = 'ascii'
            know_charset = False

        try:
            decoded_text = unescaped_string.decode(charset)
        except UnicodeDecodeError:
            if know_charset:
                message = ("Could not decode escaped string as %s: (%s)" %
                           (charset, escaped_string))
            else:
                message = ("Could not decode escaped string: (%s)" %
                           escaped_string)
            raise TranslationFormatInvalidInputError(line_number=self._lineno,
                                                     message=message)

        return decoded_text, string
Пример #12
0
 def fatal(self, msg):
     """Report `msg` as invalid input in this handler's file.

     :raises TranslationFormatInvalidInputError: always.
     """
     error = TranslationFormatInvalidInputError(
         filename=self.filename, message=msg)
     raise error