Пример #1
0
    def test_word(self):
        """A lone 'Hello. ' yields one segment without the final space."""
        msg = Message()
        msg.append_text('Hello. ')

        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, [((TEXT, u'Hello.'),)])
Пример #2
0
    def test_word(self):
        """Expect exactly one segment, u'Hello.', for the input 'Hello. '."""
        source = Message()
        source.append_text('Hello. ')

        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]
        self.assertEqual(collected, [((TEXT, u'Hello.'),)])
Пример #3
0
    def test_etc(self):
        """An ellipsis ('...') is expected to close the first segment."""
        msg = Message()
        msg.append_text(u'A lot of animals... And no man')

        wanted = [
            ((TEXT, u'A lot of animals...'),),
            ((TEXT, u'And no man'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #4
0
    def test_number(self):
        """Dots and commas inside numbers are expected not to split text."""
        sentence = u'The 12.54 and 12,54 and 152.'

        msg = Message()
        msg.append_text(sentence)
        found = [seg for seg, _context, _offset in get_segments(msg)]

        # The whole input must come back as a single TEXT segment.
        wanted = [((TEXT, u'The 12.54 and 12,54 and 152.'),)]
        self.assertEqual(found, wanted)
Пример #5
0
    def test_number(self):
        """Expect one segment for a sentence holding '12.54' and '12,54'."""
        source = Message()
        source.append_text(u'The 12.54 and 12,54 and 152.')

        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]
        self.assertEqual(collected,
                         [((TEXT, u'The 12.54 and 12,54 and 152.'),)])
Пример #6
0
    def test_punctuation(self):
        """Expect one segment with the run of spaces collapsed to one."""
        msg = Message()
        msg.append_text(u'A Ph.D in          mathematics?!!!!')

        # 'Ph.D' and the trailing '?!!!!' must not introduce extra segments.
        wanted = [((TEXT, u'A Ph.D in mathematics?!!!!'),)]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #7
0
    def test_single_character(self):
        """A one-letter word followed by a dot ('T.') must not split."""
        sentence = u'I am T. From.'

        msg = Message()
        msg.append_text(sentence)
        found = [seg for seg, _context, _offset in get_segments(msg)]

        wanted = [((TEXT, u'I am T. From.'),)]
        self.assertEqual(found, wanted)
Пример #8
0
    def test_tab(self):
        """Surrounding newlines, tabs and spaces are expected to be dropped."""
        msg = Message()
        msg.append_text('\n\t   This folder is empty.\n\t   ')

        wanted = [((TEXT, u'This folder is empty.'),)]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #9
0
    def test_between_number(self):
        """Expect a split after 'Price:' but none inside '-12.25'."""
        msg = Message()
        msg.append_text(u'Price: -12.25 Euro.')

        wanted = [
            ((TEXT, u'Price:'),),
            ((TEXT, u'-12.25 Euro.'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #10
0
    def test_unknown_abrevations(self):
        """The dotted initials 'E.T.' are expected not to split the text."""
        sentence = u'E.T. is beautiful.'

        msg = Message()
        msg.append_text(sentence)
        found = [seg for seg, _context, _offset in get_segments(msg)]

        wanted = [((TEXT, u'E.T. is beautiful.'),)]
        self.assertEqual(found, wanted)
Пример #11
0
    def test_punctuation(self):
        """'Ph.D' and '?!!!!' stay in one segment; inner spaces collapse."""
        source = Message()
        source.append_text(u'A Ph.D in          mathematics?!!!!')

        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]
        self.assertEqual(collected,
                         [((TEXT, u'A Ph.D in mathematics?!!!!'),)])
Пример #12
0
    def test_unknown_abrevations(self):
        """Expect a single segment for u'E.T. is beautiful.'."""
        source = Message()
        source.append_text(u'E.T. is beautiful.')

        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]
        self.assertEqual(collected, [((TEXT, u'E.T. is beautiful.'),)])
Пример #13
0
    def test_between_number(self):
        """The colon ends a segment; the signed decimal stays whole."""
        sentence = u'Price: -12.25 Euro.'

        source = Message()
        source.append_text(sentence)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        self.assertEqual(
            collected,
            [((TEXT, u'Price:'),), ((TEXT, u'-12.25 Euro.'),)])
Пример #14
0
    def test_single_character(self):
        """Expect a single segment for u'I am T. From.'."""
        source = Message()
        source.append_text(u'I am T. From.')

        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]
        self.assertEqual(collected, [((TEXT, u'I am T. From.'),)])
Пример #15
0
    def test_simple(self):
        """Two plain sentences are expected to give two segments."""
        msg = Message()
        msg.append_text(u'This is a sentence. A very little sentence.')

        wanted = [
            ((TEXT, u'This is a sentence.'),),
            ((TEXT, u'A very little sentence.'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #16
0
    def test_simple(self):
        """Expect one segment per sentence, split at the first full stop."""
        sentence = u'This is a sentence. A very little sentence.'

        source = Message()
        source.append_text(sentence)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        first = ((TEXT, u'This is a sentence.'),)
        second = ((TEXT, u'A very little sentence.'),)
        self.assertEqual(collected, [first, second])
Пример #17
0
    def test_tab(self):
        """Expect only the sentence itself, with outer whitespace removed."""
        padded = '\n\t   This folder is empty.\n\t   '

        source = Message()
        source.append_text(padded)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        self.assertEqual(collected, [((TEXT, u'This folder is empty.'),)])
Пример #18
0
    def test_etc(self):
        """Expect a split right after the '...' that ends the first part."""
        sentence = u'A lot of animals... And no man'

        source = Message()
        source.append_text(sentence)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        self.assertEqual(
            collected,
            [((TEXT, u'A lot of animals...'),), ((TEXT, u'And no man'),)])
Пример #19
0
    def test_newline(self):
        """A newline inside a sentence is expected to become one space."""
        msg = Message()
        msg.append_text(
            'And you must show them these terms so they know their\n'
            'rights.\n')

        wanted = [
            ((TEXT,
              u'And you must show them these terms so they know their '
              u'rights.'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #20
0
    def test_semicolon(self):
        """A semicolon is expected to end the first segment."""
        msg = Message()
        msg.append_text(
            'Write to the Free Software Foundation; we sometimes make'
            ' exceptions for this.')

        wanted = [
            ((TEXT, u'Write to the Free Software Foundation;'),),
            ((TEXT, u'we sometimes make exceptions for this.'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #21
0
    def test_newline(self):
        """Expect one segment: inner newline turned to a space, last dropped."""
        wrapped = ('And you must show them these terms so they know their\n'
                   'rights.\n')

        source = Message()
        source.append_text(wrapped)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        joined = (u'And you must show them these terms so they know their '
                  u'rights.')
        self.assertEqual(collected, [((TEXT, joined),)])
Пример #22
0
    def test_semicolon(self):
        """Expect two segments, the first one ending with the ';'."""
        sentence = ('Write to the Free Software Foundation; we sometimes make'
                    ' exceptions for this.')

        source = Message()
        source.append_text(sentence)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        before = ((TEXT, u'Write to the Free Software Foundation;'),)
        after = ((TEXT, u'we sometimes make exceptions for this.'),)
        self.assertEqual(collected, [before, after])
Пример #23
0
    def test_parentheses2(self):
        """A sentence closed by '.)' is expected not to start a new segment."""
        msg = Message()
        msg.append_text(
            '(Hereinafter, translation is included without limitation'
            ' in the term "modification".)  Each licensee is addressed'
            ' as "you".')

        # The double space after '.)' is expected to collapse to one.
        wanted = [
            ((TEXT,
              u'(Hereinafter, translation is included without '
              u'limitation in the term "modification".) Each '
              u'licensee is addressed as "you".'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #24
0
    def test_raw_text(self):
        """With keep_spaces=True, inner spaces and newlines are expected kept."""
        msg = Message()
        msg.append_text(
            u'This is raw text. Every characters must be kept. '
            u'1 space 2 spaces  3 spaces   1 newline\nend.')

        wanted = [
            ((TEXT, u'This is raw text.'),),
            ((TEXT, u'Every characters must be kept.'),),
            ((TEXT, u'1 space 2 spaces  3 spaces   1 newline\nend.'),),
        ]
        found = [
            seg
            for seg, _context, _offset in get_segments(msg, keep_spaces=True)
        ]
        self.assertEqual(found, wanted)
Пример #25
0
    def test_raw_text(self):
        """Expect three segments; runs of spaces and the newline survive."""
        raw = (u'This is raw text. Every characters must be kept. '
               u'1 space 2 spaces  3 spaces   1 newline\nend.')

        source = Message()
        source.append_text(raw)
        # keep_spaces=True is the only difference from the other tests' calls.
        pieces = get_segments(source, keep_spaces=True)
        collected = [unit for unit, _context, _offset in pieces]

        self.assertEqual(collected, [
            ((TEXT, u'This is raw text.'),),
            ((TEXT, u'Every characters must be kept.'),),
            ((TEXT, u'1 space 2 spaces  3 spaces   1 newline\nend.'),),
        ])
Пример #26
0
    def test_parentheses2(self):
        """Expect one segment even though '.)' is followed by a new sentence."""
        passage = ('(Hereinafter, translation is included without limitation'
                   ' in the term "modification".)  Each licensee is addressed'
                   ' as "you".')

        source = Message()
        source.append_text(passage)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        whole = (u'(Hereinafter, translation is included without '
                 u'limitation in the term "modification".) Each '
                 u'licensee is addressed as "you".')
        self.assertEqual(collected, [((TEXT, whole),)])
Пример #27
0
    def test_parentheses1(self):
        """Expect a split after '(Exception:' and trailing spaces dropped."""
        msg = Message()
        msg.append_text(
            '(Exception: if the Program itself is interactive but does not'
            ' normally print such an announcement, your work based on the'
            ' Program is not required to print an announcement.)  ')

        wanted = [
            ((TEXT, u'(Exception:'),),
            ((TEXT,
              u'if the Program itself is interactive but does '
              u'not normally print such an announcement, your '
              u'work based on the Program is not required to '
              u'print an announcement.)'),),
        ]
        found = [seg for seg, _context, _offset in get_segments(msg)]
        self.assertEqual(found, wanted)
Пример #28
0
    def test_parentheses1(self):
        """The colon after '(Exception' is expected to end the first segment."""
        passage = (
            '(Exception: if the Program itself is interactive but does not'
            ' normally print such an announcement, your work based on the'
            ' Program is not required to print an announcement.)  ')

        source = Message()
        source.append_text(passage)
        collected = [
            unit for unit, _context, _offset in get_segments(source)
        ]

        head = ((TEXT, u'(Exception:'),)
        tail = ((TEXT, u'if the Program itself is interactive but does '
                       u'not normally print such an announcement, your '
                       u'work based on the Program is not required to '
                       u'print an announcement.)'),)
        self.assertEqual(collected, [head, tail])
Пример #29
0
 def test_abrevations(self):
     """The abbreviations 'Inc.' and 'Mr.' are expected not to split text."""
     # Each case is (input text, expected segments); run in this order.
     cases = (
         # 1
         (u'This is Toto Inc. a big company.',
          [((TEXT, u'This is Toto Inc. a big company.'),)]),
         # 2
         (u'Mr. From',
          [((TEXT, u'Mr. From'),)]),
     )
     for sentence, wanted in cases:
         msg = Message()
         msg.append_text(sentence)
         found = [seg for seg, _context, _offset in get_segments(msg)]
         self.assertEqual(found, wanted)
Пример #30
0
 def test_abrevations(self):
     """Expect one segment for texts containing 'Inc.' and 'Mr.'."""
     inputs = [
         u'This is Toto Inc. a big company.',  # 1
         u'Mr. From',                          # 2
     ]
     outputs = [
         [((TEXT, u'This is Toto Inc. a big company.'),)],
         [((TEXT, u'Mr. From'),)],
     ]
     # Inputs and expected outputs are paired positionally.
     for sentence, wanted in zip(inputs, outputs):
         source = Message()
         source.append_text(sentence)
         collected = [
             unit for unit, _context, _offset in get_segments(source)
         ]
         self.assertEqual(collected, wanted)
Пример #31
0
def _get_translatable_blocks(events):
    """Regroup a stream of XML events into translatable ``MESSAGE`` events.

    Iterates over ``events``, each of which is unpacked as
    ``(type, value, line)``.  Text, inline elements, comments and serialized
    skipped sub-trees are accumulated into a ``Message``.  Any event that is
    not absorbed that way first flushes the pending message (when it is
    truthy) as ``(MESSAGE, message, message.get_line())`` and is then yielded
    unchanged.  A last pending message is flushed once ``events`` is
    exhausted.

    This is a generator.  It uses the ``unicode`` builtin, so it targets
    Python 2.
    """
    # Default encoding, replaced when an XML declaration is seen
    encoding = 'utf-8'

    # Counter used to pair each start format with its end format
    id = 0
    # Ids of the inline elements currently open (pushed on start, popped on
    # end)
    id_stack = []
    # The top of this stack is the context passed along with text
    context_stack = [None]
    # Events buffered while skipping a sub-tree that must be serialized into
    # the message; None (or empty) when nothing is being buffered
    stream = None

    message = Message()
    # Nesting depth inside a skipped element; 0 means "not skipping"
    skip_level = 0
    for event in events:
        type, value, line = event

        # Remember the declared encoding; there is no ``continue`` here, so
        # the declaration itself goes through the flush-and-yield code below
        if type == XML_DECL:
            encoding = value[1]
        # From here on, only events that can belong to a message are absorbed
        elif type == START_ELEMENT:
            if skip_level > 0:
                # Nested element inside a skipped sub-tree
                skip_level += 1
                if stream:
                    stream.append(event)
                    continue
            else:
                tag_uri, tag_name, attributes = value
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                # NOTE(review): if this element is then skipped (either branch
                # below that sets skip_level = 1), its END_ELEMENT is handled
                # by the ``skip_level > 0`` branch, which never pops the
                # context pushed here -- confirm this is intended.
                if schema.context is not None:
                    context_stack.append(schema.context)

                # Skip content ?
                if schema.skip_content:
                    skip_level = 1
                    # Inside an open inline element the skipped sub-tree is
                    # buffered; otherwise the event falls through and is
                    # yielded as is
                    if id_stack:
                        stream = [event]
                        continue
                # Is inline ?
                elif schema.is_inline:
                    id += 1
                    id_stack.append(id)

                    start_format = _make_start_format(tag_uri, tag_name,
                                                      attributes, encoding)
                    message.append_start_format(start_format, id, line)
                    continue
                # Neither skipped nor inline, but found inside an open inline
                # element: buffer the whole sub-tree
                elif id_stack:
                    skip_level = 1
                    stream = [event]
                    continue
        elif type == END_ELEMENT:
            if skip_level > 0:
                skip_level -= 1
                if stream:
                    stream.append(event)
                    if skip_level == 0:
                        # The buffered sub-tree is complete: serialize it and
                        # add it to the message as a start format immediately
                        # closed by an empty end format with the same id
                        id += 1
                        aux = stream_to_str(stream, encoding)
                        aux = unicode(aux, encoding)
                        aux = [(aux, False, context_stack[-1])]
                        message.append_start_format(aux, id, line)
                        message.append_end_format([], id, line)
                        stream = None
                    continue
            else:
                tag_uri, tag_name = value[:2]
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.pop()

                # Is inline ?
                if schema.is_inline:
                    # Close the format opened by the matching start element
                    message.append_end_format([(get_end_tag(value), False,
                                                None)], id_stack.pop(), line)
                    continue
        elif type == TEXT:
            # Text inside a buffered sub-tree goes to the buffer
            if stream:
                stream.append(event)
                continue
            # Otherwise keep it when not skipping and either the text is not
            # blank or the message is already truthy
            elif skip_level == 0 and (value.strip() != '' or message):
                value = XMLContent.encode(value)
                value = unicode(value, encoding)
                message.append_text(value, line, context_stack[-1])
                continue
        elif type == COMMENT:
            if stream:
                stream.append(event)
                continue
            # A comment is only absorbed when the message is already truthy;
            # it is stored as a start format holding the '<!--...-->' markup,
            # immediately closed by an empty end format
            elif message:
                id += 1
                if isinstance(value, str):
                    value = unicode(value, encoding)
                value = u'<!--%s-->' % value
                message.append_start_format([(value, False, None)], id, line)
                message.append_end_format([], id, line)
                continue

        # The event was not absorbed: flush the pending message, then forward
        # the event itself
        if message:
            yield MESSAGE, message, message.get_line()
            message = Message()

        yield event
    # Send the last message!
    if message:
        yield MESSAGE, message, message.get_line()