示例#1
0
 def parse_placeables(self):
     """Parse the source and target of every unit, then print the unit count."""
     total_units = 0
     for store in self.parsedfiles:
         for unit in store.units:
             # Source first, then target, each against the general parsers.
             for field in ("source", "target"):
                 placeables.parse(getattr(unit, field), placeables.general.parsers)
         total_units += len(store.units)
     print("counted %d units" % total_units)
示例#2
0
 def parse_placeables(self):
     """Run the general placeable parsers over all units and report how many there were."""
     seen = 0
     for current_file in self.parsedfiles:
         for entry in current_file.units:
             # Both sides of the unit are parsed; the results are discarded.
             placeables.parse(entry.source, placeables.general.parsers)
             placeables.parse(entry.target, placeables.general.parsers)
         seen = seen + len(current_file.units)
     print("counted %d units" % seen)
    def test_simple_terminology(self):
        """A terminology placeable nested in the parsed tree matches the term store."""
        TerminologyPlaceable.matchers = [self.matcher]
        tree = parse(self.test_string, general.parsers + term_parsers)

        # The first and third children must both be XML tag placeables.
        for index in (0, 2):
            assert isinstance(tree.sub[index], general.XMLTagPlaceable)

        tree.print_tree()
        term = tree.sub[3].sub[1]
        assert isinstance(term, TerminologyPlaceable)

        # The term's text and translation are compared with the third unit
        # of the terminology store.
        term_text = six.text_type(term)
        assert term_text == self.term_po.getunits()[2].source
        translated = term.translate()
        assert translated == six.text_type(self.term_po.getunits()[2].target)
示例#4
0
    def test_simple_terminology(self):
        """A terminology placeable nested in the parsed tree matches the term store."""
        TerminologyPlaceable.matchers = [self.matcher]
        tree = parse(self.test_string, general.parsers + term_parsers)

        # The first and third children must both be XML tag placeables.
        for index in (0, 2):
            assert isinstance(tree.sub[index], general.XMLTagPlaceable)

        tree.print_tree()
        term = tree.sub[3].sub[1]
        assert isinstance(term, TerminologyPlaceable)

        # The term's text and translation are compared with the third unit
        # of the terminology store.
        term_text = unicode(term)
        assert term_text == self.term_po.getunits()[2].source
        translated = term.translate()
        assert translated == unicode(self.term_po.getunits()[2].target)
示例#5
0
    def test_delete_range_case4(self):
        """Case 4: delete_range() over a span that crosses several sub-elements."""
        # Delete from the offset of the third sub-element up to the end.
        working = self.elem.copy()
        start = working.elem_offset(working.sub[2])
        removed, parent_elem, removed_offset = working.delete_range(start, len(working))
        assert removed == self.elem
        assert parent_elem is None
        assert removed_offset is None
        assert len(working.sub) == 2
        assert unicode(working) == u'Ģët <a href="http://www.example.com" alt="Ģët &brand;!">'

        # Second scenario: whole elements lie between the element where the
        # range starts and the element where it ends.
        pristine = parse(u'foo %s bar', general.parsers)
        trimmed = pristine.copy()
        assert len(trimmed.sub) == 3
        removed, parent_elem, removed_offset = trimmed.delete_range(3, 7)
        assert removed == pristine
        assert parent_elem is None
        assert removed_offset is None
        assert unicode(trimmed) == 'foobar'
    def test_delete_range_case4(self):
        """Case 4: delete_range() over a span that crosses several sub-elements."""
        # Delete from the offset of the third sub-element up to the end.
        working = self.elem.copy()
        start = working.elem_offset(working.sub[2])
        removed, parent_elem, removed_offset = working.delete_range(start, len(working))
        assert removed == self.elem
        assert parent_elem is None
        assert removed_offset is None
        assert len(working.sub) == 2
        assert six.text_type(working) == u'Ģët <a href="http://www.example.com" alt="Ģët &brand;!">'

        # Second scenario: whole elements lie between the element where the
        # range starts and the element where it ends.
        pristine = parse(u'foo %s bar', general.parsers)
        trimmed = pristine.copy()
        assert len(trimmed.sub) == 3
        removed, parent_elem, removed_offset = trimmed.delete_range(3, 7)
        assert removed == pristine
        assert parent_elem is None
        assert removed_offset is None
        assert six.text_type(trimmed) == 'foobar'
示例#7
0
 def test_find(self):
     """find() reports the expected offsets for a string and for a parsed element."""
     text_offset = 24
     # The same string lookup is asserted twice.
     assert self.elem.find("example") == text_offset
     assert self.elem.find("example") == text_offset
     brand_entity = parse("&brand;", general.parsers)
     entity_offset = self.elem.find(brand_entity)
     assert entity_offset == 46
示例#8
0
 def __init__(self):
     """Parse ``ORIGSTR`` with the general parsers and keep the result as ``self.elem``."""
     source_text = self.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#9
0
def rich_parse(s):
    """Parse ``s`` with the ``xliffparsers`` parser list and return the result."""
    parsed = parse(s, xliffparsers)
    return parsed
示例#10
0
 def test_find(self):
     """find() reports the expected offsets for plain strings and a parsed element."""
     text_offset = 24
     # Looked up once with a plain literal and once with a u'' literal.
     assert self.elem.find('example') == text_offset
     assert self.elem.find(u'example') == text_offset
     brand_entity = parse(u'&brand;', general.parsers)
     entity_offset = self.elem.find(brand_entity)
     assert entity_offset == 46
示例#11
0
 def __init__(self):
     """Parse ``ORIGSTR`` with the general parsers and keep the result as ``self.elem``."""
     source_text = self.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#12
0
 def __init__(self):
     """Parse ``TestStringElem.ORIGSTR`` and keep the result as ``self.elem``."""
     source_text = TestStringElem.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#13
0
文件: utils.py 项目: amire80/pontoon
def mark_placeables(text):
    """Return markup for ``text`` with each placeable wrapped in ``<mark>``.

    ``text`` is parsed with the PARSERS list defined below and the resulting
    tree is flattened.  Items that are ``BasePlaceable`` instances become
    ``<mark class="...placeable" title="...">`` elements; every other item is
    appended with ``<`` and ``>`` replaced by entities.

    Source: http://bit.ly/1yQOC9B
    """

    class TabEscapePlaceable(base.Ph):
        """Placeable for a tab character."""
        regex = re.compile(r'\t')
        parse = classmethod(general.regex_parse)
        istranslatable = False

    class EscapePlaceable(base.Ph):
        """Placeable for a backslash."""
        regex = re.compile(r'\\')
        parse = classmethod(general.regex_parse)
        istranslatable = False

    class SpacesPlaceable(base.Ph):
        """Placeable for leading, trailing, post-whitespace or repeated spaces."""
        regex = re.compile('^ +| +$|[\r\n\t] +| {2,}')
        parse = classmethod(general.regex_parse)
        istranslatable = False

    # Parser order matters; see the notes on the individual entries.
    PARSERS = [
        TabEscapePlaceable.parse,
        EscapePlaceable.parse,
        general.NewlinePlaceable.parse,
        # Must follow the newline parser: the spaces pattern can match
        # '\n  ' and would otherwise mask the newline.
        SpacesPlaceable.parse,
        general.XMLTagPlaceable.parse,
        general.AltAttrPlaceable.parse,
        general.XMLEntityPlaceable.parse,
        general.PythonFormattingPlaceable.parse,
        general.JavaMessageFormatPlaceable.parse,
        general.FormattingPlaceable.parse,
        # Must follow the printf parser: the Qt pattern can consume the %1
        # in %1$s and would otherwise mask a printf placeable.
        general.QtFormattingPlaceable.parse,
        general.UrlPlaceable.parse,
        general.FilePlaceable.parse,
        general.EmailPlaceable.parse,
        general.CapsPlaceable.parse,
        general.CamelCasePlaceable.parse,
        general.OptionPlaceable.parse,
        general.PunctuationPlaceable.parse,
        general.NumberPlaceable.parse,
    ]

    # Tooltip text, keyed by placeable class name.
    TITLES = {
        'TabEscapePlaceable': "Escaped tab",
        'EscapePlaceable': "Escaped sequence",
        'SpacesPlaceable': "Unusual space in string",
        'AltAttrPlaceable': "'alt' attribute inside XML tag",
        'NewlinePlaceable': "New-line",
        'NumberPlaceable': "Number",
        'QtFormattingPlaceable': "Qt string formatting variable",
        'PythonFormattingPlaceable': "Python string formatting variable",
        'JavaMessageFormatPlaceable': "Java Message formatting variable",
        'FormattingPlaceable': "String formatting variable",
        'UrlPlaceable': "URI",
        'FilePlaceable': "File location",
        'EmailPlaceable': "Email",
        'PunctuationPlaceable': "Punctuation",
        'XMLEntityPlaceable': "XML entity",
        'CapsPlaceable': "Long all-caps string",
        'CamelCasePlaceable': "Camel case string",
        'XMLTagPlaceable': "XML tag",
        'OptionPlaceable': "Command line option",
    }

    # Lookup tables that are the same for every item.
    css_classes = {
        'TabEscapePlaceable': "escape ",
        'EscapePlaceable': "escape ",
        'SpacesPlaceable': "space ",
        'NewlinePlaceable': "escape ",
    }
    newline_markup = {
        u'\r\n': u'\\r\\n<br/>\n',
        u'\r': u'\\r<br/>\n',
        u'\n': u'\\n<br/>\n',
    }
    mark_template = '<mark class="%splaceable" title="%s">%s</mark>'

    pieces = []

    # Walk the flat list of placeables and StringElem instances.
    for item in parse(text, PARSERS).flatten():
        if not isinstance(item, BasePlaceable):
            # Plain text: only the angle brackets are turned into entities.
            plain = unicode(item)
            pieces.append(plain.replace('<', '&lt;').replace('>', '&gt;'))
            continue

        class_name = item.__class__.__name__
        placeable = unicode(item)

        # Built for every placeable even though only SpacesPlaceable uses
        # it: the first character is kept unless it is a space, and the
        # remaining characters become non-breaking spaces.
        if placeable.startswith(' '):
            spaces = '&nbsp;' * len(placeable)
        else:
            spaces = placeable[0] + '&nbsp;' * (len(placeable) - 1)

        # How the placeable is displayed in the translation editor.
        if class_name == 'TabEscapePlaceable':
            content = u'\\t'
        elif class_name == 'EscapePlaceable':
            content = u'\\\\'
        elif class_name == 'SpacesPlaceable':
            content = spaces
        elif class_name == 'NewlinePlaceable':
            content = newline_markup.get(placeable)
        elif class_name == 'XMLEntityPlaceable':
            content = placeable.replace('&', '&amp;')
        elif class_name == 'XMLTagPlaceable':
            content = placeable.replace('<', '&lt;').replace('>', '&gt;')
        else:
            content = placeable

        pieces.append(mark_template % (
            css_classes.get(class_name, ""),
            TITLES.get(class_name, "Unknown placeable"),
            content,
        ))

    return u"".join(pieces)
示例#14
0
 def test_find(self):
     """find() reports the expected offsets for plain strings and a parsed element."""
     text_offset = 24
     # Looked up once with a plain literal and once with a u'' literal.
     assert self.elem.find('example') == text_offset
     assert self.elem.find(u'example') == text_offset
     brand_entity = parse(u'&brand;', general.parsers)
     entity_offset = self.elem.find(brand_entity)
     assert entity_offset == 46
示例#15
0
 def setup_method(self, method):
     """Parse ``TestStringElem.ORIGSTR`` and keep the result as ``self.elem``.

     ``method`` is accepted but not used.
     """
     source_text = TestStringElem.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#16
0
 def setup_method(self, method):
     """Parse ``self.ORIGSTR`` and keep the result as ``self.elem``.

     ``method`` is accepted but not used.
     """
     source_text = self.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#17
0
 def setup_method(self, method):
     """Parse ``TestStringElem.ORIGSTR`` and keep the result as ``self.elem``.

     ``method`` is accepted but not used.
     """
     source_text = TestStringElem.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#18
0
def rich_parse(s):
    """Parse ``s`` with the ``xliffparsers`` parser list and return the result."""
    parsed = parse(s, xliffparsers)
    return parsed
示例#19
0
 def setup_method(self, method):
     """Parse ``self.ORIGSTR`` and keep the result as ``self.elem``.

     ``method`` is accepted but not used.
     """
     source_text = self.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#20
0
def mark_placeables(text):
    """Return markup for ``text`` with each placeable wrapped in ``<mark>``.

    ``text`` is parsed with the PARSERS list defined below and the resulting
    tree is flattened.  Items that are ``BasePlaceable`` instances become
    ``<mark class="...placeable" title="...">`` elements; every other item is
    appended with ``<`` and ``>`` replaced by entities.
    """

    # Parser order matters; see the notes on the individual entries.
    PARSERS = [
        NewlineEscapePlaceable.parse,
        TabEscapePlaceable.parse,
        EscapePlaceable.parse,
        # Must come after the escape parsers: the spaces pattern can match
        # '\n  ' and would otherwise mask the newline.
        SpacesPlaceable.parse,
        PythonFormatNamedPlaceable.parse,
        PythonFormatPlaceable.parse,
        general.XMLTagPlaceable.parse,
        general.AltAttrPlaceable.parse,
        general.XMLEntityPlaceable.parse,
        general.PythonFormattingPlaceable.parse,
        general.JavaMessageFormatPlaceable.parse,
        general.FormattingPlaceable.parse,
        # Must follow the printf parser: the Qt pattern can consume the %1
        # in %1$s and would otherwise mask a printf placeable.
        general.QtFormattingPlaceable.parse,
        general.UrlPlaceable.parse,
        general.FilePlaceable.parse,
        general.EmailPlaceable.parse,
        general.CapsPlaceable.parse,
        general.CamelCasePlaceable.parse,
        general.OptionPlaceable.parse,
        general.PunctuationPlaceable.parse,
        general.NumberPlaceable.parse,
    ]

    # Tooltip text, keyed by placeable class name.
    TITLES = {
        'NewlineEscapePlaceable': "Escaped newline",
        'TabEscapePlaceable': "Escaped tab",
        'EscapePlaceable': "Escaped sequence",
        'SpacesPlaceable': "Unusual space in string",
        'AltAttrPlaceable': "'alt' attribute inside XML tag",
        'NewlinePlaceable': "New-line",
        'NumberPlaceable': "Number",
        'QtFormattingPlaceable': "Qt string formatting variable",
        'PythonFormattingPlaceable': "Python string formatting variable",
        'JavaMessageFormatPlaceable': "Java Message formatting variable",
        'FormattingPlaceable': "String formatting variable",
        'UrlPlaceable': "URI",
        'FilePlaceable': "File location",
        'EmailPlaceable': "Email",
        'PunctuationPlaceable': "Punctuation",
        'XMLEntityPlaceable': "XML entity",
        'CapsPlaceable': "Long all-caps string",
        'CamelCasePlaceable': "Camel case string",
        'XMLTagPlaceable': "XML tag",
        'OptionPlaceable': "Command line option",
        'PythonFormatNamedPlaceable': "Python format string",
        'PythonFormatPlaceable': "Python format string"
    }

    # Lookup tables that are the same for every item.
    css_classes = {
        'TabEscapePlaceable': "escape ",
        'EscapePlaceable': "escape ",
        'SpacesPlaceable': "space ",
        'NewlinePlaceable': "escape ",
    }
    newline_markup = {
        u'\r\n': u'\\r\\n<br/>\n',
        u'\r': u'\\r<br/>\n',
        u'\n': u'\\n<br/>\n',
    }
    mark_template = '<mark class="%splaceable" title="%s">%s</mark>'

    pieces = []

    # Walk the flat list of placeables and StringElem instances.
    for item in parse(text, PARSERS).flatten():
        if not isinstance(item, BasePlaceable):
            # Plain text: only the angle brackets are turned into entities.
            plain = unicode(item)
            pieces.append(plain.replace('<', '&lt;').replace('>', '&gt;'))
            continue

        class_name = item.__class__.__name__
        placeable = unicode(item)

        # How the placeable is displayed in the translation editor.
        if class_name == 'TabEscapePlaceable':
            content = u'\\t'
        elif class_name == 'EscapePlaceable':
            content = u'\\'
        elif class_name == 'NewlinePlaceable':
            content = newline_markup.get(placeable)
        elif class_name in ('PythonFormatPlaceable', 'PythonFormatNamedPlaceable'):
            # Ampersand first so the entities added next are not re-escaped.
            content = placeable.replace('&', '&amp;')
            content = content.replace('<', '&lt;').replace('>', '&gt;')
        elif class_name == 'XMLEntityPlaceable':
            content = placeable.replace('&', '&amp;')
        elif class_name == 'XMLTagPlaceable':
            content = placeable.replace('<', '&lt;').replace('>', '&gt;')
        else:
            content = placeable

        pieces.append(mark_template % (
            css_classes.get(class_name, ""),
            TITLES.get(class_name, "Unknown placeable"),
            content,
        ))

    return u"".join(pieces)
示例#21
0
def mark_placeables(text):
    """Return markup for ``text`` with each placeable wrapped in ``<mark>``.

    ``text`` may be a UTF-8 encoded byte string or an already-decoded
    unicode string.  It is parsed with the PARSERS list defined below and
    the resulting tree is flattened.  Items that are ``BasePlaceable``
    instances become ``<mark class="...placeable" title="...">`` elements;
    every other item is appended unchanged.

    Source: http://bit.ly/1yQOC9B

    :param text: the string to mark up (UTF-8 bytes or unicode).
    :returns: a unicode string containing the generated markup.
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    class TabEscapePlaceable(base.Ph):
        """Placeable for a tab character."""
        istranslatable = False
        regex = re.compile(r'\t')
        parse = classmethod(general.regex_parse)

    class EscapePlaceable(base.Ph):
        """Placeable for a backslash."""
        istranslatable = False
        regex = re.compile(r'\\')
        parse = classmethod(general.regex_parse)

    class SpacesPlaceable(base.Ph):
        """Placeable for leading, trailing, post-whitespace or repeated spaces."""
        istranslatable = False
        regex = re.compile('^ +| +$|[\r\n\t] +| {2,}')
        parse = classmethod(general.regex_parse)

    PARSERS = [
        TabEscapePlaceable.parse,
        EscapePlaceable.parse,
        general.NewlinePlaceable.parse,
        # The spaces placeable can match '\n  ' and mask the newline,
        # so it has to come later.
        SpacesPlaceable.parse,
        general.XMLTagPlaceable.parse,
        general.AltAttrPlaceable.parse,
        general.XMLEntityPlaceable.parse,
        general.PythonFormattingPlaceable.parse,
        general.JavaMessageFormatPlaceable.parse,
        general.FormattingPlaceable.parse,
        # The Qt variables can consume the %1 in %1$s which will mask a printf
        # placeable, so it has to come later.
        general.QtFormattingPlaceable.parse,
        general.UrlPlaceable.parse,
        general.FilePlaceable.parse,
        general.EmailPlaceable.parse,
        general.CapsPlaceable.parse,
        general.CamelCasePlaceable.parse,
        general.OptionPlaceable.parse,
        general.PunctuationPlaceable.parse,
        general.NumberPlaceable.parse,
    ]

    # Tooltip text, keyed by placeable class name.
    TITLES = {
        'TabEscapePlaceable': "Escaped tab",
        'EscapePlaceable': "Escaped sequence",
        'SpacesPlaceable': "Unusual space in string",
        'AltAttrPlaceable': "'alt' attribute inside XML tag",
        'NewlinePlaceable': "New-line",
        'NumberPlaceable': "Number",
        'QtFormattingPlaceable': "Qt string formatting variable",
        'PythonFormattingPlaceable': "Python string formatting variable",
        'JavaMessageFormatPlaceable': "Java Message formatting variable",
        'FormattingPlaceable': "String formatting variable",
        'UrlPlaceable': "URI",
        'FilePlaceable': "File location",
        'EmailPlaceable': "Email",
        'PunctuationPlaceable': "Punctuation",
        'XMLEntityPlaceable': "XML entity",
        'CapsPlaceable': "Long all-caps string",
        'CamelCasePlaceable': "Camel case string",
        'XMLTagPlaceable': "XML tag",
        'OptionPlaceable': "Command line option",
    }

    # CSS class prefix used to mark the placeable; constant for every item.
    CSS_CLASSES = {
        'TabEscapePlaceable': "escape ",
        'EscapePlaceable': "escape ",
        'SpacesPlaceable': "space ",
        'NewlinePlaceable': "escape ",
    }

    # Visible rendering of each newline variant; constant for every item.
    NEWLINE_MARKUP = {
        u'\r\n': u'\\r\\n<br/>\n',
        u'\r': u'\\r<br/>\n',
        u'\n': u'\\n<br/>\n',
    }

    # unicode(x, "utf8") raises TypeError when x is already unicode, so only
    # decode values that are not unicode yet.
    if not isinstance(text, unicode):
        text = unicode(text, "utf8")

    # Fragments are collected and joined once at the end.
    pieces = []

    # Get a flat list of placeables and StringElem instances
    flat_items = parse(text, PARSERS).flatten()

    for item in flat_items:

        # Not a placeable: emit as-is.
        # NOTE(review): plain text is not HTML-escaped here - confirm the
        # caller escapes it or that this is intended.
        if not isinstance(item, BasePlaceable):
            pieces.append(unicode(item))
            continue

        # Placeable: mark
        class_name = item.__class__.__name__
        placeable = unicode(item)

        css = CSS_CLASSES.get(class_name, "")
        title = TITLES.get(class_name, "Unknown placeable")

        # Keep the first character unless it is a space; every remaining
        # character is shown as a non-breaking space.
        spaces = '&nbsp;' * len(placeable)
        if not placeable.startswith(' '):
            spaces = placeable[0] + '&nbsp;' * (len(placeable) - 1)

        # Correctly render placeables in translation editor
        content = {
            'TabEscapePlaceable': u'\\t',
            'EscapePlaceable': u'\\\\',
            'SpacesPlaceable': spaces,
            'NewlinePlaceable': NEWLINE_MARKUP.get(placeable),
            'XMLEntityPlaceable': placeable.replace('&', '&amp;'),
            'XMLTagPlaceable':
                placeable.replace('<', '&lt;').replace('>', '&gt;'),
        }.get(class_name, placeable)

        pieces.append(
            '<mark class="%splaceable" title="%s">%s</mark>'
            % (css, title, content)
        )

    return u"".join(pieces)
示例#22
0
 def __init__(self):
     """Parse ``TestStringElem.ORIGSTR`` and keep the result as ``self.elem``."""
     source_text = TestStringElem.ORIGSTR
     self.elem = parse(source_text, general.parsers)
示例#23
0
def mark_placeables(text):
    """Return markup for ``text`` with each placeable wrapped in ``<mark>``.

    ``text`` is parsed with the PARSERS list defined below and the resulting
    tree is flattened.  Items that are ``BasePlaceable`` instances become
    ``<mark class="...placeable" title="...">`` elements; every other item is
    appended with ``<`` and ``>`` replaced by entities.
    """

    # Parser order matters; see the notes on the individual entries.
    PARSERS = [
        NewlineEscapePlaceable.parse,
        TabEscapePlaceable.parse,
        EscapePlaceable.parse,

        # Must come after the escape parsers: the spaces pattern can match
        # '\n  ' and would otherwise mask the newline.
        SpacesPlaceable.parse,

        # XML placeables are marked before variable placeables so that
        # variables are not marked while the tags are left out. See:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1334926
        general.XMLTagPlaceable.parse,
        general.AltAttrPlaceable.parse,
        general.XMLEntityPlaceable.parse,
        PythonFormatNamedPlaceable.parse,
        PythonFormatPlaceable.parse,
        general.PythonFormattingPlaceable.parse,
        general.JavaMessageFormatPlaceable.parse,
        general.FormattingPlaceable.parse,

        # Must follow the printf parser: the Qt pattern can consume the %1
        # in %1$s and would otherwise mask a printf placeable.
        general.QtFormattingPlaceable.parse,
        general.UrlPlaceable.parse,
        general.FilePlaceable.parse,
        general.EmailPlaceable.parse,
        general.CapsPlaceable.parse,
        general.CamelCasePlaceable.parse,
        general.OptionPlaceable.parse,
        general.PunctuationPlaceable.parse,
        general.NumberPlaceable.parse,
    ]

    # Tooltip text, keyed by placeable class name.
    TITLES = {
        'NewlineEscapePlaceable': "Escaped newline",
        'TabEscapePlaceable': "Escaped tab",
        'EscapePlaceable': "Escaped sequence",
        'SpacesPlaceable': "Unusual space in string",
        'AltAttrPlaceable': "'alt' attribute inside XML tag",
        'NewlinePlaceable': "New-line",
        'NumberPlaceable': "Number",
        'QtFormattingPlaceable': "Qt string formatting variable",
        'PythonFormattingPlaceable': "Python string formatting variable",
        'JavaMessageFormatPlaceable': "Java Message formatting variable",
        'FormattingPlaceable': "String formatting variable",
        'UrlPlaceable': "URI",
        'FilePlaceable': "File location",
        'EmailPlaceable': "Email",
        'PunctuationPlaceable': "Punctuation",
        'XMLEntityPlaceable': "XML entity",
        'CapsPlaceable': "Long all-caps string",
        'CamelCasePlaceable': "Camel case string",
        'XMLTagPlaceable': "XML tag",
        'OptionPlaceable': "Command line option",
        'PythonFormatNamedPlaceable': "Python format string",
        'PythonFormatPlaceable': "Python format string"
    }

    # Lookup tables that are the same for every item.
    css_classes = {
        'TabEscapePlaceable': "escape ",
        'EscapePlaceable': "escape ",
        'SpacesPlaceable': "space ",
        'NewlinePlaceable': "escape ",
    }
    newline_markup = {
        u'\r\n': u'\\r\\n<br/>\n',
        u'\r': u'\\r<br/>\n',
        u'\n': u'\\n<br/>\n',
    }
    mark_template = '<mark class="%splaceable" title="%s">%s</mark>'

    pieces = []

    # Walk the flat list of placeables and StringElem instances.
    for item in parse(text, PARSERS).flatten():
        if not isinstance(item, BasePlaceable):
            # Plain text: only the angle brackets are turned into entities.
            plain = text_type(item)
            pieces.append(plain.replace('<', '&lt;').replace('>', '&gt;'))
            continue

        class_name = item.__class__.__name__
        placeable = text_type(item)

        # How the placeable is displayed in the translation editor.
        if class_name == 'TabEscapePlaceable':
            content = u'\\t'
        elif class_name == 'EscapePlaceable':
            content = u'\\'
        elif class_name == 'NewlinePlaceable':
            content = newline_markup.get(placeable)
        elif class_name in ('PythonFormatPlaceable', 'PythonFormatNamedPlaceable'):
            # Ampersand first so the entities added next are not re-escaped.
            content = placeable.replace('&', '&amp;')
            content = content.replace('<', '&lt;').replace('>', '&gt;')
        elif class_name == 'XMLEntityPlaceable':
            content = placeable.replace('&', '&amp;')
        elif class_name == 'XMLTagPlaceable':
            content = placeable.replace('<', '&lt;').replace('>', '&gt;')
        else:
            content = placeable

        pieces.append(mark_template % (
            css_classes.get(class_name, ""),
            TITLES.get(class_name, "Unknown placeable"),
            content,
        ))

    return u"".join(pieces)
示例#24
0
 def test_find(self):
     """find() reports the expected offsets for plain strings and a parsed element."""
     text_offset = 24
     # Looked up once with a plain literal and once with a u"" literal.
     assert self.elem.find("example") == text_offset
     assert self.elem.find(u"example") == text_offset
     brand_entity = parse(u"&brand;", general.parsers)
     entity_offset = self.elem.find(brand_entity)
     assert entity_offset == 46