Пример #1
0
    def test_unescape(self):
        """Exercise ``unescape`` and ``u_unescape`` from ``clld.lib.bibtex``.

        Covers a LaTeX macro in byte and text input, bracketed decimal
        code-point placeholders, and text that must pass through unchanged.
        """
        from clld.lib.bibtex import unescape, u_unescape

        # Byte input: the macro becomes a sharp s, the latin1 byte survives.
        latin1_input = binary_type("\\ss \xef".encode('latin1'))
        self.assertEqual(unescape(latin1_input), 'ß\xef')

        # Text input carrying the same macro.
        macro_input = "\\ss "
        self.assertEqual(unescape(macro_input), 'ß')

        # Placeholders hold decimal code points: 123 -> '{', 1234 -> U+04D2.
        placeholder_input = '?[\\u123] ?[\\u1234]'
        self.assertEqual(u_unescape(placeholder_input), '{ \u04d2')

        # A string without escapes is expected to come back as it went in.
        s = '\u2013'
        roundtripped = unescape(s)
        self.assertEqual(s, roundtripped)

        # 65533 is the decimal value of U+FFFD.
        replacement_input = '?[\\u65533]'
        self.assertEqual(unescape(replacement_input), '\ufffd')
Пример #2
0
    def test_unescape(self):
        """Verify the LaTeX and code-point unescaping helpers of the bibtex lib."""
        from clld.lib.bibtex import unescape, u_unescape

        expect_equal = self.assertEqual

        # LaTeX sharp-s macro, first as latin1 encoded bytes, then as text.
        from_bytes = unescape(binary_type("\\ss \xef".encode('latin1')))
        expect_equal(from_bytes, 'ß\xef')
        from_text = unescape("\\ss ")
        expect_equal(from_text, 'ß')

        # Bracketed placeholders are resolved from their decimal code points.
        from_placeholders = u_unescape('?[\\u123] ?[\\u1234]')
        expect_equal(from_placeholders, '{ \u04d2')

        # Input without any escape sequence is returned as-is.
        s = '\u2013'
        expect_equal(s, unescape(s))

        # ``unescape`` resolves the placeholders as well.
        from_replacement = unescape('?[\\u65533]')
        expect_equal(from_replacement, '\ufffd')
Пример #3
0
def unescape(string):
    r"""Transform LaTeX escape sequences of type \`\ae into unicode.

    Processing order: the input is coerced with ``unicode()``, stripped of
    surrounding whitespace and passed through ``stripctrlchars``; decoded
    with the ``latex+latin1`` codec; passed through ``u_unescape``; every key
    of ``SYMBOLS`` is replaced by its value (shortest keys first); finally
    curly braces are dropped when no backslash is left in the text.

    :param string: the text to convert; coerced with ``unicode()`` first.
    :return: the converted ``unicode`` string.
    """
    def _delatex(s):
        """Decode LaTeX markup in ``s``; return ``s`` itself if that fails."""
        try:
            # Under Python 2, str() raises UnicodeEncodeError for unicode
            # text with non-ASCII characters; such text skips LaTeX decoding.
            result = str(s).decode('latex+latin1')
        except UnicodeEncodeError:  # pragma: no cover
            # Fall back to the cleaned-up text handed to this helper, not to
            # the raw outer argument, so the whitespace and control-character
            # stripping done by the caller is not silently discarded.
            result = s
        return unicode(result)

    res = u_unescape(_delatex(stripctrlchars(unicode(string).strip())))

    # Stable sort by key length, so shorter symbols are substituted first.
    for symbol in sorted(SYMBOLS, key=len):
        res = res.replace(symbol, SYMBOLS[symbol])

    # Braces are only removed once no backslash remains in the text.
    if '\\' not in res:
        res = res.replace('{', '').replace('}', '')

    # Turn a leftover double-backslash ampersand group into a plain ampersand.
    res = res.replace('\\\\&{} ', '& ')
    return res