示例#1
0
def clean(string, n_cols=None):
    """
    Prepare ``string`` for output through curses, limited to ``n_cols``.

    Background reading:
        http://nedbatchelder.com/text/unipain.html

    Python 2: the input is expected to be a unicode object. Curses only
    accepts unicode whose code points are all ascii; anything else raises
    UnicodeEncodeError ('ascii' codec can't encode character, ordinal not in
    range(128)). Encoding to a utf-8 byte string first makes it render
    correctly.

    Python 3: the input is a str. Curses accepts it, but the character count
    ``n`` given to addnstr is then wrong: each code point is counted as one
    character, ignoring wide characters. When utf-8 bytes are passed instead,
    addnstr counts each byte as one character.

    Returns:
        '' when ``n_cols`` is given and is zero or negative.
        With ``config.unicode`` disabled: the text encoded as ascii (using
        'replace' for unencodable characters) and sliced to ``n_cols`` items
        when ``n_cols`` is truthy.
        With ``config.unicode`` enabled: the text chopped with
        ``textual_width_chop`` when ``n_cols`` is truthy, then encoded as
        utf-8.
        Encoding is only applied on Python 3 or to ``unicode`` objects; a
        Python 2 byte string is passed through without encoding.
    """

    # Nothing fits in a zero or negative number of columns.
    if n_cols is not None and n_cols <= 0:
        return ''

    if config.unicode:
        # Chop by display width *before* encoding so wide characters are
        # measured on code points rather than on bytes.
        chopped = textual_width_chop(string, n_cols) if n_cols else string
        if six.PY3 or isinstance(chopped, unicode):
            return chopped.encode('utf-8')
        return chopped

    # Ascii-only mode: unencodable characters are substituted by 'replace'.
    if six.PY3 or isinstance(string, unicode):
        string = string.encode('ascii', 'replace')
    if n_cols:
        return string[:n_cols]
    return string
示例#2
0
def utf8_width_chop(msg, chop=None):
    '''**Deprecated** Return a string chopped to a given :term:`textual width`

    Use :func:`~kitchen.text.display.textual_width_chop` and
    :func:`~kitchen.text.display.textual_width` instead::

        >>> msg = 'く ku ら ra と to み mi'
        >>> # Old way:
        >>> utf8_width_chop(msg, 5)
        (5, 'く ku')
        >>> # New way
        >>> from kitchen.text.converters import to_bytes
        >>> from kitchen.text.display import textual_width, textual_width_chop
        >>> chopped = textual_width_chop(msg, 5)
        >>> (textual_width(chopped), to_bytes(chopped))
        (5, 'く ku')

    :arg msg: string to chop
    :kwarg chop: maximum :term:`textual width` to keep.  When :data:`None`
        (the default) ``msg`` is returned unchanged.
    :returns: tuple of (:term:`textual width` of the returned string, the
        returned string).  When ``msg`` is not a unicode string according to
        ``isunicodestring``, the chopped string is converted with
        ``to_bytes`` before being returned.
    '''
    # stacklevel=2 attributes the warning to the caller rather than this shim.
    warnings.warn('kitchen.text.utf8.utf8_width_chop is deprecated.  Use'
        ' kitchen.text.display.textual_width_chop instead', DeprecationWarning,
        stacklevel=2)

    # Identity test: None is a singleton, and ``==`` could be hijacked by an
    # argument with a custom ``__eq__``.
    if chop is None:
        return textual_width(msg), msg

    # Remember the input type so the result is handed back in the same form.
    as_bytes = not isunicodestring(msg)

    chopped_msg = textual_width_chop(msg, chop)
    if as_bytes:
        chopped_msg = to_bytes(chopped_msg)
    return textual_width(chopped_msg), chopped_msg
示例#3
0
    def clean(self, string, n_cols=None):
        """
        Prepare ``string`` for output through curses, limited to ``n_cols``.

        Background reading:
            http://nedbatchelder.com/text/unipain.html

        Python 2: the input is expected to be a unicode object. Curses only
        accepts unicode whose code points are all ascii; anything else raises
        UnicodeEncodeError ('ascii' codec can't encode character, ordinal not
        in range(128)). Encoding to a utf-8 byte string first makes it render
        correctly.

        Python 3: the input is a str. Curses accepts it, but the character
        count ``n`` given to addnstr is then wrong: each code point is counted
        as one character, ignoring wide characters. When utf-8 bytes are
        passed instead, addnstr counts each byte as one character.

        Reddit's api occasionally double-encodes html characters. PRAW undoes
        the first layer; text input gets a second unescape pass here to be
        safe. See https://github.com/tildeclub/ttrv/issues/96

        Example:
            &amp;amp; -> what reddit's api returns
            &amp;     -> after PRAW decodes the html characters
            &         -> after the second pass here, the real value

        Returns:
            '' when ``n_cols`` is given and is zero or negative.
            With ``self.config['ascii']`` set: the text encoded as ascii
            (using 'replace' for unencodable characters) and sliced to
            ``n_cols`` items when ``n_cols`` is truthy.
            Otherwise: the text chopped with ``textual_width_chop`` when
            ``n_cols`` is truthy, then encoded as utf-8 if it is text.
        """

        # Nothing fits in a zero or negative number of columns.
        if n_cols is not None and n_cols <= 0:
            return ''

        # Second html-unescape pass; only applied to text, never to bytes.
        if isinstance(string, six.text_type):
            string = unescape(string)

        if not self.config['ascii']:
            # Chop by display width *before* encoding so wide characters are
            # measured on code points rather than on bytes.
            if n_cols:
                string = textual_width_chop(string, n_cols)
            if isinstance(string, six.text_type):
                string = string.encode('utf-8')
            return string

        # Ascii mode: normalise bytes to text first so the 'replace' handler
        # can substitute every non-ascii character.
        if isinstance(string, six.binary_type):
            string = string.decode('utf-8')
        ascii_bytes = string.encode('ascii', 'replace')
        if n_cols:
            return ascii_bytes[:n_cols]
        return ascii_bytes
 def test_textual_width_chop(self):
     '''textual_width_chop of u_mixed at widths 1 through 23 and 1000'''
     chop = display.textual_width_chop
     text = self.u_mixed

     # Widths at which the whole string is expected back.
     for width in (1000, 23):
         tools.eq_(chop(text, width), text)

     # Expectations expressed relative to the end of the string.
     tools.eq_(chop(text, 22), text[:-1])
     tools.eq_(chop(text, 19), text[:-4])

     # Smallest widths: nothing is expected at 1, one character at 2.
     tools.eq_(chop(text, 1), '')
     tools.eq_(chop(text, 2), text[0])

     # (requested width, number of leading characters expected back).
     # A repeated count (6 and 7 -> 5, 12 and 13 -> 10, 18 and 19 -> 15)
     # marks a width at which the next character does not fit --
     # presumably a double-width character; confirm against u_mixed.
     kept_by_width = (
         (3, 2), (4, 3), (5, 4), (6, 5), (7, 5),
         (8, 6), (9, 7), (10, 8), (11, 9), (12, 10),
         (13, 10), (14, 11), (15, 12), (16, 13), (17, 14),
         (18, 15), (19, 15), (20, 16), (21, 17),
     )
     for width, kept in kept_by_width:
         tools.eq_(chop(text, width), text[:kept])