def clean(string, n_cols=None):
    """
    Prepare ``string`` for output through curses, optionally truncated.

    Required reading!
        http://nedbatchelder.com/text/unipain.html

    Python 2 input string will be a unicode type (unicode code points).
    Curses will accept unicode if all of the points are in the ascii range.
    However, if any of the code points are not valid ascii curses will
    throw a UnicodeEncodeError: 'ascii' codec can't encode character,
    ordinal not in range(128). If we encode the unicode to a utf-8 byte
    string and pass that to curses, it will render correctly.

    Python 3 input string will be a string type (unicode code points).
    Curses will accept that in all cases. However, the n character count in
    addnstr will not be correct. If code points are passed to addnstr,
    curses will treat each code point as one character and will not account
    for wide characters. If utf-8 is passed in, addnstr will treat each
    'byte' as a single character.

    Args:
        string: The text to prepare. Text (``six.text_type``) is encoded;
            any other value (e.g. an already-encoded byte string) is passed
            through without calling ``.encode`` on it.
        n_cols: Optional column limit. ``None`` means no truncation; a
            value ``<= 0`` yields an empty string.

    Returns:
        ``''`` when ``n_cols <= 0``. When ``config.unicode`` is false, the
        ASCII-encoded string (unencodable characters replaced) sliced to
        ``n_cols`` items. Otherwise the string chopped with
        ``textual_width_chop`` and encoded as utf-8.
    """
    if n_cols is not None and n_cols <= 0:
        return ''

    if not config.unicode:
        # Check the actual type rather than the interpreter version: on
        # Python 3 a bytes object has no ``.encode`` and used to crash here.
        if isinstance(string, six.text_type):
            string = string.encode('ascii', 'replace')
        return string[:n_cols] if n_cols else string

    if n_cols:
        string = textual_width_chop(string, n_cols)
    # Re-test the type after chopping, since the helper may hand back text
    # even when it was given bytes.
    if isinstance(string, six.text_type):
        string = string.encode('utf-8')
    return string
def utf8_width_chop(msg, chop=None):
    '''**Deprecated** Return a string chopped to a given :term:`textual width`

    Use :func:`~kitchen.text.display.textual_width_chop` and
    :func:`~kitchen.text.display.textual_width` instead::

        >>> msg = 'く ku ら ra と to み mi'
        >>> # Old way:
        >>> utf8_width_chop(msg, 5)
        (5, 'く ku')
        >>> # New way
        >>> from kitchen.text.converters import to_bytes
        >>> from kitchen.text.display import textual_width, textual_width_chop
        >>> chopped = textual_width_chop(msg, 5)
        >>> (textual_width(chopped), to_bytes(chopped))
        (5, 'く ku')

    :arg msg: the string to chop
    :kwarg chop: the maximum :term:`textual width` to keep.  When
        :data:`None` (the default) ``msg`` is returned unchanged.
    :returns: a tuple of ``(textual width of the returned string, returned
        string)``.  When ``msg`` was not a unicode string, the chopped
        result is converted back with ``to_bytes`` before being returned.

    Emits a :exc:`DeprecationWarning` on every call.
    '''
    warnings.warn('kitchen.text.utf8.utf8_width_chop is deprecated.  Use'
                  ' kitchen.text.display.textual_width_chop instead',
                  DeprecationWarning, stacklevel=2)

    # Identity comparison: ``None`` is a singleton and ``==`` could be
    # hijacked by an argument with a custom ``__eq__``.
    if chop is None:
        return textual_width(msg), msg

    # Remember the caller's string type so the result can be handed back in
    # the same form it arrived in.
    as_bytes = not isunicodestring(msg)

    chopped_msg = textual_width_chop(msg, chop)
    if as_bytes:
        chopped_msg = to_bytes(chopped_msg)
    return textual_width(chopped_msg), chopped_msg
def clean(self, string, n_cols=None):
    """
    Sanitize ``string`` so it can be handed to curses, optionally limiting
    it to ``n_cols`` columns.

    Required reading!
        http://nedbatchelder.com/text/unipain.html

    Python 2 input string will be a unicode type (unicode code points).
    Curses will accept unicode if all of the points are in the ascii range.
    However, if any of the code points are not valid ascii curses will
    throw a UnicodeEncodeError: 'ascii' codec can't encode character,
    ordinal not in range(128). If we encode the unicode to a utf-8 byte
    string and pass that to curses, it will render correctly.

    Python 3 input string will be a string type (unicode code points).
    Curses will accept that in all cases. However, the n character count in
    addnstr will not be correct. If code points are passed to addnstr,
    curses will treat each code point as one character and will not account
    for wide characters. If utf-8 is passed in, addnstr will treat each
    'byte' as a single character.

    Reddit's api sometimes chokes and double-encodes some html characters.
    Praw handles the initial decoding, but we need to do a second pass
    just to make sure. See https://github.com/tildeclub/ttrv/issues/96

    Example:
        &amp;amp; -> returned directly from reddit's api
        &amp;     -> returned after PRAW decodes the html characters
        &         -> returned after our second pass, this is the true value

    Args:
        string: Text or byte string to sanitize. Only text is passed
            through ``unescape``.
        n_cols: Optional column limit. ``None`` disables truncation and a
            value ``<= 0`` produces an empty string.

    Returns:
        ``''`` when ``n_cols <= 0``. With ``self.config['ascii']`` set, the
        ASCII encoding of the string (unencodable characters replaced),
        sliced to ``n_cols`` items. Otherwise the string chopped by
        ``textual_width_chop`` and, if it is text, encoded as utf-8.
    """
    if n_cols is not None and n_cols <= 0:
        return ''

    # Second html-unescape pass; byte strings are left untouched.
    if isinstance(string, six.text_type):
        string = unescape(string)

    if not self.config['ascii']:
        # Unicode mode: truncate by display width, then hand curses utf-8.
        if n_cols:
            string = textual_width_chop(string, n_cols)
        if isinstance(string, six.text_type):
            return string.encode('utf-8')
        return string

    # ASCII mode: normalise to text first so the lossy encode always applies.
    if isinstance(string, six.binary_type):
        string = string.decode('utf-8')
    ascii_only = string.encode('ascii', 'replace')
    if n_cols:
        return ascii_only[:n_cols]
    return ascii_only
def test_textual_width_chop(self):
    '''textual_width_chop of self.u_mixed at a range of widths'''
    mixed = self.u_mixed

    # (requested width, expected result) pairs, checked in this order.
    # Where the expected slice is the same for two consecutive widths
    # (6/7, 12/13, 18/19) the extra column is presumably too narrow for the
    # next character -- an assumption, u_mixed is defined outside this block.
    expectations = (
        (1000, mixed),
        (23, mixed),
        (22, mixed[:-1]),
        (19, mixed[:-4]),
        (1, ''),
        (2, mixed[0]),
        (3, mixed[:2]),
        (4, mixed[:3]),
        (5, mixed[:4]),
        (6, mixed[:5]),
        (7, mixed[:5]),
        (8, mixed[:6]),
        (9, mixed[:7]),
        (10, mixed[:8]),
        (11, mixed[:9]),
        (12, mixed[:10]),
        (13, mixed[:10]),
        (14, mixed[:11]),
        (15, mixed[:12]),
        (16, mixed[:13]),
        (17, mixed[:14]),
        (18, mixed[:15]),
        (19, mixed[:15]),
        (20, mixed[:16]),
        (21, mixed[:17]),
    )

    for width, expected in expectations:
        tools.eq_(display.textual_width_chop(mixed, width), expected)