def _encode_field(self, value, param="field"):
    """Convert a field to its internal (bytes) representation.

    Byte strings are accepted as-is; unicode strings are encoded using
    ``self.encoding``. The resulting byte string is then validated
    before being returned.

    :arg value: field value, as unicode or bytes.
    :param param:
        name of the field, used when composing error messages.

    :raises UnicodeEncodeError:
        if a unicode value cannot be encoded using ``self.encoding``.

    :raises ValueError:
        if the resulting byte string is longer than 255 bytes, or
        contains any element found in ``_INVALID_FIELD_CHARS``.

    Any value which is neither unicode nor bytes is rejected with the
    error built by ``ExpectedStringError(value, param)``.

    :returns: encoded field as bytes
    """
    # normalize to bytes, rejecting anything that is not a string type
    if isinstance(value, unicode):
        encoded = value.encode(self.encoding)
    elif isinstance(value, bytes):
        encoded = value
    else:
        raise ExpectedStringError(value, param)

    # the limit applies to the encoded length, not the unicode length
    if len(encoded) > 255:
        raise ValueError("%s must be at most 255 characters: %r" %
                         (param, encoded))

    # scan element by element for anything on the forbidden list
    for char in encoded:
        if char in _INVALID_FIELD_CHARS:
            raise ValueError("%s contains invalid characters: %r" %
                             (param, encoded))

    return encoded
def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding used to decode *source* when it is a bytes instance.
        Must be a non-empty value (checked via ``assert``), even when
        *source* is already unicode.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError:
        if source is neither unicode nor bytes
        (the error is built by ``ExpectedStringError``).

    :returns:
        * unicode strings are returned unchanged.
        * bytes strings are returned decoded using *encoding*.
    """
    assert encoding

    # already unicode -- nothing to do
    if isinstance(source, unicode):
        return source

    # anything that isn't bytes at this point is not a string at all
    if not isinstance(source, bytes):
        raise ExpectedStringError(source, param)

    return source.decode(encoding)
def to_native_str(source, encoding="utf-8", param="value"):
    """Helper to normalize input to a byte string.

    NOTE(review): this variant returns bytes, which looks like the
    Python 2 flavour of the helper (native ``str`` == bytes) -- confirm
    against the surrounding module.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding used to encode *source* when it is a unicode instance.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :returns:
        * bytes strings are returned unchanged.
        * unicode strings are returned encoded using *encoding*.

    Any other type is rejected with the error built by
    ``ExpectedStringError(source, param)``.
    """
    # bytes pass straight through
    if isinstance(source, bytes):
        return source

    # anything that isn't unicode at this point is not a string at all
    if not isinstance(source, unicode):
        raise ExpectedStringError(source, param)

    return source.encode(encoding)
def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        target encoding (defaults to ``"utf-8"``). Must be a non-empty
        value (checked via ``assert``).

    :param param:
        optional name of variable/noun to reference when raising errors.

    :param source_encoding:
        if specified and *source* is bytes, *source* is transcoded from
        *source_encoding* to *encoding* (via unicode), unless
        ``is_same_codec`` reports the two encodings as the same codec.

    :raises TypeError:
        if source is neither unicode nor bytes
        (the error is built by ``ExpectedStringError``).

    :returns:
        * unicode strings are encoded using *encoding*, and returned.
        * if *source_encoding* is not specified (or matches *encoding*),
          byte strings are returned unchanged.
        * otherwise, byte strings are transcoded to *encoding*.
    """
    assert encoding

    if isinstance(source, bytes):
        # no transcoding requested, or source is already in the target codec
        if not source_encoding or is_same_codec(source_encoding, encoding):
            return source
        # transcode by round-tripping through unicode
        text = source.decode(source_encoding)
        return text.encode(encoding)

    if isinstance(source, unicode):
        return source.encode(encoding)

    raise ExpectedStringError(source, param)