def get_supported_unicode_character_set(CodecAlias=None, FileName=None, FH=-1, LineN=None):
    """Determine the set of code points for which a codec provides a mapping.

    The codec is identified by 'CodecAlias' or by 'FileName'; at least one
    of the two must be given. All four arguments are passed on unchanged to
    'get_codec_transformation_info()'.

    RETURNS: NumberSet that unites, for every triple delivered by
             'get_codec_transformation_info()', the interval spanned by the
             triple's first two elements (source begin, source end).
    """
    assert CodecAlias is not None or FileName is not None

    transformation_info = get_codec_transformation_info(CodecAlias, FileName, FH, LineN)

    supported_set = NumberSet()
    # Only the source side of each mapping matters here; the third element
    # (the target begin) is unpacked but not used.
    for begin, end, _target_begin in transformation_info:
        source_interval = Interval(begin, end)
        supported_set.add_interval(source_interval)

    return supported_set
class Tracker:
    """Collects character codes and character code intervals in a NumberSet.

    ATTRIBUTES:
        match_set  -- NumberSet holding every interval considered so far.
        negation_f -- Flag, initialized to False. None of the methods of
                      this class reads or modifies it.
    """

    def __init__(self):
        self.match_set = NumberSet()
        self.negation_f = False

    def consider_interval(self, Begin, End):
        """Add the interval described by 'Begin' and 'End' to the match set.

        NOTE(review): 'End' is presumably exclusive, since consider_letter()
        passes 'CharCode + 1' for a single character -- confirm against
        'Interval'.

        RAISES: RegularExpressionException, if 'Begin' is greater than 'End'.
        """
        if Begin > End:
            begin_as_utf8 = utf8.map_unicode_to_utf8(Begin)
            end_as_utf8 = utf8.map_unicode_to_utf8(End)
            found_txt = "found range '%s-%s' which corresponds to %i-%i as unicode code points." \
                        % (begin_as_utf8, end_as_utf8, Begin, End)
            raise RegularExpressionException(
                "Character range: '-' requires character with 'lower code' to preceed\n" + found_txt)

        new_interval = Interval(Begin, End)
        self.match_set.add_interval(new_interval)

    def consider_letter(self, CharCode):
        """Add the single character code 'CharCode' to the match set."""
        # A single letter is treated as the interval from CharCode to CharCode + 1.
        following_code = CharCode + 1
        self.consider_interval(CharCode, following_code)
class Tracker:
    """Gathers the character codes mentioned so far in a NumberSet.

    ATTRIBUTES:
        match_set  -- NumberSet to which each considered interval is added.
        negation_f -- Boolean flag that starts out as False. It is only
                      initialized here; no method of this class uses it.
    """

    def __init__(self):
        self.match_set = NumberSet()
        self.negation_f = False

    def consider_interval(self, Begin, End):
        """Enter the interval given by 'Begin' and 'End' into the match set.

        NOTE(review): consider_letter() calls this with 'CharCode + 1' as
        'End', so 'End' looks like an exclusive border -- verify against
        'Interval'.

        RAISES: RegularExpressionException, if 'Begin' is greater than 'End'.
        """
        if Begin > End:
            # The message shows both the characters and their numeric codes.
            details = (utf8.map_unicode_to_utf8(Begin), utf8.map_unicode_to_utf8(End), Begin, End)
            msg = "Character range: '-' requires character with 'lower code' to preceed\n"
            msg += "found range '%s-%s' which corresponds to %i-%i as unicode code points." % details
            raise RegularExpressionException(msg)

        self.match_set.add_interval(Interval(Begin, End))

    def consider_letter(self, CharCode):
        """Enter the single character code 'CharCode' into the match set."""
        # Delegates to consider_interval() with the borders CharCode and CharCode + 1.
        self.consider_interval(Begin=CharCode, End=CharCode + 1)
def do(section_list, fh):
    """Parses a codec information file.

    The described codec can only be a 'static character length' encoding.
    That is every character in the code occupies the same number of bytes.

    The stream 'fh' is read as a sequence of integer triples, separated by
    whitespace:

        source interval begin, source interval size, target interval begin

    For every complete triple the list

        [source_begin, source_begin + source_size, target_begin]

    is appended to 'section_list' (which is modified in place).

    Reading stops either when an EndOfStreamException is raised by one of
    the read helpers, or when 'read_integer()' returns None for an expected
    integer. In the second case an error message is reported through the
    third return value. Triples read before that point remain in
    'section_list' and in the returned sets.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
             [2] Error message (string), if an expected integer was
                 missing; None otherwise.
    """
    source_set = NumberSet()
    drain_set = NumberSet()
    error_str = None

    try:
        while True:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                break

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file."
                break

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                break

            source_end = source_begin + source_size
            section_list.append([source_begin, source_end, target_begin])

            # Source and target interval have the same size.
            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        # End of stream is the regular way to leave the loop; whatever has
        # been collected up to this point is returned without an error.
        pass

    return source_set, drain_set, error_str