示例#1
0
文件: core.py 项目: liancheng/rose
def get_supported_unicode_character_set(CodecAlias=None, FileName=None, FH=-1, LineN=None):
    """Return a NumberSet built from the source intervals of a codec.

    The codec is looked up via 'get_codec_transformation_info()', to which
    all four arguments are passed on unchanged. Every entry it delivers is
    a triple (source_begin, source_end, target_begin); only the source
    interval of each entry contributes to the result.

    At least one of 'CodecAlias' and 'FileName' must be given.

    RETURNS: NumberSet containing all source intervals of the codec.
    """
    assert not (CodecAlias is None and FileName is None)

    transformation_list = get_codec_transformation_info(CodecAlias, FileName, FH, LineN)

    covered = NumberSet()
    # The target position of an entry is irrelevant here; it is unpacked
    # only so that each entry is still required to be a triple.
    for begin, end, _target in transformation_list:
        covered.add_interval(Interval(begin, end))
    return covered
class Tracker:
    """Collects the characters of a character set expression in a NumberSet.

    Intervals and single characters are reported through the 'consider_*'
    methods and accumulated in 'match_set'.
    """

    def __init__(self):
        # 'match_set' receives every interval that is reported.
        # 'negation_f' starts out False; nothing in this class changes it.
        self.negation_f = False
        self.match_set = NumberSet()

    def consider_interval(self, Begin, End):
        """Add the interval given by 'Begin' and 'End' to the match set.

        Raises RegularExpressionException if 'Begin' is greater than 'End'.
        """
        if Begin > End:
            # Show the offending range both as characters and as code points.
            # NOTE(review): presumably 'map_unicode_to_utf8' renders a code
            # point as its utf8 text -- confirm against the utf8 module.
            begin_text = utf8.map_unicode_to_utf8(Begin)
            end_text = utf8.map_unicode_to_utf8(End)
            details = "found range '%s-%s' which corresponds to %i-%i as unicode code points." \
                      % (begin_text, end_text, Begin, End)
            raise RegularExpressionException(
                "Character range: '-' requires character with 'lower code' to preceed\n" + details)

        self.match_set.add_interval(Interval(Begin, End))

    def consider_letter(self, CharCode):
        """Add the single character 'CharCode' as interval (CharCode, CharCode + 1)."""
        following_code = CharCode + 1
        self.consider_interval(CharCode, following_code)
class Tracker:
    """Accumulates reported characters and character ranges.

    Everything passed to 'consider_interval()' or 'consider_letter()' ends
    up in the NumberSet 'match_set'.
    """

    def __init__(self):
        # Set of all intervals reported so far.
        self.match_set = NumberSet()
        # Initialised to False; this class itself never modifies the flag.
        self.negation_f = False

    def consider_interval(self, Begin, End):
        """Register the interval described by 'Begin' and 'End'.

        A 'Begin' greater than 'End' is rejected with a
        RegularExpressionException that names the range both as characters
        and as code points.
        """
        if Begin > End:
            range_as_text = (utf8.map_unicode_to_utf8(Begin), utf8.map_unicode_to_utf8(End))
            message = "Character range: '-' requires character with 'lower code' to preceed\n" \
                      + "found range '%s-%s' which corresponds to %i-%i as unicode code points." \
                      % (range_as_text + (Begin, End))
            raise RegularExpressionException(message)

        self.match_set.add_interval(Interval(Begin, End))

    def consider_letter(self, CharCode):
        """Register one character, i.e. the interval (CharCode, CharCode + 1)."""
        end_code = CharCode + 1
        self.consider_interval(CharCode, end_code)
示例#4
0
文件: parser.py 项目: yifsun/amplify
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    The file is read as a sequence of records of three integers: source
    interval begin, source interval size, and target interval begin. For
    each complete record the entry

            [source_begin, source_begin + source_size, target_begin]

    is appended to 'section_list', i.e. 'section_list' is modified in place.

    Reading ends when an integer is missing (reported through the returned
    error string) or when an 'EndOfStreamException' occurs. A record that
    was only partially read at that point is dropped.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
             [2] Error message if an expected integer was missing;
                 None otherwise.
    """
    source_set = NumberSet()
    drain_set = NumberSet()

    error_str = None

    # Descriptions of the three integers of a record, in the order in which
    # they appear in the file. They are inserted into the error message.
    field_names = ("source interval begin",
                   "source interval size",
                   "target interval begin")

    try:
        while error_str is None:
            values = []
            for field_name in field_names:
                skip_whitespace(fh)
                value = read_integer(fh)
                if value is None:
                    error_str = "Missing integer (%s) in codec file." % field_name
                    break
                values.append(value)

            if error_str is not None:
                # Incomplete record: report the error, add nothing.
                break

            source_begin, source_size, target_begin = values
            source_end = source_begin + source_size
            section_list.append([source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(
                Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        # The end of the stream is the regular way for the loop to finish.
        pass

    return source_set, drain_set, error_str