Python get_character_set示例

编程语言: Python

命名空间/包名称: quex.engine.codec_db.unicode.parser.ucs_property_db

方法/功能: get_character_set

hotexamples.com的示例: 4

Python get_character_set - 共找到4个示例。以下是从开源项目中提取的、评分最高的 quex.engine.codec_db.unicode.parser.ucs_property_db.get_character_set 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： code_fragment.py 项目： mplucinski/quex

def read_character_code(fh):
    """Read a character code definition from the stream 'fh'.

    The first character read decides which notation is parsed:

       '     -- a quoted character such as '+'; a closing quote is expected.
       U     -- must be followed by 'C' and an identifier, e.g.
                UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE; the identifier is looked
                up through the "Name" property of 'ucs_property_db'.
       else  -- the stream is rewound and handed to 'read_integer()'.

    RETURNS: The value obtained by the selected notation, or -1 if nothing
             could be read. Whenever -1 is returned, 'fh' has been set back
             to the position it had on entry.
    """
    # NOTE: Covered by the regression test for feature request 2251359,
    #       see directory $QUEX_PATH/TEST/2251359.
    initial_pos = fh.tell()

    def give_up():
        # Undo all reads and signal 'no character code found'.
        fh.seek(initial_pos)
        return -1

    first = fh.read(1)
    if first == "":
        return give_up()

    if first == "'":
        # Character given literally between single quotes.
        if check(fh, "\\"):
            # snap_backslashed_character raises an exception if the
            # backslashed character is nonsense.
            code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            code = __read_one_utf8_code_from_stream(fh)

        if code is None:
            error.log("Missing utf8-character for definition of character code by character.",
                      fh)
        elif fh.read(1) != "'":
            error.log("Missing closing ' for definition of character code by character.",
                      fh)
        return code

    if first == "U":
        if fh.read(1) != "C":
            return give_up()

        # Unicode character name following the 'UC' prefix.
        skip_whitespace(fh)
        name = __read_token_identifier(fh)
        if name == "":
            return give_up()

        # The set related to a name is supposed to consist of a single character.
        code = ucs_property_db.get_character_set("Name", name)
        code_type = type(code)
        if code_type in [str, unicode]:
            error.verify_word_in_list(name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % name,
                                      fh)
        elif code_type not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % name,
                      fh)
        return code

    # Neither quote nor 'UC': try a plain integer from the initial position.
    fh.seek(initial_pos)
    number = read_integer(fh)
    if number is None:
        # Let the caller try to interpret the input as something else.
        return give_up()
    return number

示例#2

显示文件

def do_shortcut(stream, ShortcutLetter, PropertyAlias):
    r"""Name property shortcut '\ShortcutLetter{...}' which is a shortcut
       for '\P{PropertyAlias=...}'.

       Example: an expression of the form '\N{CHARACTER NAME}' is parsed
       and the related set of characters that match the given name is
       returned. Wildcards are allowed.

       RETURNS: The result of 'ucs_property_db.get_character_set()' for the
                parsed property value, provided it is not a string.

       RAISES:  RegularExpressionException, carrying the string as its
                argument, if the database lookup returned a string.

       NOTE: The docstring is a raw string. Otherwise '\N{...}' would be
             interpreted as a named unicode escape by Python 3 and the
             module would fail to compile.
    """
    content = __parse_property_expression(stream, ShortcutLetter, EqualConditionPossibleF=False)
    # if len(content) != 1 then an exception is thrown

    property_value = content[0]

    result = ucs_property_db.get_character_set(PropertyAlias, property_value)

    # A string result is not a character set; it is forwarded as the
    # exception's argument.
    if isinstance(result, str):
        raise RegularExpressionException(result)

    return result

示例#3

显示文件

def do(stream):
    r"""Property expression: '\P{...}'

       Parse an expression of the forms:

       '\P{property = value}' or '\P{binary_property}'

       and return the related character set.

       RETURNS: The result of 'ucs_property_db.get_character_set()' for the
                parsed property name and value, provided it is not a string.
                The value is None if only a property name was given.

       RAISES:  RegularExpressionException, carrying the string as its
                argument, if the database lookup returned a string.

       NOTE: The docstring is a raw string so that '\P' is not treated as
             an (invalid) escape sequence.
    """
    content = __parse_property_expression(stream, "P")
    # if len(content) < 1 or > 2 then an exception is thrown

    property_name = content[0]
    # A single element means no '= value' part was given.
    property_value = None if len(content) == 1 else content[1]

    result = ucs_property_db.get_character_set(property_name, property_value)

    # A string result is not a character set; it is forwarded as the
    # exception's argument.
    if isinstance(result, str):
        raise RegularExpressionException(result)

    return result

示例#4

显示文件

文件： code_fragment.py 项目： praveenmunagapati/quex

def read_character_code(fh):
    """Parse a character code from 'fh' and return it.

    Depending on the first character of the input, one of three forms is
    read:

       (1) "'": a character enclosed in single quotes, e.g. '+'.
       (2) "U": the letters 'UC' followed by an identifier, e.g.
                UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE, which is passed to
                'ucs_property_db.get_character_set("Name", ...)'.
       (3) anything else: the input is re-read by 'read_integer()'.

    RETURNS: The value found for the applicable form. If no form applies,
             the stream is reset to its entry position and -1 is returned.
    """
    # NOTE: The regression test for feature request 2251359 exercises this
    #       function; see directory $QUEX_PATH/TEST/2251359.
    origin = fh.tell()
    head = fh.read(1)

    if head == "'":
        # (1) Quoted character. snap_backslashed_character raises an
        #     exception if the backslashed character is nonsense.
        quoted = (snap_backslashed_character.do(
                      fh, ReducedSetOfBackslashedCharactersF=True)
                  if check(fh, "\\")
                  else __read_one_utf8_code_from_stream(fh))

        if quoted is None:
            error.log(
                "Missing utf8-character for definition of character code by character.",
                fh)
        elif fh.read(1) != "'":
            error.log(
                "Missing closing ' for definition of character code by character.",
                fh)
        return quoted

    if head == "U":
        # (2) Unicode name; only valid if 'C' and an identifier follow.
        if fh.read(1) == "C":
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            if identifier != "":
                # The character set for a name is supposed to have size one.
                found = ucs_property_db.get_character_set("Name", identifier)
                if type(found) in [str, unicode]:
                    error.verify_word_in_list(
                        identifier, ucs_property_db["Name"].code_point_db,
                        "The string %s\ndoes not identify a known unicode character." %
                        identifier, fh)
                elif type(found) not in [int, long]:
                    error.log(
                        "%s relates to more than one character in unicode database." %
                        identifier, fh)
                return found
        # 'U' without a usable 'C <identifier>' continuation.
        fh.seek(origin)
        return -1

    if head != "":
        # (3) Fall back to an integer, starting over at the entry position.
        fh.seek(origin)
        number = read_integer(fh)
        if number is not None:
            return number

    # Nothing recognized: leave the stream untouched for other interpretations.
    fh.seek(origin)
    return -1