Exemplo n.º 1
0
def snap_term(stream, PatternDict):
    """term:  primary
              primary term 
    """
    __debug_entry("term", stream)

    # -- primary
    result = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", result)
    if result == None: return __debug_exit(None, stream)
    position_1 = stream.tell()

    # -- optional 'term'
    result_2 = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", result_2)
    if result_2 == None:
        stream.seek(position_1)
        return __debug_exit(result, stream)

    ## print "##1:", result.get_string(NormalizeF=False)
    ## print "##2:", result_2.get_string(NormalizeF=False)
    result = sequentialize.do([result, result_2],
                              MountToFirstStateMachineF=True,
                              CloneRemainingStateMachinesF=False)

    return __debug_exit(construct.beautify(result), stream)
Exemplo n.º 2
0
def snap_expression(stream, PatternDict):
    """expression:  term
                    term | expression
    """
    __debug_entry("expression", stream)
    # -- term
    result = snap_term(stream, PatternDict)
    if result == None:
        return __debug_exit(None, stream)

    # -- optional '|'
    if not check(stream, '|'):
        return __debug_exit(result, stream)

    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", result_2)
    if result_2 == None:
        stream.seek(position_1)
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2])
    return __debug_exit(construct.beautify(result), stream)
Exemplo n.º 3
0
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "              = character string
                 [ non_rect_bracket_close ]           = set of characters
                 { identifier }                       = pattern replacement
                 ( expression )
                 non_control_character+               = lonely characters
                 primary repetition_cmd
    """
    __debug_entry("primary", stream)
    x = stream.read(1)
    lookahead = stream.read(1)
    if x != "" and lookahead != "": stream.seek(-1, 1)
    if x == "": return __debug_exit(None, stream)

    # -- 'primary' primary
    if x == "\"": result = snap_character_string.do(stream)
    elif x == "[":
        stream.seek(-1, 1)
        result = character_set_expression.do(stream, PatternDict)
    elif x == "{":
        result = snap_replacement(stream, PatternDict)
    elif x == ".":
        result = create_ALL_BUT_NEWLINE_state_machine()
    elif x == "(":
        result = snap_bracketed_expression(stream, PatternDict)

    elif x.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException(
            "lonely operator '%s' without expression proceeding." % x)

    elif x == "\\":
        if lookahead == "C":
            stream.read(1)
            result = snap_case_folded_pattern(stream, PatternDict)
        else:
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set == None:
                stream.seek(
                    1, 1)  # snap_property_set() leaves tream right before '\\'
                char_code = snap_backslashed_character.do(stream)
                if char_code == None:
                    raise RegularExpressionException(
                        "Backslash followed by unrecognized character code.")
                trigger_set = char_code
            result = StateMachine()
            result.add_transition(result.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

    elif x not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it it good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    result_repeated = __snap_repetition_range(result, stream)
    ## print "##imr:", result.get_string(NormalizeF=False)
    if result_repeated != None: result = result_repeated
    return __debug_exit(construct.beautify(result), stream)