Exemplo n.º 1
0
def FindFollowSets(lemp):
    '''Propagate followsets along the forward links until nothing changes.

    A followset is the set of all symbols which can come immediately
    after a configuration.  Every configuration of every state is first
    marked INCOMPLETE.  Each pass then unions the ``fws`` of every
    configuration that is not COMPLETE into the ``fws`` of each
    configuration reachable through its ``fplp`` links.  Passes repeat
    until one of them leaves every followset unchanged.

    lemp: object exposing ``nstate`` and the indexable ``sorted`` states.
    '''

    def every_config():
        # Walk the configurations of each state, in state order.
        for index in range(lemp.nstate):
            for config in iterlinks(lemp.sorted[index].cfp):
                yield config

    for config in every_config():
        config.status = INCOMPLETE

    changed = True
    while changed:
        changed = False
        for config in every_config():
            if config.status == COMPLETE:
                continue
            for link in iterlinks(config.fplp):
                target = link.cfp
                if SetUnion(target.fws, config.fws):
                    # The target grew, so it must propagate again.
                    target.status = INCOMPLETE
                    changed = True
            config.status = COMPLETE
Exemplo n.º 2
0
def FindFollowSets(lemp):
    '''Propagate followsets along the forward links until nothing changes.

    A followset is the set of all symbols which can come immediately
    after a configuration.  Every configuration of every state is first
    marked INCOMPLETE.  Each pass then unions the ``fws`` of every
    configuration that is not COMPLETE into the ``fws`` of each
    configuration reachable through its ``fplp`` links.  Passes repeat
    until one of them leaves every followset unchanged.

    lemp: object exposing ``nstate`` and the indexable ``sorted`` states.
    '''

    def every_config():
        # Walk the configurations of each state, in state order.
        for index in range(lemp.nstate):
            for config in iterlinks(lemp.sorted[index].cfp):
                yield config

    for config in every_config():
        config.status = INCOMPLETE

    changed = True
    while changed:
        changed = False
        for config in every_config():
            if config.status == COMPLETE:
                continue
            for link in iterlinks(config.fplp):
                target = link.cfp
                if SetUnion(target.fws, config.fws):
                    # The target grew, so it must propagate again.
                    target.status = INCOMPLETE
                    changed = True
            config.status = COMPLETE
Exemplo n.º 3
0
def Configlist_closure(lemp):
    '''Expand the current configuration list into its closure.

    For every configuration whose dot sits before a nonterminal, a
    configuration with the dot at position 0 is added for each rule of
    that nonterminal.  The new configuration's followset is seeded from
    the symbols after the nonterminal; when all of them can be skipped
    (or there are none), a forward propagation link from the parent
    configuration to the new one is recorded instead.

    lemp: supplies ``errsym``, ``filename`` and the ``errorcnt`` counter
          that is bumped when a nonterminal without rules is found.
    '''

    from plink import Plink_add

    for parent in iterlinks(current):
        rule = parent.rp
        position = parent.dot
        if position >= rule.nrhs:
            continue  # dot is already at the far right
        symbol = rule.rhs[position]
        if symbol.type != NONTERMINAL:
            continue

        if symbol.rule is None and symbol != lemp.errsym:
            ErrorMsg(lemp.filename, rule.line,
                     'Nonterminal "%s" has no rules.', symbol.name)
            lemp.errorcnt += 1

        for production in iterlinks(symbol.rule, 'nextlhs'):
            child = Configlist_add(production, 0)

            # Stays True only if the scan runs off the end of the rule.
            ran_off_end = True
            for k in range(position + 1, rule.nrhs):
                trailing = rule.rhs[k]
                if trailing.type == TERMINAL:
                    SetAdd(child.fws, trailing.index)
                    ran_off_end = False
                    break
                if trailing.type == MULTITERMINAL:
                    for m in range(trailing.nsubsym):
                        SetAdd(child.fws, trailing.subsym[m].index)
                    ran_off_end = False
                    break
                SetUnion(child.fws, trailing.firstset)
                if not trailing._lambda:
                    ran_off_end = False
                    break

            if ran_off_end:
                parent.fplp = Plink_add(parent.fplp, child)
Exemplo n.º 4
0
def Configlist_closure(lemp):
    '''Expand the current configuration list into its closure.

    For every configuration whose dot sits before a nonterminal, a
    configuration with the dot at position 0 is added for each rule of
    that nonterminal.  The new configuration's followset is seeded from
    the symbols after the nonterminal; when all of them can be skipped
    (or there are none), a forward propagation link from the parent
    configuration to the new one is recorded instead.

    lemp: supplies ``errsym``, ``filename`` and the ``errorcnt`` counter
          that is bumped when a nonterminal without rules is found.
    '''

    from plink import Plink_add

    for parent in iterlinks(current):
        rule = parent.rp
        position = parent.dot
        if position >= rule.nrhs:
            continue  # dot is already at the far right
        symbol = rule.rhs[position]
        if symbol.type != NONTERMINAL:
            continue

        if symbol.rule is None and symbol != lemp.errsym:
            ErrorMsg(lemp.filename, rule.line,
                     'Nonterminal "%s" has no rules.', symbol.name)
            lemp.errorcnt += 1

        for production in iterlinks(symbol.rule, 'nextlhs'):
            child = Configlist_add(production, 0)

            # Stays True only if the scan runs off the end of the rule.
            ran_off_end = True
            for k in range(position + 1, rule.nrhs):
                trailing = rule.rhs[k]
                if trailing.type == TERMINAL:
                    SetAdd(child.fws, trailing.index)
                    ran_off_end = False
                    break
                if trailing.type == MULTITERMINAL:
                    for m in range(trailing.nsubsym):
                        SetAdd(child.fws, trailing.subsym[m].index)
                    ran_off_end = False
                    break
                SetUnion(child.fws, trailing.firstset)
                if not trailing._lambda:
                    ran_off_end = False
                    break

            if ran_off_end:
                parent.fplp = Plink_add(parent.fplp, child)
Exemplo n.º 5
0
def FindActions(lemp):
    '''Attach reduce and accept actions to the states and settle conflicts.

    Four steps are performed in order:

    1. every configuration with its dot at the extreme right gets a
       REDUCE action for each terminal found in its followset;
    2. state 0 gets an ACCEPT action on the start symbol;
    3. each state's actions are sorted and actions that share a
       lookahead are handed to ``resolve_conflict``, whose result is
       accumulated in ``lemp.nconflict``;
    4. every rule that no REDUCE action refers to is reported as an
       error and counted in ``lemp.errorcnt``.
    '''

    def states():
        # Yield the states in their sorted order.
        for n in range(lemp.nstate):
            yield lemp.sorted[n]

    # Step 1: reduce actions.
    for state in states():
        for config in iterlinks(state.cfp):
            if config.dot != config.rp.nrhs:
                continue  # dot is not at the extreme right
            for t in range(lemp.nterminal):
                if SetFind(config.fws, t):
                    # Reduce by config.rp when the lookahead is symbol t.
                    state.ap = Action_add(state.ap, REDUCE,
                                          lemp.symbols[t], config.rp)

    # Step 2: the accepting token.  Fall back to the left-hand side of
    # the first rule when no start symbol is named or it is unknown.
    start = Symbol_find(lemp.start) if lemp.start else None
    if start is None:
        start = lemp.rule.lhs

    # The first state is always the starting state of the machine.
    first = lemp.sorted[0]
    first.ap = Action_add(first.ap, ACCEPT, start, None)

    # Step 3: resolve conflicts.
    for state in states():
        state.ap = Action_sort(state.ap)
        action = state.ap
        while action and action.next:
            rival = action.next
            while rival and rival.sp == action.sp:
                # Same lookahead on both: decide which one is used.
                lemp.nconflict += resolve_conflict(action, rival)
                rival = rival.next
            action = action.next

    # Step 4: flag the rules that can never be reduced.
    for rule in iterlinks(lemp.rule):
        rule.canReduce = False
    for state in states():
        for action in iterlinks(state.ap):
            if action.type == REDUCE:
                action.x.rp.canReduce = True
    for rule in iterlinks(lemp.rule):
        if not rule.canReduce:
            ErrorMsg(lemp.filename, rule.ruleline,
                     "This rule can not be reduced.\n")
            lemp.errorcnt += 1
Exemplo n.º 6
0
def FindActions(lemp):
    '''Attach reduce and accept actions to the states and settle conflicts.

    Four steps are performed in order:

    1. every configuration with its dot at the extreme right gets a
       REDUCE action for each terminal found in its followset;
    2. state 0 gets an ACCEPT action on the start symbol;
    3. each state's actions are sorted and actions that share a
       lookahead are handed to ``resolve_conflict``, whose result is
       accumulated in ``lemp.nconflict``;
    4. every rule that no REDUCE action refers to is reported as an
       error and counted in ``lemp.errorcnt``.
    '''

    def states():
        # Yield the states in their sorted order.
        for n in range(lemp.nstate):
            yield lemp.sorted[n]

    # Step 1: reduce actions.
    for state in states():
        for config in iterlinks(state.cfp):
            if config.dot != config.rp.nrhs:
                continue  # dot is not at the extreme right
            for t in range(lemp.nterminal):
                if SetFind(config.fws, t):
                    # Reduce by config.rp when the lookahead is symbol t.
                    state.ap = Action_add(state.ap, REDUCE,
                                          lemp.symbols[t], config.rp)

    # Step 2: the accepting token.  Fall back to the left-hand side of
    # the first rule when no start symbol is named or it is unknown.
    start = Symbol_find(lemp.start) if lemp.start else None
    if start is None:
        start = lemp.rule.lhs

    # The first state is always the starting state of the machine.
    first = lemp.sorted[0]
    first.ap = Action_add(first.ap, ACCEPT, start, None)

    # Step 3: resolve conflicts.
    for state in states():
        state.ap = Action_sort(state.ap)
        action = state.ap
        while action and action.next:
            rival = action.next
            while rival and rival.sp == action.sp:
                # Same lookahead on both: decide which one is used.
                lemp.nconflict += resolve_conflict(action, rival)
                rival = rival.next
            action = action.next

    # Step 4: flag the rules that can never be reduced.
    for rule in iterlinks(lemp.rule):
        rule.canReduce = False
    for state in states():
        for action in iterlinks(state.ap):
            if action.type == REDUCE:
                action.x.rp.canReduce = True
    for rule in iterlinks(lemp.rule):
        if not rule.canReduce:
            ErrorMsg(lemp.filename, rule.ruleline,
                     "This rule can not be reduced.\n")
            lemp.errorcnt += 1
Exemplo n.º 7
0
def buildshifts(lemp, stp):
    '''Build every successor of the state ``stp``.

    A "successor" state is any state which can be reached by a shift
    action.  The configurations of ``stp`` are grouped by the symbol
    that follows their dot.  Each group, with the dot advanced by one,
    forms the basis of a successor obtained from ``getstate``, and a
    SHIFT action on the symbol (or on each sub-symbol of a
    multiterminal) is added to ``stp``.

    lemp: passed through to ``getstate``.
    stp:  the state from which successors are computed.
    '''

    # A configuration becomes COMPLETE once it has contributed to a
    # successor state; none has so far.
    for config in iterlinks(stp.cfp):
        config.status = INCOMPLETE

    for config in iterlinks(stp.cfp):
        # Skip configurations already consumed by an earlier group and
        # those whose dot is at the end (nothing left to shift).
        if config.status == COMPLETE or config.dot >= config.rp.nrhs:
            continue

        Configlist_reset()  # start a fresh basis set
        symbol = config.rp.rhs[config.dot]

        # Collect, from this configuration onward, every unused
        # configuration with the same symbol after its dot.
        for peer in iterlinks(config):
            if peer.status == COMPLETE:
                continue
            peer_rule = peer.rp
            if peer.dot >= peer_rule.nrhs:
                continue
            if not same_symbol(peer_rule.rhs[peer.dot], symbol):
                continue
            peer.status = COMPLETE
            shifted = Configlist_addbasis(peer_rule, peer.dot + 1)
            shifted.bplp = Plink_add(shifted.bplp, peer)

        # The state described by the basis set just assembled.
        successor = getstate(lemp)

        # Record the shift(s) leading from stp to the successor.
        if symbol.type == MULTITERMINAL:
            lookaheads = [symbol.subsym[k] for k in range(symbol.nsubsym)]
        else:
            lookaheads = [symbol]
        for lookahead in lookaheads:
            stp.ap = Action_add(stp.ap, SHIFT, lookahead, successor)
Exemplo n.º 8
0
def buildshifts(lemp, stp):
    '''Build every successor of the state ``stp``.

    A "successor" state is any state which can be reached by a shift
    action.  The configurations of ``stp`` are grouped by the symbol
    that follows their dot.  Each group, with the dot advanced by one,
    forms the basis of a successor obtained from ``getstate``, and a
    SHIFT action on the symbol (or on each sub-symbol of a
    multiterminal) is added to ``stp``.

    lemp: passed through to ``getstate``.
    stp:  the state from which successors are computed.
    '''

    # A configuration becomes COMPLETE once it has contributed to a
    # successor state; none has so far.
    for config in iterlinks(stp.cfp):
        config.status = INCOMPLETE

    for config in iterlinks(stp.cfp):
        # Skip configurations already consumed by an earlier group and
        # those whose dot is at the end (nothing left to shift).
        if config.status == COMPLETE or config.dot >= config.rp.nrhs:
            continue

        Configlist_reset()  # start a fresh basis set
        symbol = config.rp.rhs[config.dot]

        # Collect, from this configuration onward, every unused
        # configuration with the same symbol after its dot.
        for peer in iterlinks(config):
            if peer.status == COMPLETE:
                continue
            peer_rule = peer.rp
            if peer.dot >= peer_rule.nrhs:
                continue
            if not same_symbol(peer_rule.rhs[peer.dot], symbol):
                continue
            peer.status = COMPLETE
            shifted = Configlist_addbasis(peer_rule, peer.dot + 1)
            shifted.bplp = Plink_add(shifted.bplp, peer)

        # The state described by the basis set just assembled.
        successor = getstate(lemp)

        # Record the shift(s) leading from stp to the successor.
        if symbol.type == MULTITERMINAL:
            lookaheads = [symbol.subsym[k] for k in range(symbol.nsubsym)]
        else:
            lookaheads = [symbol]
        for lookahead in lookaheads:
            stp.ap = Action_add(stp.ap, SHIFT, lookahead, successor)
Exemplo n.º 9
0
def FindFirstSets(lemp):
    '''Compute the ``_lambda`` flag and the first set of the grammar symbols.

    First find all nonterminals which will generate the empty string,
    then compute the first set of every nonterminal.  The first set is
    the set of all terminal symbols which can begin a string generated
    by that nonterminal.  Both computations iterate over the rules
    until a complete pass changes nothing.
    '''

    # Reset the flag on every symbol; only the symbols at index
    # nterminal and above receive a fresh first set.
    for n in range(lemp.nsymbol):
        lemp.symbols[n]._lambda = False
    for n in range(lemp.nterminal, lemp.nsymbol):
        lemp.symbols[n].firstset = SetNew()

    # Pass 1: a left-hand side generates the empty string when every
    # symbol of one of its right-hand sides does (trivially so for an
    # empty right-hand side).
    changed = True
    while changed:
        changed = False
        for rule in iterlinks(lemp.rule):
            head = rule.lhs
            if head._lambda:
                continue
            all_empty = True
            for pos in range(rule.nrhs):
                member = rule.rhs[pos]
                assert member.type == NONTERMINAL or not member._lambda
                if not member._lambda:
                    all_empty = False
                    break
            if all_empty:
                head._lambda = True
                changed = True

    # Pass 2: grow the first sets.  "added" accumulates whatever the
    # set operations return, so a pass that adds nothing ends the loop.
    added = 1
    while added:
        added = 0
        for rule in iterlinks(lemp.rule):
            head = rule.lhs
            for pos in range(rule.nrhs):
                member = rule.rhs[pos]
                if member.type == TERMINAL:
                    added += SetAdd(head.firstset, member.index)
                    break
                if member.type == MULTITERMINAL:
                    for k in range(member.nsubsym):
                        added += SetAdd(head.firstset, member.subsym[k].index)
                    break
                if head == member:
                    # Self-reference: look past it only if it can vanish.
                    if not head._lambda:
                        break
                    continue
                added += SetUnion(head.firstset, member.firstset)
                if not member._lambda:
                    break
Exemplo n.º 10
0
def FindFirstSets(lemp):
    '''Compute the ``_lambda`` flag and the first set of the grammar symbols.

    First find all nonterminals which will generate the empty string,
    then compute the first set of every nonterminal.  The first set is
    the set of all terminal symbols which can begin a string generated
    by that nonterminal.  Both computations iterate over the rules
    until a complete pass changes nothing.
    '''

    # Reset the flag on every symbol; only the symbols at index
    # nterminal and above receive a fresh first set.
    for n in range(lemp.nsymbol):
        lemp.symbols[n]._lambda = False
    for n in range(lemp.nterminal, lemp.nsymbol):
        lemp.symbols[n].firstset = SetNew()

    # Pass 1: a left-hand side generates the empty string when every
    # symbol of one of its right-hand sides does (trivially so for an
    # empty right-hand side).
    changed = True
    while changed:
        changed = False
        for rule in iterlinks(lemp.rule):
            head = rule.lhs
            if head._lambda:
                continue
            all_empty = True
            for pos in range(rule.nrhs):
                member = rule.rhs[pos]
                assert member.type == NONTERMINAL or not member._lambda
                if not member._lambda:
                    all_empty = False
                    break
            if all_empty:
                head._lambda = True
                changed = True

    # Pass 2: grow the first sets.  "added" accumulates whatever the
    # set operations return, so a pass that adds nothing ends the loop.
    added = 1
    while added:
        added = 0
        for rule in iterlinks(lemp.rule):
            head = rule.lhs
            for pos in range(rule.nrhs):
                member = rule.rhs[pos]
                if member.type == TERMINAL:
                    added += SetAdd(head.firstset, member.index)
                    break
                if member.type == MULTITERMINAL:
                    for k in range(member.nsubsym):
                        added += SetAdd(head.firstset, member.subsym[k].index)
                    break
                if head == member:
                    # Self-reference: look past it only if it can vanish.
                    if not head._lambda:
                        break
                    continue
                added += SetUnion(head.firstset, member.firstset)
                if not member._lambda:
                    break
Exemplo n.º 11
0
def FindStates(lemp):
    '''Compute all LR(0) states for the grammar.

    Links are added between some states so that the LR(1) follow sets
    can be computed later.  The start symbol is ``lemp.start`` when it
    names a known symbol, otherwise the left-hand side of the first
    rule.  Problems with the start symbol are reported through
    ``ErrorMsg`` and counted in ``lemp.errorcnt``.
    '''

    Configlist_init()

    # Locate the start symbol.
    if lemp.start:
        start = Symbol_find(lemp.start)
        if start is None:
            fallback = lemp.rule.lhs
            ErrorMsg(lemp.filename, 0,
                     'The specified start symbol "%s" '
                     'is not in a nonterminal of the grammar.  '
                     '"%s" will be used as the start symbol instead.',
                     lemp.start, fallback.name)
            lemp.errorcnt += 1
            start = fallback
    else:
        start = lemp.rule.lhs

    # The start symbol must not occur on the right-hand side of any
    # rule; report every occurrence.  (YACC would generate a new start
    # symbol in this case.)
    # FIX ME: multiterminals are not dealt with by this comparison.
    for rule in iterlinks(lemp.rule):
        for pos in range(rule.nrhs):
            if rule.rhs[pos] != start:
                continue
            ErrorMsg(lemp.filename, 0,
                     'The start symbol "%s" '
                     'occurs on the right-hand side of a rule. '
                     'This will result in a parser '
                     'which does not work properly.',
                     start.name)
            lemp.errorcnt += 1

    # The basis of the first state is every rule whose left-hand side
    # is the start symbol, each with symbol index 0 in its followset.
    for rule in iterlinks(start.rule, 'nextlhs'):
        rule.lhsStart = 1
        basis = Configlist_addbasis(rule, 0)
        SetAdd(basis.fws, 0)

    # Computing the first state computes all the others as well; the
    # returned state is not needed here.
    getstate(lemp)
Exemplo n.º 12
0
def FindRulePrecedences(xp):
    '''Assign a precedence symbol to every rule that lacks one.

    Rules whose ``precsym`` is already set (coded in the input grammar
    with the "[symbol]" construct) are left alone.  Any other rule
    takes the first right-hand-side symbol with a defined precedence
    (``prec >= 0``); for a multiterminal, its first sub-symbol with a
    defined precedence is used.  If no such symbol exists, ``precsym``
    stays None.
    '''

    for rule in iterlinks(xp.rule):
        if rule.precsym is not None:
            continue  # precedence given explicitly

        for pos in range(rule.nrhs):
            candidate = rule.rhs[pos]
            if candidate.type == MULTITERMINAL:
                for k in range(candidate.nsubsym):
                    if candidate.subsym[k].prec >= 0:
                        rule.precsym = candidate.subsym[k]
                        break
            elif candidate.prec >= 0:
                rule.precsym = candidate

            # Stop at the first symbol that supplied a precedence.
            if rule.precsym is not None:
                break
Exemplo n.º 13
0
def FindRulePrecedences(xp):
    '''Assign a precedence symbol to every rule that lacks one.

    Rules whose ``precsym`` is already set (coded in the input grammar
    with the "[symbol]" construct) are left alone.  Any other rule
    takes the first right-hand-side symbol with a defined precedence
    (``prec >= 0``); for a multiterminal, its first sub-symbol with a
    defined precedence is used.  If no such symbol exists, ``precsym``
    stays None.
    '''

    for rule in iterlinks(xp.rule):
        if rule.precsym is not None:
            continue  # precedence given explicitly

        for pos in range(rule.nrhs):
            candidate = rule.rhs[pos]
            if candidate.type == MULTITERMINAL:
                for k in range(candidate.nsubsym):
                    if candidate.subsym[k].prec >= 0:
                        rule.precsym = candidate.subsym[k]
                        break
            elif candidate.prec >= 0:
                rule.precsym = candidate

            # Stop at the first symbol that supplied a precedence.
            if rule.precsym is not None:
                break
Exemplo n.º 14
0
def FindStates(lemp):
    '''Compute all LR(0) states for the grammar.

    Links are added between some states so that the LR(1) follow sets
    can be computed later.  The start symbol is ``lemp.start`` when it
    names a known symbol, otherwise the left-hand side of the first
    rule.  Problems with the start symbol are reported through
    ``ErrorMsg`` and counted in ``lemp.errorcnt``.
    '''

    Configlist_init()

    # Locate the start symbol.
    if lemp.start:
        start = Symbol_find(lemp.start)
        if start is None:
            fallback = lemp.rule.lhs
            ErrorMsg(lemp.filename, 0,
                     'The specified start symbol "%s" '
                     'is not in a nonterminal of the grammar.  '
                     '"%s" will be used as the start symbol instead.',
                     lemp.start, fallback.name)
            lemp.errorcnt += 1
            start = fallback
    else:
        start = lemp.rule.lhs

    # The start symbol must not occur on the right-hand side of any
    # rule; report every occurrence.  (YACC would generate a new start
    # symbol in this case.)
    # FIX ME: multiterminals are not dealt with by this comparison.
    for rule in iterlinks(lemp.rule):
        for pos in range(rule.nrhs):
            if rule.rhs[pos] != start:
                continue
            ErrorMsg(lemp.filename, 0,
                     'The start symbol "%s" '
                     'occurs on the right-hand side of a rule. '
                     'This will result in a parser '
                     'which does not work properly.',
                     start.name)
            lemp.errorcnt += 1

    # The basis of the first state is every rule whose left-hand side
    # is the start symbol, each with symbol index 0 in its followset.
    for rule in iterlinks(start.rule, 'nextlhs'):
        rule.lhsStart = 1
        basis = Configlist_addbasis(rule, 0)
        SetAdd(basis.fws, 0)

    # Computing the first state computes all the others as well; the
    # returned state is not needed here.
    getstate(lemp)
Exemplo n.º 15
0
def FindLinks(lemp):
    '''Build the forward propagation links from the recorded backlinks.

    Each configuration first receives a pointer (``stp``) to the state
    that owns it.  Then, for every backlink on a configuration, a
    forward link pointing at that configuration is added to the
    configuration the backlink refers to.  Only the forward links are
    used in the follow-set computation.
    '''

    def states():
        # Yield the states in their sorted order.
        for n in range(lemp.nstate):
            yield lemp.sorted[n]

    # Housekeeping: tie each configuration to its owning state.
    for state in states():
        for config in iterlinks(state.cfp):
            config.stp = state

    # Mirror every backlink as a forward link on its source.
    for state in states():
        for config in iterlinks(state.cfp):
            for link in iterlinks(config.bplp):
                source = link.cfp
                source.fplp = Plink_add(source.fplp, config)
Exemplo n.º 16
0
def FindLinks(lemp):
    '''Build the forward propagation links from the recorded backlinks.

    Each configuration first receives a pointer (``stp``) to the state
    that owns it.  Then, for every backlink on a configuration, a
    forward link pointing at that configuration is added to the
    configuration the backlink refers to.  Only the forward links are
    used in the follow-set computation.
    '''

    def states():
        # Yield the states in their sorted order.
        for n in range(lemp.nstate):
            yield lemp.sorted[n]

    # Housekeeping: tie each configuration to its owning state.
    for state in states():
        for config in iterlinks(state.cfp):
            config.stp = state

    # Mirror every backlink as a forward link on its source.
    for state in states():
        for config in iterlinks(state.cfp):
            for link in iterlinks(config.bplp):
                source = link.cfp
                source.fplp = Plink_add(source.fplp, config)