Пример #1
0
def build_for_regexp(regexp):
    """Build an automaton for ``regexp`` from its syntax tree.

    A worklist of unprocessed states is seeded with the tree's firstpos
    set.  For each state taken from the worklist and for each distinct
    character of ``regexp``, the followpos sets of the positions the
    state shares with that character are merged into a target state and
    a transition on that character is recorded.

    :param regexp: iterable of characters handed to ``st.build_tree``.
    :return: ``Automata`` built around the initial state.
    """
    q = {
        'marked': list(),
        'unmarked': list(),
    }
    # One entry per distinct character, in order of first appearance;
    # the real position lists are filled in once the tree exists.
    char_position = {symbol: list() for symbol in regexp}

    tree = st.build_tree(regexp)
    st.visualize_tree(tree, regexp)
    followpos = st.get_followpos(tree)

    q0 = State(positions=st.firstpos(tree))
    q['unmarked'].append(q0)

    for symbol in char_position:
        char_position[symbol] = st.get_char_positions(tree, symbol)

    while q['unmarked']:
        # Move the oldest unprocessed state to the processed list.
        current = q['unmarked'].pop(0)
        q['marked'].append(current)

        for symbol, symbol_positions in char_position.items():
            shared = current.positions.intersection(set(symbol_positions))
            if not shared:
                continue

            reachable = set()
            for position in shared:
                reachable.update(followpos[position])
            if not reachable:
                continue

            candidate = State(positions=reachable)
            # state_in_q reports whether the state is already known and
            # hands back the state object to link to.
            already_known, candidate = state_in_q(q, candidate)
            if not already_known:
                q['unmarked'].append(candidate)
            current.move_on_char(character=symbol, dest=candidate)

    # States containing the largest followpos key are flagged as final.
    end_key_position = max(followpos.keys())
    for state in q['marked']:
        if end_key_position in state.positions:
            # NOTE(review): other builders in this file set `is_final`;
            # confirm that State really reads `isFinalState`.
            state.isFinalState = True
    return Automata(q0)
Пример #2
0
    def __init__(self, atomsFile, programFile):
        """Load the atom codes from ``atomsFile`` and parse ``programFile``.

        :param atomsFile: path to a text file with one ``<atom> <code>``
            pair per line, separated by a single space.
        :param programFile: path handed to ``startParser`` once the
            object is set up.
        """
        self.atoms = {}
        self.prog = ''
        self.PIF = []
        self.symTable = Tree()
        self.outCodes = {}
        self.lineCount = 1
        self.symTableAddress = 1
        self.fa_const = Automata('fa_const.txt')
        self.fa_id = Automata('fa_id.txt')

        with open(atomsFile, 'r') as atomsHandle:
            for entry in atomsHandle:
                name, number = entry.split(' ')
                self.atoms[name] = int(number)

        # A log file left over from a run with the same timestamp is removed.
        timestamp = strftime('%Y%m%d_%H%M%S', gmtime())
        self.logName = 'parser_log_' + timestamp + '.txt'
        if os.path.exists(self.logName):
            os.remove(self.logName)

        self.startParser(programFile)
def build_dfa():
    """Return a hard-coded five-state automaton over 'a' and 'b'.

    States carry the position sets {1}..{5}; the first and the last
    state are final and the first state is the start state.
    """
    states = [State(positions={number}) for number in range(1, 6)]

    # (source index, character, destination index)
    transitions = (
        (0, 'a', 2),
        (0, 'b', 1),
        (1, 'b', 0),
        (1, 'a', 3),
        (2, 'b', 3),
        (2, 'a', 4),
        (3, 'a', 3),
        (3, 'b', 3),
        (4, 'a', 2),
        (4, 'b', 1),
    )
    for source, char, target in transitions:
        states[source].move_on_char(char, states[target])

    for final_index in (0, 4):
        states[final_index].is_final = True

    return Automata(q0=states[0])
def build_min_dfa(component, states, start_state):
    """Build an automaton whose states are the groups named in ``component``.

    ``component[i]`` is the 1-based group number of ``states[i]``.  The
    first entry of both lists is dropped in place before processing, so
    the caller's lists are modified.  The group holding ``start_state``
    is stored in the module-level ``start`` and used as the start state.

    :param component: list of group numbers, parallel to ``states``.
    :param states: list of original states.
    :param start_state: original state whose group becomes the start.
    :return: ``Automata`` built around the start group.
    """
    global start
    group_total = max(component)
    groups = [State({number}) for number in range(group_total)]

    component.pop(0)
    states.pop(0)

    for index, old_state in enumerate(states):
        merged = groups[component[index] - 1]
        if old_state.is_final:
            merged.is_final = True
        if old_state == start_state:
            start = merged

        # The index into char_transitions is used as the character code.
        for code, target in enumerate(old_state.char_transitions):
            # Skip missing transitions and those into the state with positions {0}.
            if target is None or target.positions == {0}:
                continue
            target_group = groups[component[states.index(target)] - 1]
            merged.move_on_char(chr(code), target_group)

    return Automata(start)
def build_minimize_dfa():
    """Return a hard-coded seven-state automaton over 'a' and 'b'.

    States carry the position sets {1}..{7}; the fifth and sixth states
    are final and the first state is the start state.
    """
    states = [State(positions={number}) for number in range(1, 8)]

    # (source index, character, destination index)
    transitions = (
        (0, 'a', 6),
        (0, 'b', 1),
        (1, 'a', 6),
        (1, 'b', 0),
        (2, 'a', 3),
        (2, 'b', 4),
        (3, 'a', 4),
        (3, 'b', 5),
        (4, 'a', 4),
        (4, 'b', 4),
        (5, 'a', 5),
        (5, 'b', 4),
        (6, 'a', 2),
        (6, 'b', 2),
    )
    for source, char, target in transitions:
        states[source].move_on_char(char, states[target])

    for final_index in (4, 5):
        states[final_index].is_final = True

    return Automata(q0=states[0])
Пример #6
0
class Program:
    """Character-level scanner that fills ``PIF`` and the symbol table.

    The source file is read line by line; the ``fa_id`` and ``fa_const``
    automata decide how far the current candidate sequence extends, and
    every recognised token is appended to ``self.PIF``.

    Output uses ``print(<single expression>)``, which behaves the same
    under Python 2 and Python 3.
    """

    def __init__(self, atomsFile, programFile):
        """Load the atom codes from ``atomsFile`` and scan ``programFile``.

        :param atomsFile: path to a text file with one ``<atom> <code>``
            pair per line, separated by a single space.
        :param programFile: path handed to ``startParser``.
        """
        self.atoms = {}
        self.prog = ''
        self.PIF = []
        self.symTable = Tree()
        self.outCodes = {}
        self.lineCount = 1
        self.symTableAddress = 1
        self.fa_const = Automata('fa_const.txt')
        self.fa_id = Automata('fa_id.txt')

        with open(atomsFile, 'r') as f:
            for line in f:
                atom, code = line.split(' ')
                self.atoms[atom] = int(code)

        self.logName = 'parser_log_' + strftime('%Y%m%d_%H%M%S',
                                                gmtime()) + '.txt'
        # A log file left over from a run with the same timestamp is removed.
        if os.path.exists(self.logName):
            os.remove(self.logName)
        self.startParser(programFile)

    def Log(self, msg):
        """Logging hook; currently does nothing."""
        pass

    def updatePifAndSymTable(self, prefix):
        """Append the PIF entry for ``prefix``.

        A ``prefix`` listed in ``self.atoms`` is stored with address
        ``'-'``.  Anything else is looked up in the symbol table (and
        added when missing) and stored as ``ID`` when ``fa_id`` accepts
        it, otherwise as ``CONST``.
        """
        if prefix in self.atoms:
            self.PIF.append(Atom(self.atoms[prefix], '-', prefix))
        else:
            n = CustomNode(prefix, self.symTableAddress)
            nFind = self.symTable.findNode(n)
            if nFind is None:
                self.symTableAddress += 1
                self.symTable.addNode(n)
                nFind = n
            if self.fa_id.verifySequence(prefix)[0]:
                self.PIF.append(Atom(self.atoms['ID'], nFind.code, prefix))
            else:
                self.PIF.append(Atom(self.atoms['CONST'], nFind.code, prefix))

    def _startsOperand(self, line, pos):
        """Return True when ``line[pos]`` exists and is accepted by
        ``fa_id`` or ``fa_const`` as a one-character sequence.

        ``verifySequence`` returns a ``(status, prefix)`` pair, so the
        status must be read with ``[0]``; testing the pair itself is
        always truthy.
        """
        if pos >= len(line):
            return False
        nextChar = line[pos]
        return bool(self.fa_id.verifySequence(nextChar)[0]
                    or self.fa_const.verifySequence(nextChar)[0])

    def startParser(self, programFile):
        """Scan ``programFile`` and record its tokens.

        For each stripped line the candidate ``line[lastPos:i + 1]`` is
        grown one character at a time.  When neither automaton accepts
        it, the accepted prefix (if any) is flushed and the character at
        ``i`` is handled as a separator or operator.  Unknown characters
        print a syntax error and end the process via ``sys.exit(0)``.

        :param programFile: path of the source file to scan.
        """
        # While True, lastPos is not advanced, so the pending characters
        # stay part of the next candidate.  The flag is not reset between lines.
        isPartOfId = False
        with open(programFile, 'r') as f:
            for line in f:
                line = line.strip()
                lastPos = 0
                i = 0
                while i < len(line):
                    crtSeq = line[lastPos:i + 1]
                    status, prefix = self.fa_id.verifySequence(crtSeq)
                    # Fall back to the constant automaton only when the
                    # identifier automaton matched nothing at all.
                    if not status and prefix == '':
                        status2, prefix2 = self.fa_const.verifySequence(crtSeq)
                        if status2 or prefix2 != '':
                            status, prefix = status2, prefix2
                    if not status:

                        if i < len(line) - 1 and line[i:i + 2] in {
                                '<=', '>=', '==', '<>'
                        }:
                            if prefix != '':
                                self.updatePifAndSymTable(prefix)
                                isPartOfId = False
                            self.updatePifAndSymTable(line[i:i + 2])
                            i += 2
                        elif line[i] in {
                                ' ', ';', ',', '(', ')', '<', '>', '_', '.'
                        }:
                            if line[i] != '.':
                                if prefix != '':
                                    self.updatePifAndSymTable(prefix)
                                    isPartOfId = False

                                if isPartOfId is True and line[i] != ' ':
                                    print('Syntax error at line [%d] - [%s]!' % (
                                        self.lineCount, line[i]))
                                    sys.exit(0)

                                if line[i] not in {' ', '_'}:
                                    self.updatePifAndSymTable(line[i])
                                    # NOTE(review): this test can never be
                                    # true inside the enclosing
                                    # `not in {' ', '_'}` check; confirm the
                                    # intended handling of '_'.
                                    if line[i] == '_':
                                        isPartOfId = True
                            else:
                                # A '.' is kept as part of the pending token.
                                isPartOfId = True
                            i += 1
                        elif line[i] == ':':
                            if prefix != '':
                                self.updatePifAndSymTable(prefix)
                                isPartOfId = False
                            if i < len(line) - 1 and line[i + 1] == '=':
                                self.updatePifAndSymTable(':=')
                                i += 2
                            else:
                                self.updatePifAndSymTable(':')
                                i += 1
                        elif line[i] in {'+', '-', '/', '*'}:
                            if prefix != '':
                                self.updatePifAndSymTable(prefix)
                                isPartOfId = False
                            # NOTE(review): '/' and '*' are consumed here
                            # without a PIF entry; confirm that is intended.
                            if line[i] in {'+', '-'}:
                                # A sign directly followed by the start of
                                # an identifier/constant stays attached to
                                # it; otherwise it is an operator token.
                                if not self._startsOperand(line, i + 1):
                                    self.updatePifAndSymTable(line[i])
                                else:
                                    isPartOfId = True
                            i += 1
                        elif line[i] in {'$', '%', '&'}:
                            isPartOfId = True
                            i += 1
                        else:
                            print('Syntax error at line %d! Unexpected token [%s].' % (
                                self.lineCount, line[i]))
                            sys.exit(0)

                        if not isPartOfId:
                            lastPos = i
                    else:
                        i += 1
                    # Flush whatever is still pending at the end of the line.
                    if i == len(line) and (status or prefix != ''):
                        self.updatePifAndSymTable(prefix)
                self.lineCount += 1

    def printPIF(self):
        """Print a header followed by every entry of ``self.PIF``."""
        print('%-10s|\t%-10s|\t%-40s' % ('CODE', 'ADDRESS', 'VALUE'))
        print('%-10s|\t%-10s|\t%-40s' % ('_________', '__________',
                                         '___________'))
        for at in self.PIF:
            print(at)

    def printSymTable(self):
        """Print the symbol table via ``Tree.printTree``."""
        self.symTable.printTree()
Пример #7
0
class Program:
    """Line-oriented parser that fills ``PIF`` and the symbol table.

    Every non-empty line of the program file is classified by its shape
    (declaration, READ/WRITE, BEGIN/END, assignment, IF, ELSE, WHILE) and
    handed to the matching ``parse*`` method.  Errors are printed and the
    process is ended with ``sys.exit(0)``.

    Output uses ``print(<single expression>)``, which behaves the same
    under Python 2 and Python 3.
    """

    # Two-character comparators come first so that e.g. '>=' is never
    # mistaken for '>'.
    COMPARATORS = ('>=', '<=', '<>', '==', '>', '<')

    def __init__(self, atomsFile, programFile):
        """Load the atom codes from ``atomsFile`` and parse ``programFile``.

        :param atomsFile: path to a text file with one ``<atom> <code>``
            pair per line, separated by a single space.
        :param programFile: path handed to ``startParser``.
        """
        self.atoms = {}
        self.prog = ''
        self.PIF = []
        self.symTable = Tree()
        self.outCodes = {}
        self.lineCount = 1
        self.symTableAddress = 1
        self.fa_const = Automata('fa_const.txt')
        self.fa_id = Automata('fa_id.txt')

        with open(atomsFile, 'r') as f:
            for line in f:
                atom, code = line.split(' ')
                self.atoms[atom] = int(code)

        self.logName = 'parser_log_' + strftime('%Y%m%d_%H%M%S', gmtime()) + '.txt'
        # A log file left over from a run with the same timestamp is removed.
        if os.path.exists(self.logName):
            os.remove(self.logName)
        self.startParser(programFile)

    def Log(self, msg):
        """Logging hook; writing to ``self.logName`` is currently disabled."""
        pass

    # with open(self.logName, 'a') as f:
    # 	f.write(msg+'\n')

    @staticmethod
    def _stripKeywords(line, opening, closing):
        """Return ``line`` without a leading ``opening`` and a trailing
        ``closing`` keyword, stripped of surrounding whitespace.

        Only the two ends are touched, so keyword text inside identifiers
        is left alone.  ``closing`` must be non-empty.
        """
        line = line.strip()
        if line.startswith(opening):
            line = line[len(opening):]
        if line.endswith(closing):
            line = line[:-len(closing)]
        return line.strip()

    def _splitComparison(self, line, keyword):
        """Split ``line`` around its comparator.

        :param line: condition text without the surrounding keywords.
        :param keyword: statement name used in the error message.
        :return: ``(left, comparator, right)``.
        Prints an error and exits when no comparator is present.
        """
        for comp in self.COMPARATORS:
            if comp in line:
                parts = line.split(comp)
                return parts[0], comp, parts[1]
        print('Error at line %d. Bad expression after %s: %s!' % (self.lineCount, keyword, line))
        sys.exit(0)

    def _lookupOrAdd(self, name):
        """Return the symbol-table node for ``name``, adding one if missing.

        The node that is already stored is returned when there is one, so
        the PIF refers to the address assigned on first use.
        """
        node = CustomNode(name, self.symTableAddress)
        found = self.symTable.findNode(node)
        if found is not None:
            return found
        self.symTableAddress += 1
        self.symTable.addNode(node)
        return node

    def initSymTable(self, tokens):
        """Register every declared variable name in ``tokens``.

        Each name is stripped, validated (no inner space, at most 255
        characters, accepted by ``fa_id``), added to the symbol table and
        appended to the PIF as ``ID``.
        """
        for t in tokens:
            t = t.strip()
            if ' ' in t:
                print('Error at line %d. Missing comma between variables [%s]' % (self.lineCount, t))
                sys.exit(0)
            if len(t) > 255:
                print('Error at line %d. Identifier length exceeds 255 (%d)' % (self.lineCount, len(t)))
                sys.exit(0)
            if not self.fa_id.verifySequence(t)[0]:
                print('Syntax error at line %d [%s]' % (self.lineCount, t))
                sys.exit(0)
            n = CustomNode(t, self.symTableAddress)
            self.symTableAddress += 1
            self.symTable.addNode(n)
            self.PIF.append(Atom(self.atoms['ID'], n.code, t))

    def startParser(self, programFile):
        """Read ``programFile`` and dispatch each line to its parser.

        The first non-empty line must start with ``VAR``; the file must
        end with a line equal to ``END.`` and nothing may follow it.
        """
        begins = 0
        ends = 0
        ifs = 0
        elses = 0
        firstLine = True
        lastLine = False
        with open(programFile, 'r') as f:
            for line in f:
                if lastLine:
                    print('Error at line %d! Found more code after END. keyword!' % self.lineCount)
                    sys.exit(0)
                line = line.strip()
                if line == '':
                    self.lineCount += 1
                    continue
                if firstLine:
                    firstLine = False
                    if not line.upper().startswith('VAR'):
                        print('Error at line %d. Could not find declaration of variables!' % self.lineCount)
                        sys.exit(0)
                    # The rest of the VAR line is classified like any other line.
                    line = line[3:].lstrip()
                    self.PIF.append(Atom(self.atoms['VAR'], '-', 'VAR'))
                    self.Log('Found starting VAR')

                if ',' in line:
                    self.Log('Found declarations line: %s' % line)
                    self.parseDeclarations(line)
                elif line.startswith('READ') or line.startswith('WRITE'):
                    self.Log('Found READ/WRITE line: %s' % line)
                    self.parseReadWrite(line)
                elif line == 'BEGIN':
                    begins += 1
                    self.PIF.append(Atom(self.atoms['BEGIN'], '-', 'BEGIN'))
                elif 'END' in line and ';' in line:
                    ends += 1
                    if ends > begins:
                        print('Error at line %d. No BEGIN to match the current END!' % self.lineCount)
                        sys.exit(0)
                    self.PIF.append(Atom(self.atoms['END'], '-', 'END'))
                    self.PIF.append(Atom(self.atoms[';'], '-', ';'))
                elif line == 'END.':
                    self.PIF.append(Atom(self.atoms['END'], '-', 'END.'))
                    lastLine = True
                elif ':=' in line:
                    self.Log('Found assignment line: %s' % line)
                    self.parseAssign(line)
                elif line.startswith('IF') and line.endswith('THEN'):
                    self.Log('Found if stmt line: %s' % line)
                    self.parseIf(line)
                    ifs += 1
                elif line == 'ELSE':
                    elses += 1
                    if elses > ifs:
                        print('Error at line %d. No IF to match the current ELSE!' % self.lineCount)
                        sys.exit(0)
                    self.PIF.append(Atom(self.atoms['ELSE'], '-', 'ELSE'))
                elif line.startswith('WHILE') and line.endswith('DO'):
                    self.parseWhile(line)
                else:
                    print('Error at line %d. Unmatched syntax [%s].' % (self.lineCount, line))
                    sys.exit(0)
                self.lineCount += 1
        if not lastLine:
            print('Error! Could not find END. keyword!')
            sys.exit(0)

    def parseDeclarations(self, line):
        """Parse ``name, name, ... : TYPE;`` and record names and type."""
        varsAndType = line.split(':')
        if len(varsAndType) != 2:
            print('Error at line %d [%s].' % (self.lineCount, line))
            sys.exit(0)

        variables, declType = varsAndType
        tokens = variables.split(',')
        if ';' not in declType:
            print('Error at line %d. Missing ending ;.' % self.lineCount)
            sys.exit(0)
        declType = declType.replace(';', '').strip()
        self.initSymTable(tokens)
        self.PIF.append(Atom(self.atoms[declType], '-', declType))
        self.PIF.append(Atom(self.atoms[';'], '-', ';'))

    def parseReadWrite(self, line):
        """Parse ``READ(<id>);`` or ``WRITE(<id or const>);``."""
        # Decide by the leading keyword only, so that an argument that
        # merely contains 'READ' does not turn a WRITE into a READ.
        if line.startswith('READ'):
            readOrWrite = 'READ'
        else:
            readOrWrite = 'WRITE'
        if line.startswith(readOrWrite):
            line = line[len(readOrWrite):]
        line = line.strip()
        if not line.startswith('('):
            print('Error at line %d. Missing open bracket (.' % self.lineCount)
            sys.exit(0)

        self.PIF.append(Atom(self.atoms[readOrWrite.upper()], '-', readOrWrite))
        self.PIF.append(Atom(self.atoms['('], '-', '('))

        # Drop only the opening bracket that was just verified.
        line = line[1:].strip()
        tokens = line.split(')')
        if len(tokens) != 2 or tokens[1].strip() != ';':
            print('Syntax error at line %d [%s]' % (self.lineCount, line))
            sys.exit(0)

        identifier = tokens[0].strip()
        identMatch = self.fa_id.verifySequence(identifier)[0]
        numMatch = self.fa_const.verifySequence(identifier)[0]
        if numMatch:
            if readOrWrite == 'READ':
                print('Error at line %d. Cannot read constants!' % self.lineCount)
                sys.exit(0)
            node = self._lookupOrAdd(identifier)
            self.PIF.append(Atom(self.atoms['CONST'], node.code, identifier))
        elif identMatch:
            node = self._lookupOrAdd(identifier)
            self.PIF.append(Atom(self.atoms['ID'], node.code, identifier))
        else:
            print('Syntax error at line %d [%s]' % (self.lineCount, identifier))
            sys.exit(0)
        self.PIF.append(Atom(self.atoms[')'], '-', ')'))
        self.PIF.append(Atom(self.atoms[';'], '-', ';'))

    def parseExpression(self, line, hasEnding=True):
        """Parse operands separated by ``+ - / *`` or ``MOD``.

        :param line: expression text.
        :param hasEnding: when True the last operand must carry a ``;``,
            which is recorded in the PIF after the expression.
        """
        operators = []
        tokens = []
        crtToken = ''
        i = 0
        while i < len(line):
            if line[i] in '+-/*':
                tokens.append(crtToken)
                crtToken = ''
                operators.append(line[i])
                i += 1
            elif line[i:i + 3] == 'MOD':
                tokens.append(crtToken)
                crtToken = ''
                operators.append('MOD')
                i += 3
            else:
                crtToken += line[i]
                i += 1
        if crtToken != '':
            tokens.append(crtToken)

        tokens = [t.strip() for t in tokens]

        if hasEnding:
            # An empty expression has no last operand to carry the ';'.
            if not tokens or ';' not in tokens[-1]:
                print('Missing ";" on line %d: [%s]' % (self.lineCount, line))
                sys.exit(0)
            tokens[-1] = tokens[-1].replace(';', '').strip()
        self.Log('Separated expression tokens: ' + str(tokens))
        self.Log('Operators between tokens: ' + str(operators))
        crtOperator = 0
        for t in tokens:
            identMatch = self.fa_id.verifySequence(t)[0]
            numMatch = self.fa_const.verifySequence(t)[0]
            if identMatch:
                if len(t) > 255:
                    print('Error at line %d. Identifier length exceeds 255 (%d)' % (self.lineCount, len(t)))
                    sys.exit(0)
                node = self._lookupOrAdd(t)
                self.PIF.append(Atom(self.atoms['ID'], node.code, t))
            elif numMatch:
                node = self._lookupOrAdd(t)
                self.PIF.append(Atom(self.atoms['CONST'], node.code, t))
            else:
                self.errSyntax(t)

            # Operators are interleaved with operands in source order.
            if crtOperator < len(operators):
                self.PIF.append(Atom(self.atoms[operators[crtOperator]], '-', operators[crtOperator]))
                crtOperator += 1
        if hasEnding:
            self.PIF.append(Atom(self.atoms[';'], '-', ';'))

    def parseAssign(self, line):
        """Parse ``<id> := <expression>;``."""
        leftRight = [t.strip() for t in line.split(':=')]
        self.Log('Separated assignment parts: ' + str(leftRight))
        left, right = leftRight[0], leftRight[1]
        if not self.fa_id.verifySequence(left)[0]:
            self.errSyntax(left)
        if len(left) > 255:
            print('Error at line %d. Identifier length exceeds 255 (%d)' % (self.lineCount, len(left)))
            sys.exit(0)
        node = self._lookupOrAdd(left)
        self.PIF.append(Atom(self.atoms['ID'], node.code, left))
        self.PIF.append(Atom(self.atoms[':='], '-', ':='))
        self.parseExpression(right)

    def parseIf(self, line):
        """Parse ``IF <expr> <comparator> <expr> THEN``."""
        self.PIF.append(Atom(self.atoms['IF'], '-', 'IF'))
        line = self._stripKeywords(line, 'IF', 'THEN')

        left, crtComparator, right = self._splitComparison(line, 'IF')
        self.Log('Separated expression parts: ' + str([left, right]))
        self.parseExpression(left, False)
        self.PIF.append(Atom(self.atoms[crtComparator], '-', crtComparator))
        self.parseExpression(right, False)
        self.PIF.append(Atom(self.atoms['THEN'], '-', 'THEN'))

    def parseWhile(self, line):
        """Parse ``WHILE <expr> <comparator> <expr> DO``."""
        self.PIF.append(Atom(self.atoms['WHILE'], '-', 'WHILE'))
        line = self._stripKeywords(line, 'WHILE', 'DO')

        left, crtComparator, right = self._splitComparison(line, 'WHILE')
        self.Log('Separated expression parts: ' + str([left, right]))
        self.parseExpression(left, False)
        self.PIF.append(Atom(self.atoms[crtComparator], '-', crtComparator))
        self.parseExpression(right, False)
        self.PIF.append(Atom(self.atoms['DO'], '-', 'DO'))

    def errUndeclaredVar(self, varName):
        """Report use of an undeclared variable and exit."""
        print('Error at line %d. Usage of undeclared variable [%s]' % (self.lineCount, varName))
        sys.exit(0)

    def errSyntax(self, unkToken):
        """Report an unknown token and exit."""
        print('Syntax error at line %d. Unknown token [%s]' % (self.lineCount, unkToken))
        sys.exit(0)

    def printAtomRules(self):
        """Print the atom-to-code mapping."""
        print(self.atoms)

    def printProgram(self):
        """Print ``self.prog``."""
        print(self.prog)

    def printPIF(self):
        """Print a header followed by every entry of ``self.PIF``."""
        print('%-10s|\t%-10s|\t%-40s' % ('CODE', 'ADDRESS', 'VALUE'))
        print('%-10s|\t%-10s|\t%-40s' % ('_________', '__________', '___________'))
        for at in self.PIF:
            print(at)

    def printSymTable(self):
        """Print the symbol table via ``Tree.printTree``."""
        self.symTable.printTree()