Example #1
    def tokenize(self, sourcecode, filesource='<stdin>'):
        "Tokenize the given string of source code."
        self.errmsg = NCPTL_Error(filesource)

        # Keep track of all the comments we've encountered by storing
        # a mapping from line number to comment (including the initial
        # hash character).
        self.line2comment = {}

        # Initialize the lexer.
        lex.lex(module=self)

        # Repeatedly invoke the lexer and return all of the tokens it produces.
        self.lineno = 1
        lex.input(sourcecode)
        self.toklist = []
        while 1:
            # Acquire the next token and assign it a line number if necessary.
            token = lex.token()
            if not token:
                break
            if token.lineno < self.lineno:
                token.lineno = self.lineno

            # Hack: Disambiguate op_mult and star on the parser's behalf.
            if token.type in ["comma", "rparen"]:
                try:
                    if self.toklist[-1].type == "op_mult":
                        self.toklist[-1].type = "star"
                except IndexError:
                    pass

            # We now have one more valid token.
            self.toklist.append(token)
        return self.toklist
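The lex.lex(module=self) call above only works because the same object also defines PLY's required tokens list and t_* rules, which fall outside this excerpt. A minimal sketch of that module-style setup, with invented token names rather than the real coNCePTuaL grammar:

import ply.lex as lex

class MiniLexer(object):
    # Token names PLY looks for; every t_* rule below must map onto one of them.
    tokens = ("IDENT", "NUMBER", "OP_MULT", "COMMA", "LPAREN", "RPAREN")

    t_OP_MULT = r"\*"
    t_COMMA = r","
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_IDENT = r"[A-Za-z_][A-Za-z0-9_]*"
    t_ignore = " \t"

    def t_NUMBER(self, t):
        r"\d+"
        t.value = int(t.value)
        return t

    def t_newline(self, t):
        r"\n+"
        t.lexer.lineno += len(t.value)

    def t_error(self, t):
        t.lexer.skip(1)

    def tokenize(self, sourcecode):
        # Reflect over this instance, exactly as the example above does.
        lexer = lex.lex(module=self)
        lexer.input(sourcecode)
        return list(iter(lexer.token, None))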
Example #2
def parse(string):
    lex.lex()
    yacc.yacc()
    rules = yacc.parse(string)

    result = []
    while rules:
        current = rules.pop(0)
        result.extend(current[1])
    return result
Example #3
def lex_fun(data, toks):	
	lexer = lex.lex()
	lexer.input(data)
	# Tokenize
	count = 0
	while True:
		tok = lexer.token()
		if not tok: 
			break      # No more input
		# if tok.type=='NUMBER':
		# 	tok.value = int(tok.value)
		# Start counting once an EVAL token is seen: EVAL itself bumps the
		# counter to 2, so the second token after EVAL arrives with count == 4.
		if tok.type == 'EVAL':
			count = 1
		if count > 0:
			count += 1
		if count != 4:
			# pprint.pprint(tok)
			toks.append(tok)
		if count == 4:
			# Treat this token's value as nested source: strip surrounding
			# quotes, if any, and lex it recursively.
			a = tok.value
			if a[0] == '"' or a[0] == "'":
				a = a[1:-1]
			lex_fun(a, toks)
			count = 0
Example #4
def repl():
    base_st = builtin.global_st
    while True:
        try:
            line = raw_input(">>> ")
            stream = parse.parse(line)
            stream.reverse()
            p = lex.lex(stream)
            p = p[0]
            print "lex", p

            s = syntax.replace(p, base_st)
            print "syntax", s

            res = s.evaluate()
            print res

            out = open("test.c", "w")

            out.write('#include "builtin.h"\n')
            out.write("int main() {\n")

            code = s.emit()
            out.write("\treturn " + str(code) + ";\n}\n")
            out.close()
        except Exception, e:
            traceback.print_exc(file=sys.stdout)
Example #5
def myTokenizer(data):
	tokens = (
	   'NUMBER',
	   'WORD',
	   'PUNCTUATION'
	)
	t_NUMBER    = r'[0-9]+'
	t_WORD   = r'[a-zA-Z_]+'
	t_PUNCTUATION = r'\+|\-|\*|\!|\@|\#|\$|\%|\^|\&|\(|\)|\_|\=|\~|\`|\{|\[|\]|\}|\\|\||\:|\;|\"|\'|\<|\>|\,|\.|\/|\?'
	t_ignore  = ' \t\n'

	def t_error(t):
	    #print("Illegal character '%s'" % t.value[0])
	    t.lexer.skip(1)
	
	lexer = lex.lex()
	lexer.input(data)

	tokens = []
	
	while True:
	    tok = lexer.token()
	    if not tok: 
	        break      
	    tokens.append(str(tok.value))

	return tokens
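This works because lex.lex() with no arguments reflects over the calling function's namespace, so the locally defined token names and t_* rules above are picked up. A quick, hypothetical usage check (the expected output is an assumption based on those rules):

# Space/tab/newline are ignored; everything else lexes as WORD, NUMBER or PUNCTUATION.
print(myTokenizer("foo 42!"))   # -> ['foo', '42', '!']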
Example #6
    def write(self, tokens):
        for t in tokens:
            if t.type == 'HEADER_NAME':
                # token was mis-parsed.  Do it again, without the '<', '>'.
                ta = create_token('<', '<')
                ta.filename = t.filename
                ta.lineno = t.lineno
                self.output.append(ta)

                l = lex.lex(cls=PreprocessorLexer)
                l.input(t.value, t.filename)
                l.lineno = t.lineno
                tb = l.token()
                while tb is not None:
                    if hasattr(tb, 'lexer'):
                        del tb.lexer
                    self.output.append(tb) 
                    tb = l.token()

                tc = create_token('>', '>')
                tc.filename = t.filename
                tc.lineno = t.lineno
                self.output.append(tc)

                continue

            if hasattr(t, 'lexer'):
                del t.lexer
            self.output.append(t)
Example #7
def main(infile,outbase):
    fractlexer.keep_all = True
    fractlexer.t_ignore = ""
    flex = lex.lex(fractlexer)
    
    flex.input(open(infile).read())

    # Tokenize
    toks = []
    nfrms = 0
    while 1:
        tok = flex.token()
        if not tok: break      # No more input
        
        #element.text = tok.value
        if tok.type == "FORM_ID":
            output_frm(toks,outbase,nfrms)
            toks = []
            nfrms += 1
            # special case for processing tutorial
            tok.value = myfrm.sub('MyFormula',tok.value)
            
        toks.append(processToken(tok, highlights[nfrms]))

    # print last formula
    output_frm(toks,outbase,nfrms)
Example #8
    def __init__(self, language='en'):
        self.language = language
        self.lock = Lock()

        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except:
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        lex.lex(module=self, debug=False)
        self.p = yacc.yacc(module=self,
                           debug=0,
                           outputdir=outputdir,
                           debugfile=self.debugfile,
                           tabmodule=self.tabmodule,)
Example #9
def runLexer():
    lexer = lex.lex(debug=False)
    lexer.input(file(sys.argv[1]).read())
    while 1:
        tok = lexer.token()
        if not tok:
            break  # No more input
        print tok
Example #10
def parse(s):

    lexer = lex.lex()
    lexer.input(s)
    parser = yacc.yacc()  # debug=1
    print("Parsing...")
    root = parser.parse(lexer=lexer)  # debug=1
    return root
Example #11
def parse(input):
	parser = Parser()
	parser.input = lex.lex(input)

	tok = parser.next()
	while tok:
		parse_token(tok, parser)
		tok = parser.next()

	return parser.items
Example #12
 def __init__(self,localeCode):
     # Use this if you want to build the parser using LALR(1) instead of SLR
     # yacc.yacc(method="LALR")
     localeparser = __import__('numbler.server.locale.parser_%s' % (str(localeCode)),{},{},'*')
     self.parser = yacc.yacc(tabmodule="localtab_%s" % localeCode,
                             tabmoduleparent="numbler.server.locale",
                             outputdir=resource_filename('numbler.server.locale',''),
                             module=localeparser,
                             optimize=self.optimize)
     self.lexer = lex.lex(module=localeparser,debug=self.debug) #optimize=self.optimize,
Example #13
 def __init__(self, string):
     self.tokens = lex(string)
     self.current_token = next(self.tokens)
     self.at_last = False
     self.reader_macros = {
         'e': self.rm_exact,
         'i': self.rm_inexact,
         't': self.rm_true,
         'f': self.rm_false,
     }
Example #14
 def buildlexer(self,**kwargs):
     # try and find a temporary workspace
     if os.environ.has_key('TMP'):
         tempDir = os.environ['TMP']
     elif os.environ.has_key('TEMP'):
         tempDir = os.environ['TEMP']
     else:
         tempDir = os.getcwd()
     os.chdir(tempDir)
     self.lexer = lex.lex(object=self, **kwargs)
Example #15
def test_comment():
	test = 'a <!--b {{unclosed--> c'
	items = lex.lex(test)

	assert_tokens(items, 'a ', '<!--', 'b {{unclosed', '-->', ' c')
	assert items[0][0] == 'text'
	assert items[1][0] == 'left_comment'
	assert items[2][0] == 'comment'
	assert items[3][0] == 'right_comment'
	assert items[4][0] == 'text'
Example #16
def getLexer(decimalSepType = '.'):

    import lexer
    ret = None
    if decimalSepType == ',':
        lexer.t_FLOAT.__doc__ = r'((((\d+(\,\d*))|(\d*\,\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ = r'[ \t]*\;[ \t]*' # eats space
        ret = lex.lex(module=lexer)
        ret.decsep = ','
    elif decimalSepType == '.':
        lexer.t_FLOAT.__doc__ =     r'((((\d+(\.\d*))|(\d*\.\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ =     r'[ \t]*\,[ \t]*' # eats space
        ret = lex.lex(module=lexer)
        ret.decsep = '.'
    elif decimalSepType == '\xd9\xab':
        lexer.t_FLOAT.__doc__ = r'((((\d+(\xd9\xab\d*))|(\d*\xd9\xab\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ = r'[ \t]*\;[ \t]*' # eats space        
        ret = lex.lex(module=lexer)
        ret.decsep = '\xd9\xab'

    return ret
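A hedged usage sketch for getLexer above; the token stream it produces depends on the external lexer module, so no specific output is assumed:

# Build a comma-as-decimal-separator lexer and drain it.
lx = getLexer(',')
lx.input('3,14; 2,0')
print(lx.decsep, [tok.value for tok in iter(lx.token, None)])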
Example #17
 def __init__(self):
     import grammar
     block.block.__init__(self, 
                          parser.pushstream(lex.lex(proarkhe.illiterate())))
     self.decladd("OMEGA", "ordinals")
     self.decladd("natural", "type")
     self.decladd("integer", "type")
     self.decladd("real", "type")
     self.decladd("Boolean", "type")
     self.decladd("true", "Boolean")
     self.decladd("false", "Boolean")
     self.visit(grammar.parentset)
     self.walk(grammar.tokenset)
Example #18
 def assert_line(self, s, a):
     s = s.replace('\\n', '\n')
     s = s.replace('\\r', '\r')
     
     tt = lex(s)
     r = []
     for t in tt:
         r.append(t.line)
     if len(r) == len(a) + 2:
         a = a[:]
         a.append(a[-1])
         a.append(a[-1])
     assert_equal(r, a)
Example #19
def doLex(utf8string,cLocale):

    l = lex.lex()
    l.input(utf8string)
    fragments = []
    
    while True:
        val = l.token()
        if not val:
            break
        else:
            fragments.append(val.value)

    return FragHandler(fragments)
Example #20
    def construct_ast(program_text):
        tokens = lex(program_text, token_rules)

        # todo: this kind of post-lexing processing should be specified by the caller somehow.
        tokens = [token for token in tokens if token.klass.name != "whitespace"]

        right_derivation = parser.parse(tokens)

        parse_tree = construct_parse_tree(right_derivation, rules, tokens)

        remove_literal_tokens(parse_tree)
        for name in reducible_node_names:
            reduce_tail_recursive_nodes(parse_tree, name)
        return parse_tree
Example #21
def create_globals(module, support, debug):
    global parser, lexer, m, spt
    if not parser:
        lexer = lex.lex()
        parser = yacc.yacc(method="LALR", debug=debug, write_tables=0)

    if module is not None:
        m = module
    else:
        m = refpolicy.Module()

    if not support:
        spt = refpolicy.SupportMacros()
    else:
        spt = support
Example #22
 def assert_lex(self, src, lexed):
     src = src.replace('\\n', '\n')
     src = src.replace('\\r', '\r')
     
     if lexed.endswith(' ...'):
         lexed = lexed[:-3] + 'Break() Eof()'
     
     l = lex(src)
     r = []
     for t in l:
         r.append(str(t))
     act = ' '.join(r)
     if act != lexed:
         print('Actual:  ', act)
         print('Expected:', lexed)
     assert_equal(act, lexed)
Example #23
def beginParse(program):
    yacc = lexyacc.yacc()
    try:
        result = yacc.parse(program.read(),lexer = lexmelon.lex())
        aux = eval({},result)
        if isinstance(aux,bool):
            aux = str(aux).lower()
        if isinstance(aux,NodoBin):
            if aux.tipo == 'LISTA':
                print  recorrer_list(aux)
        else:
            print aux
    except SyntaxError, e:
        token = e.token
        if token:
            print 'Syntax error at line ' + str(token.lineno) \
                + ' near token ' + '"' + str(token.value) + '"'
        else:
            print 'Error at the end of the program'
Example #24
def mk_rpn_query (query):
    """Transform a CCL query into an RPN query."""
    # need to copy or create a new lexer because it contains globals
    # PLY 1.0 lacks __copy__
    # PLY 1.3.1-1.5 have __copy__, but it's broken and returns None
    # I sent David Beazley a patch, so future PLY releases will
    # presumably work correctly.
    # Recreating the lexer each time is noticeably slower, so this solution
    # is suboptimal for PLY <= 1.5, but better than being thread-unsafe.
    # Perhaps I should have per-thread lexer instead XXX
    # with example/twisted/test.py set to parse_only, I get 277 parses/sec
    # with fixed PLY, vs. 63 parses/sec with broken PLY, on my 500 MHz PIII
    # laptop.
    
    copiedlexer = None
    if hasattr (lexer, '__copy__'):
        copiedlexer = lexer.__copy__ ()
    if copiedlexer == None:
        copiedlexer = lex.lex ()
    ast = yacc.parse (query, copiedlexer)
    return ast_to_rpn (ast)
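The comment above floats a per-thread lexer as an alternative to copying a shared one. A minimal sketch of that idea using threading.local (hypothetical, not part of the original module, and assuming the CCL token rules live in the same module so lex.lex() can find them):

import threading
import ply.lex as lex

_tls = threading.local()

def get_thread_lexer():
    # Build and cache one lexer per thread so no thread ever shares,
    # or needs to copy, another thread's lexer state.
    if not hasattr(_tls, 'lexer'):
        _tls.lexer = lex.lex()
    return _tls.lexer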
Example #25
    def __init__(self,options,cparser):
        self.defines = ["inline=", "__inline__=", "__extension__=",
                        "_Bool=uint8_t", "__const=const", "__asm__(x)=",
                        "__asm(x)=", "CTYPESGEN=1"]

        # On OSX, explicitly add these defines to keep from getting syntax
        # errors in the OSX standard headers.
        if sys.platform == 'darwin':
            self.defines += ["__uint16_t=uint16_t",
                             "__uint32_t=uint32_t",
                             "__uint64_t=uint64_t"]

        self.matches = []
        self.output = []
        self.lexer = lex.lex(cls=PreprocessorLexer,
                             optimize=1,
                             lextab='lextab',
                             outputdir=os.path.dirname(__file__),
                             module=pplexer)

        self.options = options
        self.cparser = cparser # An instance of CParser
Example #26
def constr_testing(value, constr, var_name):
    global names

    lexer = lex.lex()
    parser = yacc.yacc()
    # print parser.parse('ASSERT(NOT(123 = 123))')

    # print constr

    for index, eachvar in enumerate(var_name):
        str_value = []
        for val in value[index]:
            if val != '':
                # TODO: input concrete value must be integer
                str_val = BitArray(uint = int(val), length = 8)
                str_value.append('0x' + str_val.hex)

        names[eachvar] = str_value
    #print names


    return ([constr[0]], yacc.parse(constr[1]))
Example #27
    def __init__(self, **kw):
        self.debug = kw.get('debug', 0)
        self.parse_result = None # undefined
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] \
                                + "_" + self.__class__.__name__
        except:
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"
        #print self.debugfile, self.tabmodule

        # Build the lexer and parser
        self.ctx = Context()
        ryacc.ctx = self.ctx
        self.lexer = lex.lex(module=self, debug=self.debug)
        self.parser = ryacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule,
                  write_tables=0,
                  optimize=1)
Example #28
    def __init__(self, gcc_search_path=True):
        yacc.Parser.__init__(self)
        self.lexer = lex.lex(cls=PreprocessorLexer)
        PreprocessorGrammar.get_prototype().init_parser(self)

        # Map system header name to data, overrides path search and open()
        self.system_headers = {}


        self.include_path = ['/usr/local/include', '/usr/include']
        if sys.platform == 'darwin':
            self.framework_path = ['/System/Library/Frameworks',
                                   '/Library/Frameworks']
        else:
            self.framework_path = []

        if gcc_search_path:
            self.add_gcc_search_path()

        self.lexer.filename = ''

        self.defines = {}
        self.namespace = PreprocessorNamespace()
Example #29
def new():
    t_AND = r"\&"
    t_ANDAND = r"\&\&"
    t_ANDEQ = r"\&="
    t_BACKSLASH = r"\\"
    t_COLON = r":"
    t_DIV = r"\/"
    t_DIVEQ = r"\/="
    t_DOT = r"\."
    t_DOTDIV = r"\./"
    t_DOTEXP = r"\.\^"
    t_DOTMUL = r"\.\*"
    t_DOTMULEQ = r"\.\*="
    t_EQ = r"="
    t_EQEQ = r"=="
    t_EXP = r"\^"
    t_GE = r">="
    t_GT = r"\>"
    t_HANDLE = r"\@"
    t_LE = r"<="
    t_LT = r"\<"
    t_MINUS = r"\-"
    t_MINUSEQ = r"\-="
    t_MINUSMINUS = r"\--"
    t_MUL = r"\*"
    t_MULEQ = r"\*="
    t_NE = r"(~=)|(!=)"
    t_NEG = r"\~|!"
    t_OR = r"\|"
    t_OREQ = r"\|="
    t_OROR = r"\|\|"
    t_PLUS = r"\+"
    t_PLUSEQ = r"\+="
    t_PLUSPLUS = r"\+\+"

    states = (("matrix", "inclusive"), ("afterkeyword", "exclusive"))

    ws = r"(\s|(\#|%).*\n|\.\.\..*\n|\\\n)"
    ws1 = ws + "+"
    ws0 = ws + "*"
    ms = r"'([^']|(''))*'"
    os = r'"([^"\a\b\r\t\0\v\n\\]|(\\[abn0vtr\"\n\\]))*"'
    mos = "(%s)|(%s)" % (os, ms)
    id = r"[a-zA-Z_][a-zA-Z_0-9]*"

    def unescape(s):
        if s[0] == "'":
            return s[1:-1].replace("''", "'")
        else:
            return s[1:-1].decode('string_escape')
#            return s[1:-1].replace('\\"','"').replace('\\\n','').replace('\\n','\n').replace('\\\\','\\').replace('\\0','\0').replace('\\v','\v').replace('\\t','\t').replace('\\r','\r')

    @TOKEN(mos)
    def t_afterkeyword_STRING(t):
        t.value = unescape(t.value)
        t.lexer.begin("INITIAL")
        return t

    def t_afterkeyword_error(t):
        raise SyntaxError

    # A quote, immediately following any of: (1) an alphanumeric
    # character, (2) right bracket, parenthesis or brace,
    # or (3) another TRANSPOSE, is a TRANSPOSE.  Otherwise, it starts a
    # string.  The order of the rules for TRANSPOSE (first) and STRING
    # (second) is important.  Luckily, if the quote is separated from
    # the term by line continuation (...), matlab starts a string, so
    # the above rule still holds.

    def t_TRANSPOSE(t):
        r"(?<=\w|\]|\)|\})((\.')|')+"
        # <---context ---><-quotes->
        # We let the parser figure out what that mix of quotes and
        # dot-quotes, which is kept in t.value, really means.
        return t

    @TOKEN(mos)
    def t_STRING(t):
        t.value = unescape(t.value)
        return t

    @TOKEN(r"(\.%s)?%s" % (ws0, id))
    def t_IDENT(t):
        t.lexer.lineno += t.value.count("\n")
        if t.value[0] == ".":
            # Reserved words are not reserved when used as fields.
            # So return=1 is illegal, but foo.return=1 is fine.
            t.type = "FIELD"
            return t
        if t.value in ("endwhile", "endfunction", "endif", "endfor",
                       "endswitch", "end_try_catch"):  # octave
            t.type = "END_STMT"
            return t
        if t.value == "end":
            if t.lexer.parens > 0 or t.lexer.brackets > 0 or t.lexer.braces > 0:
                t.type = "END_EXPR"
            else:
                t.type = "END_STMT"
        else:
            t.type = reserved.get(t.value, "IDENT")
            if t.type != "IDENT" and t.lexer.lexdata[t.lexer.lexpos] == "'":
                t.lexer.begin("afterkeyword")
        return t

    def t_LPAREN(t):
        r"\("
        t.lexer.parens += 1
        return t

    def t_RPAREN(t):
        r"\)"
        t.lexer.parens -= 1
        return t

    @TOKEN(ws0 + r"\]")
    def t_RBRACKET(t):  # compare w t_LBRACKET
        t.lexer.lineno += t.value.count("\n")
        t.lexer.brackets -= 1
        if t.lexer.brackets + t.lexer.braces == 0:
            t.lexer.begin("INITIAL")
        return t

    @TOKEN(r"\[" + ws0)
    def t_LBRACKET(t):  # compare w t_SEMI
        t.lexer.lineno += t.value.count("\n")
        t.lexer.brackets += 1
        if t.lexer.brackets + t.lexer.braces == 1:
            t.lexer.begin("matrix")
        return t

    # maybe we need a dedicated CELLARRAY state ???
    @TOKEN(ws0 + r"\}")
    def t_RBRACE(t):
        t.lexer.lineno += t.value.count("\n")
        t.lexer.braces -= 1
        if t.lexer.braces + t.lexer.brackets == 0:
            t.lexer.begin("INITIAL")
        return t

    @TOKEN(r"\{" + ws0)
    def t_LBRACE(t):
        t.lexer.lineno += t.value.count("\n")
        t.lexer.braces += 1
        if t.lexer.brackets + t.lexer.braces == 1:
            t.lexer.begin("matrix")
        return t

    @TOKEN(r"," + ws0)
    def t_COMMA(t):  # eating spaces is important inside brackets
        t.lexer.lineno += t.value.count("\n")
        if (t.lexer.brackets == 0 and t.lexer.parens == 0
                and t.lexer.braces == 0):
            t.type = "SEMI"
            return t
        return t

    @TOKEN(r"\;" + ws0)
    def t_SEMI(t):
        t.lexer.lineno += t.value.count("\n")
        #        if t.lexer.brackets or t.lexer.braces > 0:
        #            t.type = "CONCAT"
        return t

    def t_NUMBER(t):
        r"(0x[0-9A-Fa-f]+)|((\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?[ij]?)"
        if t.value[-1] == 'i':
            t.value = t.value[:-1] + 'j'
        t.value = eval(t.value)
        return t

    def t_NEWLINE(t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        if not t.lexer.parens and not t.lexer.braces:
            t.value = ";"
            t.type = "SEMI"
            return t

    def t_comment(t):
        r"(%|\#).*"
        pass


#    @TOKEN(ws+r"(?=[-+]\S)")
#    def t_matrix_WHITESPACE(t):
#        #r"\s+(?=[-+]\S)"
#        # Whitespace, followed by + or - followed by anything but whitespace
#        t.lexer.lineno += t.value.count("\n")
#        t.type = "COMMA"
#        return t

    @TOKEN(r"(?<=\w)" + ws1 + r"(?=\()")
    def t_matrix_BAR(t):
        # Consume whitespace which follows end of name
        # and is followed by a left paren.  This properly handles
        # a space between a func name and the arguments
        pass

    tend = r"(?<=[])}'\".]|\w)"
    tbeg = r"(?=[-+]?([[({'\"]|\w|\.\d))"

    @TOKEN(tend + ws1 + tbeg)
    def t_matrix_FOO(t):
        # In matrix state, consume whitespace separating two
        # terms and return a fake COMMA token.  This allows
        # parsing [1 2 3] as if it was [1,2,3].  Handle
        # with care: [x + y] vs [x +y]
        #
        # A term T is
        # (a) a name or a number
        # (b) a literal string using single or double quotes
        # (c) (T) or [T] or {T} or T' or +T or -T
        #
        # Terms end with
        # (1) an alphanumeric character \w
        # (2) single quote (in octave also double-quote)
        # (3) right parenthesis, bracket, or brace
        # (4) a dot (after a number, such as "3.")
        #
        # The pattern for whitespace accounts for ellipsis as a
        # whitespace, and for the trailing junk.
        #
        # Terms start with
        # (1) an alphanumeric character
        # (2) a single or double quote,
        # (3) left paren, bracket, or brace and finally
        # (4) a dot before a digit, such as .3  .

        # TODO: what about curly brackets ???
        # TODO: what about dot followed by a letter, as in field
        #   [foo  .bar]

        t.lexer.lineno += t.value.count("\n")
        t.type = "COMMA"
        return t

    def t_ELLIPSIS(t):
        r"\.\.\..*\n"
        t.lexer.lineno += 1
        pass

    def t_SPACES(t):
        r"(\\\n|[ \t\r])+"
        pass

    def t_error(t):
        column = t.lexer.lexpos - t.lexer.lexdata.rfind(
            "\n", 0, t.lexer.lexpos)
        raise IllegalCharacterError(t.lineno, column, t.value[0])

    lexer = lex.lex(reflags=re.I)
    lexer.brackets = 0  # count open square brackets
    lexer.parens = 0  # count open parentheses
    lexer.braces = 0  # count open curly braces
    return lexer
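A hedged usage sketch for the lexer returned by new(); it assumes the surrounding module defines the tokens list, the reserved map and IllegalCharacterError that the rules reference:

lexer = new()
lexer.input("a = [1 2 3]';")
for tok in iter(lexer.token, None):
    print(tok.type, repr(tok.value))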
Example #30
def tdiCompile(text, replacementArgs=_replacementArgs(())):
    import lex
    if isinstance(replacementArgs, tuple):
        return tdiCompile(text, _replacementArgs(replacementArgs))
    elif not isinstance(replacementArgs, _replacementArgs):
        raise Exception(
            "Second argument to tdiCompile, if supplied, must be a tuple")

### Lexical Tokens
    tokens = [
        'PLUS',
        'MINUS',
        'TIMES',
        'DIVIDE',
        'EQUAL',
        'EQUALS',
        'LPAREN',
        'RPAREN',
        'LBRACE',
        'RBRACE',
        'LBRACKET',
        'RBRACKET',
        'COMMA',
        'BU',
        'B',
        'WU',
        'W',
        'LU',
        'L',
        'QU',
        'Q',
        'FloatNum',
        'T',
        'T2',
        'IDENT',
        'PLACEHOLDER',
        'NAME',
        'ARROW',
        'GREATER',
        'LESS',
        'RAISE',
        'GREATER_EQUAL',
        'LESS_EQUAL',
        'NOT_EQUAL',
        'QUESTION',
        'COLON',
        'LSHIFT',
        'RSHIFT',
        'SEMICOLON',
        'IAND',
        'AND',
        'NOT',
        'PLUSPLUS',
        'MINUSMINUS',
        'SLASHSLASH',
        'IOR',
        'OR',
        'INOT',
        'EQUALSFIRST',
        'TREEPATH',
        'BACKQUOTE',
    ]
    ### Reserved keywords

    reserved = {
        'if': 'IF',
        'else': 'ELSE',
        'public': 'IDENTTYPE',
        'private': 'IDENTTYPE',
        'fun': 'FUN',
        'in': 'ARGTYPE',
        'out': 'ARGTYPE',
        'inout': 'ARGTYPE',
        'optional': 'ARGTYPE',
        'as_is': 'ARGTYPE',
        'switch': 'SWITCH',
        'case': 'CASE',
        'for': 'FOR',
        'while': 'WHILE',
        'break': 'BREAK',
        'continue': 'CONTINUE',
        'not': 'NOT_S',
        'and': 'AND_S',
        'or': 'OR_S',
        'nor': 'NOR_S',
        'mod': 'MOD_S',
        'eq': 'EQ_S',
        'ne': 'NE_S',
        'gt': 'GT_S',
        'ge': 'GE_S',
        'lt': 'LT_S',
        'le': 'LE_S',
        'default': 'DEFAULT',
    }
    tokens += list(set(reserved.values()))

    ### ignore comments denoted by /* ..... */  NOTE: nested comments are allowed, which requires the states trick

    states = (('nestcomment', 'exclusive'), )

    def t_nestcomment_comment(t):
        r'(.|\n)*?(\*/|/\*)'
        if t.value[-2:] == '/*':
            t.lexer.push_state('nestcomment')
        else:
            t.lexer.pop_state()

    def t_COMMENT(t):
        r'(/\*(.|\n)*?(\*/|/\*))'
        if t.value[-2:] == '/*':
            t.lexer.push_state('nestcomment')
            t.lexer.push_state('nestcomment')

### integer token including hex, binary, octal and decimal

    integer = r'0[Xx][0-9A-Fa-f]+|0[Bb][01]+|0[0-7]+|[1-9]+[0-9]*|0'

    def fix_backquotes(in_str):
        import re

        def replace_backquote_string(match):
            mstr = match.group(0)
            if len(mstr) > 4:
                ans = mstr
            elif mstr[1] == '\\':
                ans = mstr
            elif mstr[1] in 'mntr':
                ans = eval("'" + mstr + "'")
            else:
                ans = chr(int(mstr[1:], 8))
            return ans

        ans = re.sub(r'\\[0-7]+|\\[\\mntr]', replace_backquote_string, in_str)
        return ans

### string token with double quotes converted to String() instance

    @lex.TOKEN(r'"(?:[^"\\]|\\.)*"')
    def t_T(t):
        t.value = String(
            fix_backquotes(t.value).replace('\\"',
                                            '"').replace("\\'", "'").replace(
                                                '\\\\', '\\')[1:-1])
        return t

### string token with single quotes converted to String() instance

    @lex.TOKEN(r"'(?:[^'\\]|\\.)*'")
    def t_T2(t):
        t.value = String(
            fix_backquotes(t.value).replace("\\'",
                                            "'").replace('\\"', '"').replace(
                                                '\\\\', '\\')[1:-1])
        return t

### unsigned byte token converted to Uint8() instance

    @lex.TOKEN(r'(?i)(byte_unsigned|unsigned_byte)\((?P<number1>(' + integer +
               r'))\)|(?P<number2>(' + integer + r'))(bu|ub)')
    def t_BU(t):
        t.value = Uint8(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### unsigned word converted to Uint16() instance

    @lex.TOKEN(r'(?i)(word_unsigned|unsigned_word)\((?P<number1>(' + integer +
               r'))\)|(?P<number2>(' + integer + r'))(wu|uw)')
    def t_WU(t):
        t.value = Uint16(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### signed word converted to Int16() instance

    @lex.TOKEN(r'(?i)word\((?P<number1>(' + integer + r'))\)|(?P<number2>(' +
               integer + r'))w')
    def t_W(t):
        t.value = Int16(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### unsigned quadword converted to Uint64() instance

    @lex.TOKEN(r'(?i)(quadword_unsigned|unsigned_quadword)\((?P<number1>(' +
               integer + r'))\)|(?P<number2>(' + integer + r'))(uq|qu)')
    def t_QU(t):
        t.value = Uint64(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### unsigned int converted to Uint32() instance

    @lex.TOKEN(r'(?i)(long_unsigned|unsigned_long)\((?P<number1>(' + integer +
               r'))\)|(?P<number2>(' + integer + r'))(lu|ul|u)')
    def t_LU(t):
        t.value = Uint32(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### signed quadword converted to Int64() instance

    @lex.TOKEN(r'(?i)quadword\((?P<number1>(' + integer +
               r'))\)|(?P<number2>(' + integer + r'))q')
    def t_Q(t):
        t.value = Int64(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### Float instance converted to either Float32() or Float64() instance

    @lex.TOKEN(
        r'(?i)([0-9]+\.(?!\.)[0-9]*|[0-9]*\.[0-9]+|[0-9]+)(?P<exp>([dgef]))[-+]?[0-9]+|[0-9]+\.(?!\.)[0-9]*|[0-9]*\.[0-9]+'
    )
    def t_FloatNum(t):
        exp = t.lexer.lexmatch.group('exp')
        if exp is not None:
            exp = exp.lower()
        val = t.value.lower().replace('d', 'e').replace('g',
                                                        'e').replace('f', 'e')
        if exp is None or exp == 'e' or exp == 'f':
            t.value = Float32(val)
        else:
            t.value = Float64(val)
            if 'inf' in repr(t.value.data()):
                t.value = Float32(val)
        return t

### signed byte converted to Int8() instance

    @lex.TOKEN(r'(?i)byte\((?P<number1>(' + integer + r'))\)|(?P<number2>(' +
               integer + r'))b')
    def t_B(t):
        t.value = Int8(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### signed int converted to Int32() instances. NOTE: this must come last among the scalar tokens to work, for some reason.

    @lex.TOKEN(r'(?i)long\((?P<number1>(' + integer + r'))\)|(?P<number2>(' +
               integer + r'))l?')
    def t_L(t):
        t.value = Int32(
            int(
                t.lexer.lexmatch.group('number1')
                or t.lexer.lexmatch.group('number2'), 0))
        return t

### Ident or builtin constant converted to either Ident() instance or a Builtin() instance for constants such as $PI

    @lex.TOKEN(
        r'(?i)(\$([a-z]+[a-z0-9_\$]*)|([0-9]+[a-z_\$]+[a-z0-9_\$]*))|(_[a-z0-9_\$]*)'
    )
    def t_IDENT(t):
        if t.value.lower() == "$roprand":
            import numpy as np
            t.value = np.frombuffer(np.getbuffer(np.int32(2147483647)),
                                    dtype=np.float32)[0]
        else:
            try:
                t.value = Builtin(t.value, ())
            except Exception:
                t.value = Ident(t.value)
        return t

### Placeholders

    @lex.TOKEN(r'\$[1-9]*[0-9]*')
    def t_PLACEHOLDER(t):
        if len(t.value) == 1:
            idx = replacementArgs.idx
        else:
            idx = int(t.value[1:])
        if idx <= len(replacementArgs.args):
            t.value = makeData(replacementArgs.args[idx - 1])
        else:
            raise Exception(
                '%TDI-E-TdiMISS_ARG, Missing argument is required for function'
            )
        replacementArgs.idx = idx + 1
        return t

### Tree path \[treename::]tagname[.|:]node[.|:]node...

    pname = r'[a-z][a-z0-9$_]*'

    @lex.TOKEN(r'(?i)(((\\(' + pname + r'::)?|[\.:])?' + pname +
               r')|(\.-(\.?-)*))([\.:]' + pname + r')*')
    def t_TREEPATH(t):
        if t.value.lower() in reserved:
            t.type = reserved[t.value.lower()]
        else:
            import re
            original_value = t.value
            if re.match(r'[\s]*(\(|->)',
                        t.lexer.lexdata[t.lexer.lexpos:]) is not None:
                skip = t.value.find(':')
                if skip == 0:
                    t.lexer.lexpos = t.lexer.lexpos - len(t.value) + 1
                    t.type = 'COLON'
                    t.value = ':'
                else:
                    if skip > -1:
                        t.lexer.lexpos = t.lexer.lexpos - len(t.value) + skip
                        t.value = t.value[0:skip]
                    t.type = 'NAME'
            else:
                try:
                    t.value = Tree().getNode(t.value)
                except:
                    if t.value[0] in '.:':
                        t.value = '\\' + Tree().tree + '::TOP' + t.value
                    elif t.value[0] == '\\':
                        if t.value.find('::') == -1:
                            t.value = '\\' + Tree().tree + '::' + t.value[1:]
                    else:
                        t.value = '\\' + Tree().tree + '::TOP:' + t.value
                    t.value = TreePath(t.value.upper())
                t.value.original_value = original_value
        return t

### Various operators

    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_EQUALS = r'=='
    t_EQUAL = r'='
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_COMMA = r','
    t_ARROW = r'->'
    t_GREATER = r'>'
    t_GREATER_EQUAL = r'>='
    t_LESS = r'<'
    t_LESS_EQUAL = r'<='
    t_NOT_EQUAL = r'!=|<>'
    t_RAISE = r'\^|\*\*'
    t_QUESTION = r'\?'
    t_LSHIFT = r'<<'
    t_RSHIFT = r'>>'
    t_SEMICOLON = r';'
    t_IAND = r'&'
    t_AND = r'&&'
    t_NOT = r'!'
    t_PLUSPLUS = r'\+\+'
    t_MINUSMINUS = r'--'
    t_SLASHSLASH = r'//'
    t_IOR = r'\|'
    t_OR = r'\|\|'
    t_INOT = r'~'
    t_EQUALSFIRST = r'\+=|-=|\*=|/=|\^=|\*\*=|<==|>==|>>=|<<=|&=|&&=|!==|\|=|\|\|=|//='
    t_BACKQUOTE = r'`'

    def t_COLON(t):
        r'\.\.|:'
        t.value = ':'
        return t

### Name token which begins with an alpha followed by zero or more alphanumerics or underscores
### or a reserved word token such as if, while, switch, for ...

    def t_NAME(t):
        r'(?i)\b[a-z]+[a-z0-9_]*\b'
        t.type = reserved.get(t.value.lower(), 'NAME')
        return t

# Define a rule so we can track line numbers

    def t_newline(t):
        r'\n+'
        t.lexer.lineno += len(t.value)

# Error handling rule

    def t_ANY_error(t):
        print("Illegal character '%s'(%d) at line %d around '%s'" %
              (t.value[0], ord(t.value[0]), t.lexer.lineno,
               t.lexer.lexdata[t.lexer.lexpos - 10:t.lexer.lexpos + 10]))
#        t.lexer.skip(1)

# A string containing ignored characters (spaces and tabs)

    t_ANY_ignore = ' \t\r\0'

    # Build the lexer

    lex.lex(debug=0, optimize=optimized, lextab='tdilextab')

    precedence = (
        ('right', 'EQUAL'),
        ('right', 'COMMA'),
        ('left', 'COLON'),
        ('left', 'QUESTION'),
        ('left', 'OR', 'AND', 'OR_S', 'AND_S'),
        ('left', 'GREATER', 'GREATER_EQUAL', 'LESS', 'LESS_EQUAL', 'EQUALS',
         'NOT_EQUAL', 'GT_S', 'GE_S', 'LT_S', 'LE_S', 'EQ_S', 'NE_S'),
        ('left', 'SLASHSLASH'),
        ('left', 'PLUS', 'MINUS', 'IOR', 'IAND'),
        ('left', 'TIMES', 'DIVIDE'),
        ('left', 'RAISE', 'MOD_S'),
        ('right', 'RSHIFT', 'LSHIFT', 'UNOP'),
        ('left', 'LBRACKET', 'LPAREN', 'IDENTTYPE'),
    )

    def p_compilation(t):
        """compilation : statements\n| operand\n | operand SEMICOLON
        """
        t[0] = t[1]
        if isinstance(t[0], Builtin) and len(t[0].args) == 2 and isinstance(
                t[0].args[0], String) and isinstance(t[0].args[1], String):
            t[0] = String(str(t[0].args[0]) + str(t[0].args[1]))

### operands can be arguments to operators

    def p_operand(t):
        """operand : scalar\n| operation\n| parenthisized_operand\n| ident\n| vector\n| TREEPATH"""
        t[0] = t[1]

### Subscripting (i.e. _a[32])

    def p_subscript(t):
        """operation : operand vector"""
        if len(t) == 2:
            t[0] = t[1]
        else:
            args = [
                t[1],
            ]
            if isinstance(t[2], Builtin):
                for arg in t[2].args:
                    args.append(arg)
            else:
                for arg in t[2]:
                    args.append(arg)
            t[0] = Builtin('subscript', tuple(args))

### parenthisized operands such as (1+2) for specifying things like (1+2)*10

    def p_parenthisized_operand(t):
        'parenthisized_operand : LPAREN operand RPAREN'
        t[0] = t[2]

### Basic scalars supported by MDSplus

    def p_scalar(t):
        'scalar : BU \n| B \n| WU \n| W \n| LU \n| L \n| QU \n| Q \n| FloatNum \n| T \n| T2 \n| missing'
        t[0] = t[1]

### Ken variable (i.e. _gub or public _gub)

    def p_ident(t):
        """ident : IDENT\n| PLACEHOLDER\n| IDENTTYPE IDENT"""
        if len(t) == 2:
            t[0] = t[1]
        else:
            t[0] = Builtin(t[1], (str(t[2]), ))

### Missing value specified by asterisk

    def p_missing(t):
        'missing : TIMES'
        t[0] = makeData(None)

### Range constructor (a : b [:c])

    def p_range(t):
        """range : range COLON operand\n| operand COLON operand"""
        if isinstance(t[1], list):
            t[1].append(t[3])
            t[0] = t[1]
        else:
            t[0] = [t[1], t[3]]

    def p_op_range(t):
        """operation : range"""
        t[0] = Range(tuple(t[1]))

### Loop control operations (i.e. break, continue)

    def p_loop_control(t):
        'operation : BREAK\n| CONTINUE'
        t[0] = Builtin(t[1], tuple())

### Unary arithmetic operations such as ~a, -a

    def p_unaryop(t):
        """operation : NOT operand %prec UNOP\n| INOT operand %prec UNOP\n| MINUS operand %prec UNOP\n| PLUS operand %prec UNOP
        | NOT_S operand %prec UNOP"""
        ops = {
            '!': 'NOT',
            '~': 'INOT',
            '-': 'UNARY_MINUS',
            'not': 'NOT',
            '+': 'UNARY_PLUS'
        }
        if t[1] == '-' and isinstance(t[2], Scalar):
            t[0] = makeData(-t[2].data())
        elif t[1] == '+' and isinstance(t[2], Scalar):
            t[0] = t[2]
        else:
            t[0] = Builtin(ops[t[1].lower()], (t[2], ))

### Binary arithmetic operations such as a+b a>=b a^b a&&b

    def p_binop(t):
        """operation : operand PLUS operand
        | operand MINUS operand\n| operand TIMES operand\n| operand DIVIDE operand
        | operand RAISE operand\n| operand RSHIFT operand\n| operand LSHIFT operand
        | operand LESS operand\n| operand GREATER operand\n| operand LESS_EQUAL operand
        | operand GREATER_EQUAL operand\n| operand EQUALS operand \n| operand IAND operand
        | operand AND operand \n| operand OR operand \n| operand NOT_EQUAL operand
        | operand IOR operand\n| operand AND_S operand \n| operand OR_S operand\n| operand NOR_S operand
        | operand MOD_S operand
        | MOD_S LPAREN operand COMMA operand RPAREN
        | operand GT_S operand\n| operand GE_S operand\n| operand LT_S operand\n| operand LE_S operand
        | operand EQ_S operand\n| operand NE_S operand
        """
        ops = {
            '+': 'add',
            '-': 'subtract',
            '*': 'multiply',
            '/': 'divide',
            '<': 'lt',
            '>': 'gt',
            '^': 'power',
            '**': 'power',
            '<=': 'le',
            '>=': 'ge',
            '==': 'eq',
            '>>': 'shift_right',
            '<<': 'shift_left',
            '&': 'iand',
            '&&': 'and',
            '!=': 'NE',
            '<>': 'NE',
            '|': 'ior',
            '||': 'or',
            'and': 'and',
            'or': 'or',
            'nor': 'nor',
            'mod': 'MOD',
            'gt': 'gt',
            'ge': 'ge',
            'lt': 'lt',
            'le': 'le',
            'eq': 'eq',
            'ne': 'ne'
        }
        if len(t) == 4:
            t[0] = Builtin(ops[t[2].lower()], (t[1], t[3]))
        else:
            t[0] = Builtin(ops[t[1].lower()], (t[3], t[5]))

### Concatenation operator a // b [// c]
### Jump through hoops to emulate weird TDI behavior: string types are concatenated at compile time, with one caveat.
### If the number of concatenation arguments is even and all are strings, concatenate the first n-1 at compile time
### and emit a concat function that joins that result with the nth; otherwise concatenate them all at compile time.
### If any of the items being concatenated is not a string, then don't concatenate anything at run time.

    class Concat(list):
        def get(self):
            compile_time_concat = True
            for arg in self:
                if not isinstance(arg, (str, String)):
                    compile_time_concat = False
                    break
            if compile_time_concat:
                c = list()
                c.append(self[0])
                if len(self) % 2 == 0:
                    for arg in self[1:-1]:
                        c[-1] = str(c[-1]) + str(arg)
                    c.append(self[-1])
                else:
                    for arg in self[1:]:
                        c[-1] = String(str(c[-1]) + str(arg))
                if len(c) > 1:
                    return Builtin('concat', tuple(c))
                else:
                    return c[0]
            else:
                return Builtin('concat', tuple(self))

    def p_concat(t):
        'concat : operand SLASHSLASH operand\n| concat SLASHSLASH operand\n operation : concat'
        if len(t) == 4:
            if isinstance(t[1], Concat):
                t[1].append(t[3])
                t[0] = t[1]
            else:
                t[0] = Concat([t[1], t[3]])
        else:
            t[0] = t[1].get()
            if isinstance(t[0], String):
                t.type = 'scalar'

### Conditional operation (i.e. a ? b : c)

    def p_conditional(t):
        'operation : operand QUESTION operand COLON operand'
        t[0] = Builtin('conditional', (t[3], t[5], t[1]))

### Ident increment/decrement (i.e. _i++, _i--, ++_i, --_i)

    def p_inc_dec(t):
        """operation : ident PLUSPLUS\n| ident MINUSMINUS\n| PLUSPLUS ident\n| MINUSMINUS ident"""
        op = {'++': '_inc', '--': '_dec'}
        if isinstance(t[1], str):
            t[0] = Builtin('pre' + op[t[1]], (t[2], ))
        else:
            t[0] = Builtin('post' + op[t[2]], (t[1], ))

### Ken variable assignment (i.e. _i=1)

    def p_assignment(t):
        'operation : operand EQUAL operand %prec EQUAL'
        t[0] = Builtin('EQUALS', (t[1], t[3]))

### Argument list for function calls (i.e. ([a[,b[,c]]])  )

    def p_arglist(t):
        """arglist : LPAREN args RPAREN\n args :\n| args operand\n| args COMMA\n| args ARGTYPE LPAREN operand RPAREN"""
        if len(t) == 4:
            t[0] = t[2]
        elif len(t) == 1:
            t[0] = list()
        else:
            if len(t) == 6:
                t[2] = Builtin(t[2], (t[4], ))
            if isinstance(t[2], str):
                if len(t[1]) == 0:
                    t[1].append(None)
                t[1].append(None)
            else:
                if len(t[1]) > 0 and (t[1][-1] is None
                                      or isinstance(t[1][-1], EmptyData)):
                    t[1][-1] = t[2]
                else:
                    t[1].append(t[2])
            t[0] = t[1]

### Function call (i.e. gub(1,2,,3)) also handles build_xxx() and make_xxx() operations

    def p_function(t):
        """operation : NAME arglist\n| EQ_S arglist\n| NE_S arglist\n| LE_S arglist
        | LT_S arglist\n| GT_S arglist\n| GE_S arglist"""
        def doBuild(name, args):
            def build_with_units(args):
                args[0].units = args[1]
                return args[0]

            def build_with_error(args):
                args[0].error = args[1]
                return args[0]

            def build_param(args):
                try:
                    args[0].help = args[1]
                    args[0].validation = args[2]
                except:
                    pass
                return args[0]

            def build_slope(args):
                new_args = list()
                if len(args) > 1:
                    new_args.append(args[1])
                else:
                    new_args.append(None)
                if len(args) > 2:
                    new_args.append(args[2])
                else:
                    new_args.append(None)
                new_args.append(args[0])
                return Range(tuple(new_args))

            def buildPath(args):
                if isinstance(args[0], (str, String)):
                    name = str(args[0])
                    if len(name) > 1 and name[0:2] == '\\\\':
                        name = name[1:]
                    ans = TreePath(name)
                else:
                    ans = Builtin('build_path', args)
                return ans

            def buildCall(args):
                ans = Call(args[1:])
                ans.retType = args[0]
                return ans

### retain original node specifiers when building a using function

            def buildUsing(args_in):
                def restoreTreePaths(arg):
                    if isinstance(arg, Compound):
                        args = list()
                        for a in arg.args:
                            args.append(restoreTreePaths(a))
                        arg.args = tuple(args)
                        ans = arg
                    elif isinstance(arg, (TreePath, TreeNode)) and hasattr(
                            arg, 'original_value'):
                        ans = TreePath(arg.original_value)
                    else:
                        ans = arg
                    return ans

                args = list()
                for arg in args_in:
                    args.append(restoreTreePaths(arg))
                ans = Builtin('using', tuple(args))
                return ans

            known_builds = {
                'BUILD_ACTION': Action,
                #BUILD_CONDITION':Condition,
                'BUILD_CONGLOM': Conglom,
                'BUILD_DEPENDENCY': Dependency,
                'BUILD_DIM': Dimension,
                'BUILD_DISPATCH': Dispatch,
                'BUILD_EVENT': Event,
                'BUILD_FUNCTION': Builtin,
                'BUILD_METHOD': Method,
                'BUILD_PARAM': build_param,
                'BUILD_PROCEDURE': Procedure,
                'BUILD_PROGRAM': Program,
                'BUILD_RANGE': Range,
                'BUILD_ROUTINE': Routine,
                'BUILD_SIGNAL': Signal,
                'BUILD_SLOPE': build_slope,
                'BUILD_WINDOW': Window,
                'BUILD_WITH_UNITS': build_with_units,
                'BUILD_CALL': buildCall,
                'BUILD_WITH_ERROR': build_with_error,
                'BUILD_OPAQUE': Opaque,
                'BUILD_PATH': buildPath,
                'USING': buildUsing,
            }
            return known_builds[name.upper()](args)

        def doMake(name, args):
            for arg in args:
                if not isinstance(
                        arg, (Array, Scalar, EmptyData)) and arg is not None:
                    raise Exception('use make opcode')
            name = name.upper().replace('MAKE_', 'BUILD_')
            if 'BUILD_' in name:
                return doBuild(name, tuple(args))
            else:
                raise Exception("not a make_ call")

        try:
            t[0] = doBuild(t[1], tuple(t[2]))
        except Exception:
            try:
                t[0] = doMake(t[1], tuple(t[2]))
            except Exception:
                try:
                    numbers = [
                        'byte', 'byte_unsigned', 'unsigned_byte', 'word',
                        'word_unsigned', 'unsigned_word', 'long',
                        'long_unsigned', 'unsigned_long', 'quadword',
                        'quadword_unsigned', 'unsigned_quadword', 'float',
                        'double', 'f_float', 'g_float', 'd_float', 'fs_float',
                        'ft_float'
                    ]
                    if t[1].lower() in numbers and (isinstance(
                            t[2][0], Scalar) or isinstance(t[2][0], Array)):
                        t[0] = Data.evaluate(Builtin(t[1], tuple(t[2])))
                    else:
                        t[0] = Builtin(t[1], tuple(t[2]))
                except Exception:
                    t[0] = Builtin('ext_function', tuple([None, t[1]] + t[2]))

### call library (i.e. library->gub(a,b,c))

    def p_rettype(t):
        'rettype : COLON NAME'
        rettypes = {
            'bu': 2,
            'wu': 3,
            'lu': 4,
            'qu': 5,
            'b': 6,
            'w': 7,
            'l': 8,
            'q': 9,
            'f': 10,
            'd': 11,
            'fc': 12,
            'dc': 13,
            't': 14,
            'dsc': 24,
            'p': 51,
            'f': 52,
            'fs': 52,
            'ft': 53,
            'fsc': 54,
            'ftc': 55
        }
        if t[2].lower() in rettypes:
            t[0] = rettypes[t[2].lower()]

    def p_call(t):
        """operation : NAME ARROW NAME arglist\n| NAME ARROW NAME rettype arglist"""
        if len(t) == 5:
            t[0] = Call(tuple([t[1], t[3]] + t[4]))
        else:
            t[0] = Call(tuple([t[1], t[3]] + t[5]), opcode=t[4])

### Loop and fun statements found inside braces and sometimes in parens

    def p_optional_semicolon(t):
        """optional_semicolon : SEMICOLON\n| empty"""
        pass

    class CommaList(list):
        def get(self):
            return Builtin('comma', tuple(self))

    def p_statement(t):
        """statement : operand SEMICOLON\n| comma_list SEMICOLON\n| comma_list\n| operand\n| SEMICOLON
        """
        if isinstance(t[1], str):
            pass
        elif isinstance(t[1], CommaList):
            t[0] = t[1].get()
        else:
            t[0] = t[1]

    def p_statements(t):
        """statements : statement\n| statements statement\n| statements braced_statements"""
        if len(t) == 2:
            t[0] = Builtin('statement', (t[1], ))
        else:
            if t[2] is None:
                t[0] = t[1]
            elif len(t[1].args) < 250:
                t[1].args = tuple(list(t[1].args) + [t[2]])
                t[0] = t[1]
            else:
                t[0] = Builtin('statement', (t[1], t[2]))

    def p_braced_statements(t):
        """braced_statements : LBRACE statements RBRACE optional_semicolon\n | LBRACE RBRACE optional_semicolon"""
        if len(t) == 5:
            if len(t[2].args) == 1:
                t[0] = t[2].args[0]
            else:
                t[0] = t[2]
        else:
            pass

### paren statement list as in if_error(_a,(_a=1;_b++),42)

    def p_statement_list(t):
        'operation : LPAREN statements RPAREN'
        if len(t[2].args) == 1:
            t[0] = t[2].args[0]
        else:
            t[0] = t[2]

### comma operand list as in _a=1,_b=2,3

    def p_comma_list(t):
        """comma_list : COMMA\n| operand COMMA\n| comma_list COMMA\n| comma_list operand"""
        if isinstance(t[1], CommaList):
            if isinstance(t[2], str):
                if t[1].lastNone:
                    t[1].append(None)
            else:
                t[1].append(t[2])
                t[1].lastNone = False
            t[0] = t[1]
        else:
            t[0] = CommaList()
            if len(t) == 2:
                t[0].append(None)
                t[0].lastNone = True
            else:
                t[0].append(t[1])
                t[0].lastNone = False

### comma operation as in (_a=1,_b=2,3)

    def p_comma_list_operation(t):
        'operation : LPAREN comma_list RPAREN'
        t[0] = t[2].get()

    def p_empty(t):
        'empty :'
        pass

### For statement (i.e. for (_x=1;_x<10;_x++){statements...} or for (...) statement

    def p_optional_comma_list(t):
        """optional_operand : comma_list\n| operand\n| empty"""
        if isinstance(t[1], CommaList):
            t[0] = t[1].get()
        else:
            t[0] = t[1]

    def p_for(t):
        """operation : FOR LPAREN optional_operand SEMICOLON operand SEMICOLON optional_operand RPAREN braced_statements
        | FOR LPAREN optional_operand SEMICOLON operand SEMICOLON optional_operand RPAREN statement"""
        t[0] = Builtin('for', (t[3], t[5], t[7], t[9]))

### If statement (i.e. if (_x<10) {_x=42;} else {_x=43;})

    def p_if_begin(t):
        """if_begin : IF LPAREN operand RPAREN"""
        t[0] = t[3]

    def p_ifelse_body(t):
        """ifelse_body : braced_statements\n| statement"""
        t[0] = t[1]

    def p_if(t):
        """operation : if_begin ifelse_body\n| if_begin ifelse_body ELSE ifelse_body"""
        args = [t[1], t[2]]
        if len(t) > 3:
            args.append(t[4])
        t[0] = Builtin('if', tuple(args))

### While statement (i.e. while(expression){statements;} )

    def p_while(t):
        """operation : WHILE LPAREN operand RPAREN braced_statements
        | WHILE LPAREN operand RPAREN statement"""
        t[0] = Builtin('while', (t[3], t[5]))

### FUN definition (i.e. public fun gub(args){statements} )

    def p_fun_arg(t):
        """fun_arg : ARGTYPE IDENT\n| ARGTYPE ARGTYPE IDENT\n| IDENT\n| ARGTYPE LPAREN IDENT RPAREN\n| ARGTYPE ARGTYPE LPAREN IDENT RPAREN"""
        if len(t) == 2:
            t[0] = t[1]
        elif len(t) == 3:
            t[0] = Builtin(t[1], (str(t[2]), ))
        elif len(t) == 4:
            t[0] = Builtin(t[1], (Builtin(t[2], (str(t[3]), )), ))
        elif len(t) == 5:
            t[0] = Builtin(t[1], (t[3], ))
        else:
            t[0] = Builtin(t[1], (Builtin(t[2], (t[4], )), ))

    def p_fun_args(t):
        """fun_args : LPAREN\n| fun_args fun_arg\n| fun_args COMMA\n| fun_args RPAREN"""
        if len(t) == 2:
            t[0] = list()
        elif isinstance(t[2], str):
            t[0] = t[1]
        else:
            t[1].append(t[2])
            t[0] = t[1]

    def p_fun(t):
        """operation : IDENTTYPE FUN NAME fun_args braced_statements
        | FUN IDENTTYPE NAME fun_args braced_statements
        | FUN NAME fun_args braced_statements"""
        args = list()
        if len(t) == 6:
            if t[1].lower() == 'fun':
                itype = t[2]
            else:
                itype = t[1]
            args.append(Builtin(itype, (t[3], )))
            args.append(t[5])
            for arg in t[4]:
                args.append(arg)
        else:
            args.append(t[2])
            args.append(t[4])
            for arg in t[3]:
                args.append(arg)
        t[0] = Builtin('fun', tuple(args))

### Vector/Array declarations (i.e. [_a,_b,_c] or [1,2,3,])

    def p_vector(t):
        """vector_part : LBRACKET operand
        | LBRACKET
        | vector_part COMMA operand
        vector : vector_part RBRACKET"""
        if isinstance(t[1], str):
            if len(t) == 2:
                t[0] = Builtin('vector', tuple())
                t[0].isarray = True
            else:
                t[0] = Builtin('vector', (t[2], ))
                t[0].isarray = isinstance(t[2], Scalar) or isinstance(
                    t[2], Array)
        elif t[2] == ',':
            args = list(t[1].args)
            if len(args) > 250:
                args = [Builtin('vector', tuple(args)), t[3]]
            else:
                args.append(t[3])
            t[1].args = tuple(args)
            t[0] = t[1]
            t[0].isarray = t[1].isarray and (isinstance(t[3], Scalar)
                                             or isinstance(t[3], Array))
        else:
            if t[1].isarray:
                t[0] = Data.evaluate(t[1])
            else:
                t[0] = Builtin('vector', t[1].args)

### Switch statement (i.e. switch(_a) {case(42) {statements} case(43) {statements}} )

    def p_case(t):
        """case : CASE LPAREN operand RPAREN braced_statements\n| CASE LPAREN operand RPAREN statement
        | CASE LPAREN operand RPAREN\n| CASE DEFAULT braced_statements
        | CASE DEFAULT statement\n| statement"""
        if len(t) == 4:
            t[0] = Builtin('default', (t[3], ))
        elif len(t) == 5:
            t[0] = Builtin('case', (None, None))
            t[0].args = (t[3], )
            t[0].doAppendCase = True
        elif len(t) == 6:
            t[0] = Builtin('case', (t[3], t[5]))
        else:
            t[0] = t[1]

    def p_cases(t):
        """cases : case\n| cases case"""
        def findCaseWithNoStatements(case, parent=None, argidx=0):
Exemplo n.º 31
0
 def __init__(self):
     '''Initiate logging, open a file to store tokens, build the lexer.'''
     self.logger = logging.getLogger("W2L")
     self.lexer = lex.lex(module=self, reflags=re.DOTALL)
Exemplo n.º 32
0
import lex

tokens = ["a","b","c","d"]

t_a = r'a'
t_b = r'b'
t_c = r'c'
t_d = r'd'


# Error handling rule
def t_error(t):
    print("Hay un caracter no valido")
    t.lexer.skip(1)

lex.lex() # Build the lexer

lex.input("abcx")
while True:
    tok = lex.token()
    if not tok: break
    print (str(tok.value))
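
A hedged variant of the same four-token lexer, collecting the matched token values into a list by iterating over the lexer object instead of calling lex.token() in a loop. It assumes PLY is installed as the ply package (the snippet above imports the standalone lex module instead):

import ply.lex as lex

tokens = ("a", "b", "c", "d")

t_a = r'a'
t_b = r'b'
t_c = r'c'
t_d = r'd'

def t_error(t):
    print("There is an invalid character %r" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()                      # build the lexer from this module's rules
lexer.input("abcx")
values = [tok.value for tok in lexer]  # the lexer object is iterable
print(values)                          # expected: ['a', 'b', 'c'], plus an error report for 'x'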
Exemplo n.º 33
0
 def __init__(self, input):
     self.lexer = lex.lex()
     self.lexer.input(input)
Exemplo n.º 34
0
    r"\-?[0-9]+"
    t.value = int(t.value)
    return t


def t_COMMENT(t):
    r"//.*"
    return t


def t_CO(t):
    r":"
    return t


lex.lex()

#----------------------------------------------------------

commands = []
symbols = {}


def p_stuff(p):
    """stuff : 
            | statement stuff"""
    pass


def p_statement_comment(p):
    'statement : COMMENT'
Exemplo n.º 35
0
 def assert_tokens(self, inp, want):
     got = [x.tok for x in preparse(lex(inp))]
     self.assertEqual(got, want)
Exemplo n.º 36
0
 def cmd_lex(self, code):
     return lex(code)
Exemplo n.º 37
0
 def __init__(self):
     '''Initiate logging, open a file to store tokens, build the lexer.'''
     self.logger = logging.getLogger("W2L")
     self.lexer = lex.lex(module=self, reflags=re.DOTALL)
Exemplo n.º 38
0
    def rest(self):
        return self.buf[self.count:]

import readline

if __name__ == "__main__":
    import proarkhe
    import grammar
    import parser
    import block
    import exp
    import lex

    while True:
        rep = repreader(["; ", "+ "])
        e   = grammar.S(parser.pushstream(lex.lex(rep)))

        try:
            parsed = e.parse()
        except EOFError:
            break
        except lex.lex.lexerror as lexexp:
            continue

        if not parsed:
            print("Cannot parse: '{}'@{} '{}'".format(rep.buf, 
                                                      rep.count, rep.rest()))
        elif rep.rest() and not rep.rest().isspace():
            # for whatever reason "".isspace() is False
            print("Garbage after S: '{}' ({})".format(rep.buf, len(rep.buf)))
        else:
Exemplo n.º 39
0
def emmet(string):
    return _emmet_tokens(parse(lex(string))[0])
Exemplo n.º 40
0
from treeOptimizer import treeOptimizer
from codeOptimizer import codeOptimizer


def print_line():
    print('--------------------------------------------------')


if __name__ == '__main__':

    print('Complier Start')

    print('Lex')
    print_line()

    lex_obj = lex('test/case_09.txt')
    lex_obj.test()

    print('\nSyn')
    print_line()
    syn_obj = syn(lex_obj)
    syn_obj.build()
    syn_obj.test()

    print('\nSemantics')
    print_line()
    semantics_obj = semantics(syn_obj)
    semantics_obj.check()

    print('\nIrGenerate')
    print_line()
Exemplo n.º 41
0
def t_GEQ(t):
    r'>'
    return t
 
def t_NEWLINE(t):
    r"\n"
    t.lexer.lineno += 1
    pass

t_ignore = '\t '
    
def t_error(t):
    print("Illegal character '" + t.value[0] 
          + "' at line " + str(t.lexer.lineno))
    t.lexer.skip(1)
   

   
data = r""""""

lexer = lex.lex(debug=0)
# lexer.input('')
#          
# while True:
#     tok = lexer.token()
#     if not tok: break
# #     if tok.type == 'ID' or tok.type == 'DECIMAL':
# #         print(tok.type + "(" + tok.value + ")")
# #     else:
#     print(tok.type + "(" + tok.value + ")")
Exemplo n.º 42
0
#ignored characters
t_ignore = " \t"



def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

def t_error(t):
     print("Illegal character '%s'" % t.value[0])
     t.lexer.skip(1)


lexer_instance = lex.lex()

if __name__ == '__main__':
    
    sample_program = """
    class Factorial {
        public static void main(String[] a){
            System.out.println(new Fac().ComputeFac(10));
        }
    }

    class Fac {
        public int ComputeFac(int num){
            int num_aux;
            if (num < 1)
                num_aux = 1;
Exemplo n.º 43
0
                                                  stmt.stmts, block.decls]]):
        return [node.children[0]]
    elif any([
            parser.istoken(node, t)
            for t in ["[", "]", "(", ")", ";", "|", ":", ":="]
    ]):
        return []
    else:
        return [node]


import trex
import reader

identities = [
    trex.trex(parser.pushstream(lex.lex(reader.stringreader(s)))) for s in [
        '"0"       ; "+" ; x:.       ;^>add>',  # 0 + x == x
        'x:.       ; "+" ; "0"       ;^>add>',  # x + 0 == x
        'x:.       ; "-" ; "0"       ;^>add>',  # x - 0 == x
        'x:"OMEGA" ; "+" ; .         ;^>add>',  # OMEGA + x == OMEGA
        'x:"OMEGA" ; "-" ; .         ;^>add>',  # OMEGA - x == OMEGA
        '.         ; "+" ; x:"OMEGA" ;^>add>',  # x + OMEGA == OMEGA
        '"1"       ; "*" ; x:.       ;^>mul>',  # 1 * x == x
        'x:.       ; "*" ; "1"       ;^>mul>',  # x * 1 == x
        'x:.       ; "/" ; "1"       ;^>mul>',  # x / 1 == x
        'x:"0"     ; "*" ; .         ;^>mul>',  # 0 * x == 0
        'x:"0"     ; "/" ; .         ;^>mul>',  # 0 / x == 0
        '.         ; "*" ; x:"0"     ;^>mul>'
    ]
]  # x * 0 == 0
Exemplo n.º 44
0
def init_parser():
    lex.lex(debug=0, optimize=1)
    yacc.yacc(debug=0, optimize=1)
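
A hedged usage sketch for the helper above: once the module's token and grammar rules are defined, init_parser() builds the lexer and parser a single time and, because of optimize=1, caches them in the generated lextab/parsetab modules for later runs. The input string is purely illustrative and depends on whatever grammar is actually in use:

init_parser()                     # build (or reload cached) lexer and parser tables
result = yacc.parse("1 + 2 * 3")  # hypothetical input for the grammar in use
print(result)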
Exemplo n.º 45
0
def t_LGT(t):
    r'(<=)|[<>]|(>=)'
    return t


def t_ASSIGN(t):
    r':='
    return t


t_ignore = ' \t\n'


# Error handling rule
def t_error(t):
    print("Illegal character: %s" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
lexer = lex.lex()

# data = """ IFFALSE t < 3 GOTO 3 """
#
# lexer.input(data)
# while True:
#     tok = lexer.token()
#     if not tok:
#         break      # No more input
#     print(tok)
Exemplo n.º 46
0
 def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
     self.lexer = lex.lex(debug=debug,
                          optimize=optimize,
                          lextab=lextab,
                          reflags=reflags)
     self.token_stream = None
Exemplo n.º 47
0
# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''


def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)


#     return t

lexer = lex.lex(optimize=1)
if __name__ == "__main__":

    #
    # use Use LOC as PETSC_DIR [for reading/writing relevant files]
    #
    try:
        PETSC_DIR = sys.argv[1]
        htmlmapfile = sys.argv[2]
    except:
        raise RuntimeError('Insufficient arguments. Use: ' + sys.argv[0] +
                           ' PETSC_DIR htmlmap')

    # get the version string for this release
    try:
        fd = open(os.path.join(PETSC_DIR, 'include', 'petscversion.h'))
Exemplo n.º 48
0
    
    while token is not None:
        output_file.write("type:" + token.type)
        output_file.write(" value:" + str(token.value))
        output_file.write(" line:" + str(token.lineno))
        output_file.write(" position:" + str(token.lexpos))
        output_file.write("\n")

        token = lexer.token()


if __name__ == "__main__":
    if len(argv) != 3:
        print( "Parametros invalidos.")
        print( "Uso:")
        print( "  lexer.py archivo_entrada archivo_salida")
        exit()

    input_file = open(argv[1], "r")
    text = input_file.read()
    input_file.close()

    lexer = lex.lex(module=lexer_rules)


    lexer.input(text)

    output_file = open(argv[2], "w")
    dump_tokens(lexer, output_file)
    output_file.close()
Exemplo n.º 49
0
 def __init__(self):
     lex.lex(module=self)
     yacc.yacc(module=self)
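
A self-contained sketch of the same lexer-and-parser-in-one-class pattern, filled out with a tiny addition grammar so it actually runs. The class name, token set, and grammar rules are assumptions chosen only for illustration, and PLY is assumed to be installed as the ply package:

import ply.lex as lex
import ply.yacc as yacc

class MiniParser(object):
    tokens = ('NUMBER', 'PLUS')

    t_PLUS = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(self, t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    def p_expr_plus(self, p):
        'expr : expr PLUS term'
        p[0] = p[1] + p[3]

    def p_expr_term(self, p):
        'expr : term'
        p[0] = p[1]

    def p_term_number(self, p):
        'term : NUMBER'
        p[0] = p[1]

    def p_error(self, p):
        print("Syntax error at %r" % (p,))

    def __init__(self):
        self.lexer = lex.lex(module=self)      # build the lexer from this instance's rules
        self.parser = yacc.yacc(module=self)   # build the parser from this instance's rules

    def parse(self, text):
        return self.parser.parse(text, lexer=self.lexer)

print(MiniParser().parse("1 + 2 + 3"))  # expected: 6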
Exemplo n.º 50
0
def t_FNAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*\w*\('
    s = t.value
    m = re_name.match(s)
    if m:
        res = m.group(1)
        t.value = res
        return t

t_ignore = " \t"

def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.skip(1)

lex.lex(lextab='_lextab', optimize=1)

#############################################################################
# yacc

def _linize_list(list):
    reslist = []
    for item in list:
        reslist += item
    return reslist

# Parsing rules

precedence = (
    ('left', 'ADD', 'SUB', 'COMMA', "CONCAT", "EQ", "NE", "LE", "GE", "LT", "GT", 'POWER'),
    ('left', 'MUL', 'DIV'),
Exemplo n.º 51
0
def pascal_lex(data):
    return lex.lex(data, token_expressions)
Exemplo n.º 52
0
def t_mod(t):
    r'%.*\n'
    t.lineno += 1

# Comments
def t_comment(t):
    r' /\*(.|\n)*?\*/'
    t.lineno += t.value.count('\n')

def t_error(t):
    print "Illegal character %s at %d type %s" % (repr(t.value[0]), t.lineno, t.type)
    t.skip(1)
    
# Build the lexer
import lex
lex.lex(debug=0)

#
# Section: Helper classes and functions. 
#

# Global variablers
known_basics = {"int" : "pack_int",
                "enum" : "pack_enum", 
                "unsigned_int" : "pack_uint",
                "unsigned" : "pack_uint",
                "hyper" : "pack_hyper",
                "unsigned_hyper" : "pack_uhyper",
                "float" : "pack_float",
                "double" : "pack_double",
                # Note: xdrlib.py does not have a
Exemplo n.º 53
0
import sys
from lex import lex
from ast import parse

if __name__ == "__main__":
    text = ""
    print("Type 'exit' or 'quit' to stop")
    while (True):
        text = input("> ")
        if text.lower() in ("exit", "quit"):
            break
        tokens = lex(text)
        #print(tokens)
        parse_result = parse(tokens)
        #print(parse_result)
        if not parse_result:
            sys.stderr.write("Parse Error!\\n")
            sys.exit(1)
        ast = parse_result.value
        env = {}
        #print(parse_result)
        #print(ast)
        print(ast.eval(env))
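
A hedged non-interactive variant of the same loop, reusing the lex and parse helpers imported above; it can be handy for quick scripted checks. The sample expressions are illustrative only and must match whatever the grammar actually accepts:

def run_snippets(snippets):
    env = {}
    for text in snippets:
        tokens = lex(text)
        parse_result = parse(tokens)
        if not parse_result:
            print("Parse error: %r" % text)
            continue
        print(text, "=>", parse_result.value.eval(env))

run_snippets(["1 + 2", "2 * (3 + 4)"])  # hypothetical inputs; adjust to the grammar in use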
Exemplo n.º 54
0
def relex():
    global lexer
    mk_quals()
    lexer = lex.lex()
Exemplo n.º 55
0
        if type in tree.NAMED_NODE:
            print('WARN: %s:%s title not specified' % (source, slineno))

    if not title or len(title) < 0:
        title = name

    return Node(type=type,
                value=value,
                name=name,
                title=title,
                source=source,
                spos=spos,
                slineno=slineno)


__lexer__ = lex.lex(reflags=re.M)


class MutipleFileLexer(object):
    def __init__(self, f='__string__', startlineno=0, indent=0):
        self.lexer = __lexer__.clone()
        self.lexer.include_lexer = None
        self.lexer.file = f
        self.lexer.source = f
        self.lexer.startlineno = startlineno
        self.lexer.indent = indent
        self.lexer.mflexer = self
        self.mflexer = self

    def active_lexer(self):
        lexer = self.lexer
Exemplo n.º 56
0
        redirect_to_file(t.value + " line " + str(t.lineno) + " cols " +
                         str(find_column(contents, t)) + "-" +
                         str(find_column(contents, t) + len(t.value) - 1) +
                         " is " + t.type + "( truncated to " + t.value[:31] +
                         ")")
    elif t.type == 'T_StringConstant':
        redirect_to_file("\n*** Error line " + str(t.lineno) + ".")
        redirect_to_file("*** Unterminated string constant: " + t.value + "\n")
    else:
        redirect_to_file("*** Error line" + str(t.lineno) +
                         "\nUnrecognized char '%s'" % str(t.value[0]) + "\n\n")

    t.lexer.skip(1)


lexar = lex.lex()


#redirect print to outfile
def redirect_to_file(text):
    #original = sys.stdout
    #sys.stdout = open(outFileName, 'a+')
    #print('This is your redirected text:')
    print(text)
    #sys.stdout = original


#read from file

fileName = str(sys.argv[1])
#outFileName = str(sys.argv[2])
Exemplo n.º 57
0
def parse(code, grammar, actions, gotos):

    # TODOd #1: create a list of trees
    trees = []

    input = []
    code, lexeme, token = lex(code)
    newtoken = str(token)
    input.append(newtoken[6:])

    stack = []
    stack.append(0)
    while True:
        if not input[0]:
            input.pop()
            input.append("$")
            token = "$"
        else:
            token = str(input[0])
        print("stack: ", end = "")
        print(stack, end = " ")
        print("input: ", end = "")
        print(input, end = " ")
        state = stack[-1]
        action = actions[(state, token)]
        print("action: ", end = "")
        print(action)

        if action is None:
            if state == 1:
                raise Exception(errorMessage(7))
            elif state <= 2:
                raise Exception(errorMessage(8))
            elif state == 10:
                raise Exception(errorMessage(6))
            elif state == 32:
                raise Exception(errorMessage(11))
            elif state >= 22 or state >= 40:
                raise Exception(errorMessage(9))
            elif state > 45:
                raise Exception(errorMessage(10))
            return None  # tree building update


        # shift operation , fix for two digits
        if action[0] == 's':
            input.pop(0)
            stack.append(token)
            code, lexeme, token = lex(code)
            newtoken = str(token)
            input.append(newtoken[6:])
            if len(action) > 2:
                ok = int(action[1:3])
                state = ok
            else:
                state = int(action[1])
            stack.append(state)

            # TODOd #2: create a new tree, set data to token, and append it to the list of trees
            tree = Tree()
            tree.data = token
            trees.append(tree)

        # reduce operation
        elif action[0] == 'r':
            if len(action) > 2:
                production = grammar[int(action[1:3])]
            else:
                production = grammar[int(action[1])]
            lhs = getLHS(production)
            rhs = getRHS(production)
            for i in range(len(rhs) * 2):
                stack.pop()
            state = stack[-1]
            stack.append(lhs)
            stack.append(int(gotos[(state, lhs)]))

            # TODOd #3: create a new tree and set data to lhs
            newTree = Tree()
            newTree.data = lhs

            # TODOd #4: get "len(rhs)" trees from the right of the list of trees and add each of them as child of the new tree you created, preserving the left-right order
            for tree in trees[-len(rhs):]:
                newTree.add(tree)

            # TODOd #5: remove "len(rhs)" trees from the right of the list of trees
            trees = trees[:-len(rhs)]

            # TODOd #6: append the new tree to the list of trees
            trees.append(newTree)

        # not a shift or reduce operation, must be an "accept" operation
        else:
            production = grammar[0]
            lhs = getLHS(production)
            rhs = getRHS(production)

            # TODOd #7: same as reduce but using the 1st rule of the grammar
            root = Tree()
            root.data = lhs
            for tree in trees:
                root.add(tree)

            # TODOd #8: return the new tree
            return root
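
The parser above relies on a Tree helper with a data field and an add(child) method; a minimal sketch of what such a class could look like (an assumption, not the original implementation):

class Tree:
    """Minimal n-ary tree node: a data field plus an ordered list of children."""
    def __init__(self, data=None):
        self.data = data
        self.children = []

    def add(self, child):
        self.children.append(child)

    def pretty(self, depth=0):
        # indent each level by two spaces for a quick visual dump
        lines = ["  " * depth + str(self.data)]
        for child in self.children:
            lines.append(child.pretty(depth + 1))
        return "\n".join(lines)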
Exemplo n.º 58
0
import lex
from Tokens import *
from Colour import *

cfg_file = sys.argv[1][6:] # Config file
prog_name = sys.argv[2] # Code file
output_file = sys.argv[3][9:] # Output HTML file

enc = createColDict(cfg_file) # Get keyword-to-colour mapping

H = '<!DOCTYPE html><html><head><title>'+prog_name+'</title></head><body>' # HTML document output

with open(prog_name) as fp:
    code = fp.read() + '\n'

lexer = lex.lex() # Initialize lexer
lexer.input(code)
actstring = [] # Actual lexemes
tokenstring = [] # Token types
c_line = lexer.lineno

for token in lexer:
    if (token.lineno != c_line) : # once we get to next line
        H += getHTML(actstring,tokenstring,enc) # get colour-formatted HTML code for current line
        actstring = []
        tokenstring = []
        c_line = token.lineno # update current line number
    actstring.append(str(token.value))
    tokenstring.append(str(token.type))

H += getHTML(actstring,tokenstring,enc) # get colour-formatted HTML code for current line
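
The excerpt stops after building the HTML string H; a hedged sketch of the remaining step, closing the document and writing it to the output path taken from sys.argv (an assumption about how the script finishes, not code from the original):

H += '</body></html>'                # close the tags opened at the top
with open(output_file, 'w') as out:  # output_file comes from sys.argv[3] above
    out.write(H)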
Exemplo n.º 59
0
    return True

def tokenset(node, order):
    if order == tree.tnode.WALK_POST and (not node.start and node.children):
        node.start = node.children[0].start
        node.end   = node.children[-1].end
    return True

import block
import exp

if __name__ == "__main__":
    import sys
    import reader

    s = S(parser.pushstream(lex.lex(reader.filereader(sys.stdin))))
    if not s.parse():
        print("Cannot parse")
    elif not s.atend():
        print("Garbage after S")
    else:
        print(s.pprint())
        print(s.printexp())

        s.block = proarkhe.proarkhe()
        s.visit(parentset)
        s.walk(tokenset)
        s.reform([exp.simplify_depth], [exp.simplify_arith])
        s.visit(parentset)
        s.visit(block.blockset)
        s.visit(block.declsset)
Exemplo n.º 60
0
from lex import lex
from sintaxe import sintaxe

f = open('exemplo.txt', 'r')
stext = f.read()
tokens = lex(stext)
print(sintaxe(tokens).pretty())
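
A hedged variant of the same driver that closes the input file with a context manager, using the same lex and sintaxe helpers imported above:

with open('exemplo.txt', 'r') as f:
    stext = f.read()

tokens = lex(stext)
print(sintaxe(tokens).pretty())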