/
parse_ast.py
285 lines (237 loc) · 10.1 KB
/
parse_ast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
import ast
import functools
import logging

import typez
from typez import (
    Typez,
    Scope,
    is_none,
    none_type,
)
# '' names the root logger, so the DEBUG level and the console handler
# configured here take effect process-wide as soon as this module is imported.
console_handler = logging.StreamHandler()
logger = logging.getLogger('')
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)
"""
We resolve types of individual variables by symbolic execution of the code.
"""
def logentry(fn):
    """Decorator that logs each visit of an AST node before calling ``fn``.

    The wrapper picks the first positional argument that is an ``ast.AST``
    instance and emits a DEBUG record ``"<lineno> <function name>"``.  ``"?"``
    replaces the line number when that node has no ``lineno`` attribute or
    when no AST node was passed at all (the call is still forwarded).

    Args:
        fn: the callable to wrap (in this file, the ``Parser.exec_*`` methods).

    Returns:
        A wrapper carrying ``fn``'s name and docstring that forwards all
        positional and keyword arguments and returns ``fn``'s result unchanged.
    """
    @functools.wraps(fn)
    def _f(*args, **kwargs):
        # next(..., None) instead of [...][0]: a call without any AST argument
        # must not blow up inside the logging helper.
        node = next((arg for arg in args if isinstance(arg, ast.AST)), None)
        line = str(node.lineno) if hasattr(node, 'lineno') else '?'
        logger.debug(line + " " + fn.__name__)
        return fn(*args, **kwargs)
    return _f
class Problem:
    """One issue met during symbolic execution (for example an unknown attribute)."""

    def __init__(self, node, message=None, symbol=None):
        # node: the AST node the problem was reported on
        # message: short tag describing the problem (optional)
        # symbol: the name involved in the problem (optional)
        self.node, self.message, self.symbol = node, message, symbol

    def __str__(self):
        fields = (self.message, str(self.symbol), str(self.node))
        return "message: %s, symbol: %s (node: %s)" % fields

    def __repr__(self):
        # repr and str are deliberately the same text
        return str(self)
class Parser:
    """
    Symbolic executor that walks a Python AST and tracks the types of names.

    attributes:
        modules: TODO. will be list of modules already imported
        problems: list of all problems (``Problem`` instances) found by parsing

    The main entry point is ``eval_code``; it dispatches every node to the
    ``exec_<NodeClassName>`` method of this class, and those methods call
    ``eval_code`` recursively for their child nodes.
    """

    def __init__(self):
        self.modules = []
        self.problems = []

    def warn(self, node, message=None, symbol=None):
        """Record a new ``Problem`` for ``node`` in ``self.problems``."""
        self.problems.append(Problem(node, message, symbol))

    @logentry
    def exec_BinOp(self, node, scope):
        """Evaluate ``left <op> right`` by executing the operator method of the left operand.

        Only ``+`` and ``/`` are mapped; any other operator raises ``Exception``
        (after both operands have been evaluated).
        """
        # NOTE(review): Python 3 dispatches `/` to __truediv__; '__div__' is kept
        # because the method names known to typez's externs are not visible
        # from this file -- confirm before renaming.
        op_dict = {ast.Add: '__add__',
                   ast.Div: '__div__'}
        left_type = self.eval_code(node.left, scope)
        right_type = self.eval_code(node.right, scope)
        op_class = node.op.__class__
        if op_class not in op_dict:
            raise Exception('unknown operation '+str(node.op))
        fun_type = left_type.resolve(op_dict[op_class], 'class')
        return self._exec_fun(fun_type, [left_type, right_type], scope)

    @logentry
    def exec_FunctionDef(self, node, scope):
        """Bind the function name in ``scope`` to a 'func' type; the body is not executed here."""
        scope[node.name] = typez.Typez(kind='func', node=node, scope=scope)

    @logentry
    def exec_ClassDef(self, node, scope):
        """Execute the class body in a fresh child scope and bind the class name in ``scope``."""
        assert(isinstance(node, ast.AST))
        class_scope = Scope(parent=scope)
        for _node in node.body:
            self.eval_code(_node, class_scope)
        bases = [self.eval_code(base, scope) for base in node.bases]
        scope[node.name] = Typez(kind='class', node=node, bases=bases, scope=class_scope)

    @logentry
    def exec_Assign(self, node, scope):
        """Evaluate the right-hand side once and bind it to every target.

        Covers ``a = v``, ``x.y = v`` and chained forms such as ``a = b = v``.
        The value is evaluated before the targets, which is also the order
        Python itself uses.

        Raises:
            Exception: for target kinds that are not handled (see
                ``_assign_target``).
        """
        rhs_type = self.eval_code(node.value, scope)
        for target in node.targets:
            self._assign_target(target, rhs_type, scope)

    def _assign_target(self, target, rhs_type, scope):
        """Bind ``rhs_type`` to a single assignment target (a name or an attribute)."""
        if isinstance(target, ast.Name):
            scope[target.id] = rhs_type
        elif isinstance(target, ast.Attribute):
            if not isinstance(target.ctx, ast.Store):
                raise Exception('bad context')
            # x.y = sth: x.y is an attribute in Store context. Look up the
            # appropriate __setattr__ on the class of x (possibly cascading to
            # object.__setattr__) and execute it.
            lhs_obj = self.eval_code(target.value, scope)
            setattr_type = lhs_obj.resolve('__setattr__', 'class')
            self._exec_fun(setattr_type, (lhs_obj, target.attr, rhs_type), scope)
        else:
            # e.g. tuple or subscript targets are not supported yet
            raise Exception('should not get here')

    @logentry
    def exec_Num(self, node, scope):
        """Numeric literal (``ast.Num``, produced by older Python versions)."""
        return Typez(kind='prim', node=node, value=node.n)

    @logentry
    def exec_Str(self, node, scope):
        """String literal (``ast.Str``, produced by older Python versions)."""
        return Typez(kind='prim', node=node, value=node.s)

    @logentry
    def exec_Constant(self, node, scope):
        """Literal as produced by Python 3.8+ (``ast.Constant``).

        Newer interpreters emit ``Constant`` where older ones emitted ``Num``,
        ``Str`` or a name constant, so the results mirror ``exec_Num`` /
        ``exec_Str`` ('prim') and ``exec_Name`` ('const' with the constant's
        name as a string for None/True/False).
        """
        value = node.value
        if value is None or isinstance(value, bool):
            return Typez(kind='const', value=str(value))
        return Typez(kind='prim', node=node, value=value)

    # Python 3.4-3.7 represent None/True/False as ast.NameConstant, which
    # carries the same ``value`` attribute.
    exec_NameConstant = exec_Constant

    @logentry
    def exec_Expr(self, node, scope):
        """Expression statement: evaluate and return the wrapped expression."""
        return self.eval_code(node.value, scope)

    def _exec_fun(self, fun_type, args, scope, create_scope=True):
        """
        Execute the function ``fun_type`` with the given ``args``.

        When create_scope is True a new scope is created for the call, with the
        ``scope`` parameter as its parent; otherwise the function body runs
        directly in ``scope``.

        Only positional arguments are bound. Returns ``args[0]`` when
        ``fun_type.kind`` is 'class', otherwise ``typez.type_from_list`` of the
        truthy results of the body statements.

        Raises:
            Exception: for 'multi' function types (not implemented) and when
                the number of arguments differs from the definition.
        """
        if fun_type.kind == 'multi':
            # TODO: execute every member of fun_type.multi and merge the
            # results with typez.type_from_list.
            raise Exception('not yet implemented')

        if fun_type == typez.inf_setattr:
            # inferred __setattr__: store the value directly in the object's
            # scope; the attribute name arrives as a str or as a Typez that
            # wraps a str.
            attr = args[1]
            if isinstance(attr, Typez) and isinstance(attr.value, str):
                attr = attr.value
            if isinstance(attr, str):
                args[0].scope[attr] = args[2]

        fun_scope = Scope(parent=scope) if create_scope else scope
        def_args = fun_type.node.args.args
        # binding arguments to their values; for now it only works with
        # positional arguments
        if len(args) != len(def_args):
            raise Exception('bad number of arguments (%d!=%d)'%(len(args), len(def_args)))
        for def_arg, arg in zip(def_args, args):
            fun_scope[def_arg.arg] = arg

        results = []
        for _node in fun_type.node.body:
            res = self.eval_code(_node, fun_scope)
            if res:
                results.append(res)
        if fun_type.kind == 'class':
            return args[0]
        return typez.type_from_list(results)

    # NOTE(review): unimplemented stub. Its parameters do not follow the
    # (self, node, scope) convention of the other handlers; eval_code still
    # reaches it because it happens to accept three positional arguments.
    def exec_IfExp(test, body, orelse):
        pass

    def exec_Call(self, node, scope):
        """Evaluate a call: either a function/method invocation or a class instantiation.

        Returns the result of executing the callee, the freshly created object
        for a class call, or ``typez.none_type`` (after recording a problem)
        when the called attribute does not exist.

        Raises:
            Exception: when the callee expression is neither a name nor an
                attribute, or resolves to something that is neither 'func'
                nor 'class'.
        """
        # First find out which function should be called. This is easy for
        # func(), but trickier for constructs such as a.b.c.func().
        self_type = None
        if isinstance(node.func, ast.Name):
            # covers func() case
            logger.debug("? exec_Call "+node.func.id +" "+str(node))
            call_type = scope.resolve(node.func.id, 'cascade')
        elif isinstance(node.func, ast.Attribute):
            # covers a.b.c.func() case
            logger.debug("? exec_Call "+node.func.attr +" "+str(node))
            call_type = self.eval_code(node.func, scope)
            if typez.is_none(call_type):
                self.warn(node.func, message='nonexistent_attribute', symbol=node.func.attr)
                return typez.none_type
            # whether we deal with a function or with a method has already
            # been resolved by exec_Attribute (it sets is_method)
            if hasattr(call_type, 'is_method'):
                if call_type.is_method:
                    self_type = self.eval_code(node.func.value, scope)
        else:
            raise Exception('should not get here')

        # Now we know what to invoke; distinguish a func/method call from a
        # 'new' statement.
        if call_type.kind == 'func':
            args = [self.eval_code(arg, scope) for arg in node.args]
            if self_type:
                args = [self_type]+args
            return self._exec_fun(call_type, args, call_type.scope)

        if call_type.kind == 'class':
            # Call-ing as a 'new' statement: build the object, then run
            # __init__ with it as the first argument.
            args = [self.eval_code(arg, scope) for arg in node.args]
            new = Typez(kind='obj', scope=Scope(parent=call_type.scope))
            args = [new]+args
            new.scope['__class__'] = call_type
            constructor = call_type.resolve('__init__', 'straight')
            new.obj = self._exec_fun(constructor, args, new.scope)
            return new

        raise Exception('calling node must be either function or class')

    @logentry
    def exec_Module(self, node, scope):
        """Execute all module statements in a new child scope and return that scope."""
        module_scope = Scope(parent=scope)
        for _node in node.body:
            self.eval_code(_node, module_scope)
        return module_scope

    @logentry
    def exec_Return(self, node, scope):
        """Evaluate the returned expression; a bare ``return`` yields the None constant."""
        if node.value is None:
            # bare `return`: there is no child node to evaluate; produce the
            # same type exec_Name gives for the name None
            return Typez(kind='const', value='None')
        return self.eval_code(node.value, scope)

    def exec_Name(self, node, scope):
        """Resolve a name: None/True/False become 'const' types, anything else is looked up in scope."""
        if node.id in ('None', 'True', 'False'):
            return Typez(kind='const', value=node.id)
        return scope.resolve(node.id, 'cascade')

    def exec_Pass(self, node, scope):
        """``pass`` has no effect."""
        pass

    @logentry
    def exec_Attribute(self, node, scope):
        """Resolve ``value.attr`` in Load context.

        'straight' resolution is tried first; if it finds nothing, 'class'
        resolution is tried and a hit is flagged as a method. When both fail a
        problem is recorded and ``none_type`` is returned.

        Raises:
            Exception: when the attribute is not in Load context.
        """
        if not isinstance(node.ctx, ast.Load):
            raise Exception('Attribute should allways be executed in Load context, in the case of\n'
                            'x.y=z the whole assignment should be handled by exec_Assign')
        _type = self.eval_code(node.value, scope)
        straight_resolution = _type.resolve(node.attr, 'straight')
        if not is_none(straight_resolution):
            return straight_resolution
        res = _type.resolve(node.attr, 'class')
        if not is_none(res):
            # NOTE(review): this flags the resolved type object itself, not a
            # copy; if resolve() returns a shared object the flag sticks to it
            # for later lookups -- confirm this is intended.
            res.is_method = True
            return res
        self.warn(node, message='nonexistent_attribute', symbol=node.attr)
        return none_type

    def eval_code(self, node, scope=None):
        """
        Main method of the parser: execute the code under ``node`` within the
        given scope and return the value of that code. If no scope is given,
        typez.extern_scope is used.

        The work is dispatched to the exec_<ast_node_name> method matching the
        node's class; those methods do only the minimum needed for their own
        node and call eval_code recursively for their child nodes.

        Raises:
            KeyError: when no exec_* handler exists for the node's class.
        """
        if scope is None:
            scope = typez.extern_scope
        name = 'exec_'+node.__class__.__name__
        # getattr on the instance (rather than Parser.__dict__) lets subclasses
        # add or override handlers.
        handler = getattr(self, name, None)
        if handler is None:
            raise KeyError('no handler %s for node %r' % (name, node))
        return handler(node, scope)
# Runs on every import of this module, after all definitions above.
# NOTE(review): presumably this fills typez.extern_scope, the default scope
# used by Parser.eval_code -- confirm in typez.
typez.load_externs()

# Placeholder script entry point; running the file directly does nothing more.
if __name__ == '__main__':
    pass