def test_if_in_for():
    """Verify the jump targets and control-flow structures found for bug()."""
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        # 2.7-style scanner setup: explicit size + line/prev-op tables.
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        jump_targets = scan.find_jump_targets()
        assert {15: [3], 69: [66], 63: [18]} == jump_targets
        assert scan.structs == [
            {'start': 0, 'end': 72, 'type': 'root'},
            {'start': 18, 'end': 66, 'type': 'if-then'},
            {'start': 31, 'end': 59, 'type': 'for-loop'},
            {'start': 62, 'end': 63, 'type': 'for-else'},
        ]
    elif 3.2 < PYTHON_VERSION <= 3.4:
        # 3.3/3.4-style scanner setup: raw bytecode array, no size argument.
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        jump_targets = scan.find_jump_targets()
        assert {69: [66], 63: [18]} == jump_targets
        assert scan.structs == [
            {'end': 72, 'type': 'root', 'start': 0},
            {'end': 66, 'type': 'if-then', 'start': 6},
            {'end': 63, 'type': 'if-then', 'start': 18},
            {'end': 59, 'type': 'for-loop', 'start': 31},
            {'end': 63, 'type': 'for-else', 'start': 62},
        ]
    else:
        assert True, "FIXME: should note fixed"
    return
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Parse a code object to an abstract syntax tree representation.

    :param version:         The python version this code is from as a float, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:              The code object to parse.
    :param out:             File like object to write the output to.
    :param showasm:         Flag which determines whether the disassembled and
                            ingested code is written to sys.stdout or not.
    :param parser_debug:    dict containing debug flags for the spark parser.

    :return: Abstract syntax tree representation of the code object.
    """
    assert iscode(co)

    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version, is_pypy)
    tokens, customize = scanner.ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging:
    # parser_debug = {'rules': True, 'transition': True, 'reduce': True,
    #                 'showstack': 'full'}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
def test_template_engine():
    """A '%c' template item should expand to the named child node's source."""
    out = StringIO()
    sys_version = float(sys.version[0:3])
    scanner = get_scanner(sys_version, is_pypy=False)
    scanner.insts = []
    walker = SourceWalker(2.7, out, scanner)
    # Render the NONE syntax-tree node through a simple one-slot template.
    walker.ast = NONE
    walker.template_engine(('--%c--', 0), NONE)
    print(walker.f.getvalue())
    assert walker.f.getvalue() == '--None--'
def test_grammar():
    """Check grammar coverage: expected unused LHS/RHS rules and tokens."""

    def check_tokens(tokens, opcode_set):
        # Strip instruction-count (_<n>) and _CONT suffixes before comparing
        # the grammar's terminal symbols against the opcode universe.
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    lhs, rhs, tokens, right_recursive = p.check_sets()

    expect_lhs = set(['expr1024', 'pos_arg'])
    unused_rhs = set([
        'build_list', 'call_function', 'mkfunc', 'mklambda',
        'unpack', 'unpack_list',
    ])
    expect_right_recursive = frozenset([
        ('designList', ('designator', 'DUP_TOP', 'designList'))
    ])

    if PYTHON3:
        expect_lhs.add('load_genexpr')
        unused_rhs = unused_rhs.union(set("""
            except_pop_except genexpr classdefdeco2 listcomp
        """.split()))
        if 3.0 <= PYTHON_VERSION:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            pass
    else:
        expect_lhs.add('kwarg')

    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    # Pseudo-tokens the ingester synthesizes; they are not real opcodes.
    ignore_set = set("""
        JUMP_BACK CONTINUE
        COME_FROM COME_FROM_EXCEPT COME_FROM_EXCEPT_CLAUSE
        COME_FROM_LOOP COME_FROM_WITH COME_FROM_FINALLY
        ELSE
        LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
        LAMBDA_MARKER
        RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
    """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def test_if_in_for():
    """Verify jump targets/structures for bug() and bug_loop()."""
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        ## FIXME: the data below is wrong.
        ## we get different results currenty as well.
        ## We need to probably fix both the code
        ## and the test below
        # assert {15: [3], 69: [66], 63: [18]} == fjt
        # assert scan.structs == \
        #     [{'start': 0, 'end': 72, 'type': 'root'},
        #      {'start': 15, 'end': 66, 'type': 'if-then'},
        #      {'start': 31, 'end': 59, 'type': 'for-loop'},
        #      {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert {64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 80, 'type': 'root'},
            {'start': 3, 'end': 64, 'type': 'if-then'},
            {'start': 6, 'end': 15, 'type': 'try'},
            {'start': 19, 'end': 38, 'type': 'except'},
            {'start': 45, 'end': 67, 'type': 'while-loop'},
            {'start': 70, 'end': 64, 'type': 'while-else'},
            # previous bug was not mistaking while-loop for if-then
            {'start': 48, 'end': 67, 'type': 'while-loop'},
        ]
    elif 3.2 < PYTHON_VERSION <= 3.4:
        bytecode = Bytecode(code, scan.opc)
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        scan.insts = list(bytecode)
        fjt = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == [
            {'end': 72, 'type': 'root', 'start': 0},
            {'end': 66, 'type': 'if-then', 'start': 6},
            {'end': 63, 'type': 'if-then', 'start': 18},
            {'end': 59, 'type': 'for-loop', 'start': 31},
            {'end': 63, 'type': 'for-else', 'start': 62},
        ]
    else:
        assert True, "FIXME: should note fixed"
    return
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """Disassemble code object *co* and parse the tokens into a syntax tree.

    Returns the abstract syntax tree produced by the spark parser.
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for tok in tokens:
            print(tok)
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
def align_deparse_code(version, co, out=sys.stderr, showasm=False,
                       showast=False, showgrammar=False, code_objects={},
                       compile_mode='exec', is_pypy=False):
    """
    ingests and deparses a given code block 'co'
    """
    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)
    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    maybe_show_asm(showasm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    # Build AST from disassembly.
    deparsed = AligningWalker(version, scanner, out, showast=showast,
                              debug_parser=debug_parser,
                              compile_mode=compile_mode,
                              is_pypy=is_pypy)
    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'
    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    # What we've been waiting for: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def test_grammar():
    """Check grammar coverage (older camelCase parser API variant)."""

    def check_tokens(tokens, opcode_set):
        # Drop _<n> count suffixes and _CONT suffixes before comparing.
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dumpGrammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    lhs, rhs, tokens, right_recursive = p.checkSets()

    expect_lhs = set(['expr1024', 'pos_arg'])
    unused_rhs = set(['build_list', 'call_function', 'mkfunc', 'mklambda',
                      'unpack', 'unpack_list'])
    expect_right_recursive = [['designList',
                               ('designator', 'DUP_TOP', 'designList')]]

    if PYTHON3:
        expect_lhs.add('load_genexpr')
        unused_rhs = unused_rhs.union(set("""
            except_pop_except genexpr classdefdeco2 listcomp
        """.split()))
        if 3.0 <= PYTHON_VERSION:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            pass
    else:
        expect_lhs.add('kwarg')

    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    # Pseudo-tokens synthesized by the scanner; not real opcodes.
    ignore_set = set("""
        JUMP_BACK CONTINUE RETURN_END_IF
        COME_FROM COME_FROM_EXCEPT COME_FROM_EXCEPT_CLAUSE
        COME_FROM_LOOP COME_FROM_WITH COME_FROM_FINALLY
        ELSE
        LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
        LAMBDA_MARKER RETURN_LAST
    """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """Disassemble code object ``co`` and parse the tokens into a syntax tree.

    :param version:      Python version of the bytecode, as a float.
    :param co:           Code object (or object exposing ``co_name``) to parse.
    :param out:          Output stream (currently unused by the body).
    :param showasm:      When true, print the disassembled tokens to stdout.
    :param parser_debug: dict of debug flags for the spark parser.
    :return: abstract syntax tree for ``co``.
    """
    # BUG FIX: removed unused ``import inspect`` dead code.
    # ``co`` need only quack like a code object here.
    assert hasattr(co, 'co_name')
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):
    """Deparse code object ``co`` into a FragmentsWalker with source text.

    :param version:     Python bytecode version, as a float.
    :param co:          Code object to deparse.
    :param out:         Output stream (kept for interface compatibility;
                        NOTE(review): not referenced by the body).
    :param showasm:     When true, print the disassembled tokens.
    :param showast:     When true, show the abstract syntax tree.
    :param showgrammar: Grammar-reduction debugging flag.
    :return: the FragmentsWalker holding the generated source.
    :raises: the walker's stored error if deparsing failed.
    """
    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version)
    # BUG FIX: ``co`` was disassembled twice in a row; once is enough.
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    debug_parser['reduce'] = showgrammar

    # Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version, scanner, showast=showast,
                               debug_parser=debug_parser)
    deparsed.ast = deparsed.build_ast(tokens, customize)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'
    del tokens  # save memory

    # convert leading '__doc__ = "..." into doc string
    assert deparsed.ast == 'stmts'
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to to: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)
    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR
    return deparsed
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """Disassemble code object ``co`` and parse the tokens into a syntax tree.

    :param version:      Python bytecode version, as a float.
    :param co:           Code object to parse.
    :param out:          Output stream (kept for interface compatibility).
    :param showasm:      When true, print the disassembled tokens.
    :param parser_debug: dict of debug flags for the spark parser.
    :return: abstract syntax tree for ``co``.
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)
    # BUG FIX: this leftover debugging line unconditionally overwrote the
    # caller-supplied ``parser_debug`` argument; kept commented for reference
    # (the sibling implementation keeps it commented out too).
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
def test_if_in_for():
    """Verify jump targets/structures for bug() and bug_loop()."""
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert {15: [3], 69: [66], 63: [18]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 72, 'type': 'root'},
            {'start': 15, 'end': 66, 'type': 'if-then'},
            {'start': 31, 'end': 59, 'type': 'for-loop'},
            {'start': 62, 'end': 63, 'type': 'for-else'},
        ]
        code = bug_loop.__code__
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert {64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 80, 'type': 'root'},
            {'start': 3, 'end': 64, 'type': 'if-then'},
            {'start': 6, 'end': 15, 'type': 'try'},
            {'start': 19, 'end': 38, 'type': 'except'},
            {'start': 45, 'end': 67, 'type': 'while-loop'},
            {'start': 70, 'end': 64, 'type': 'while-else'},
            # previous bug was not mistaking while-loop for if-then
            {'start': 48, 'end': 67, 'type': 'while-loop'},
        ]
    elif 3.2 < PYTHON_VERSION <= 3.4:
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        fjt = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == [
            {'end': 72, 'type': 'root', 'start': 0},
            {'end': 66, 'type': 'if-then', 'start': 6},
            {'end': 63, 'type': 'if-then', 'start': 18},
            {'end': 59, 'type': 'for-loop', 'start': 31},
            {'end': 63, 'type': 'for-else', 'start': 62},
        ]
    else:
        assert True, "FIXME: should note fixed"
    return
def disco(version, co, out=None, is_pypy=False):
    """Disassemble and deparse code object 'co', writing to ``out``
    (defaults to stdout)."""
    assert iscode(co)
    # Resolve the output stream up front so errors still have somewhere to go.
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    todo = deque([co])
    disco_loop(scanner.ingest, todo, real_out)
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):
    """Deparse code object ``co`` into a FragmentsWalker with source text.

    :param version:     Python bytecode version, as a float.
    :param co:          Code object to deparse.
    :param out:         Output stream (kept for interface compatibility;
                        NOTE(review): not referenced by the body).
    :param showasm:     When true, print the disassembled tokens.
    :param showast:     When true, show the abstract syntax tree.
    :param showgrammar: When set, enable grammar-reduction debugging.
    :return: the FragmentsWalker holding the generated source.
    :raises: the walker's stored error if deparsing failed.
    """
    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version)
    # BUG FIX: ``co`` was disassembled twice in a row; once is enough.
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    # Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version, scanner, showast=showast,
                               debug_parser=debug_parser)
    deparsed.ast = deparsed.build_ast(tokens, customize)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'
    del tokens  # save memory

    # convert leading '__doc__ = "..." into doc string
    assert deparsed.ast == 'stmts'
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to to: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)
    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR
    return deparsed
def disco(version, co, out=None, is_pypy=False):
    """Disassemble and deparse code object 'co', writing to ``out``
    (defaults to stdout)."""
    assert iscode(co)
    # Resolve the output stream up front so errors still have somewhere to go.
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    todo = deque([co])
    disco_loop(scanner.disassemble, todo, real_out)
def disco(version, co, out=None):
    """Disassemble code block 'co' and print its tokens.

    :param version: Python bytecode version, as a float.
    :param co:      Code object (or object exposing ``co_name``).
    :param out:     Output stream; defaults to sys.stdout.
    """
    # ``co`` need only quack like a code object here.
    assert hasattr(co, 'co_name')
    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    for t in tokens:
        print(t, file=real_out)
    # CONSISTENCY FIX: the trailing blank line was written to ``out`` (which
    # may be None) rather than the resolved ``real_out`` used everywhere else.
    print(file=real_out)
def disco(version, co, out=None):
    """Disassemble code block 'co' and print its tokens.

    :param version: Python bytecode version, as a float.
    :param co:      Code object to disassemble.
    :param out:     Output stream; defaults to sys.stdout.
    """
    assert iscode(co)
    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    for t in tokens:
        print(t, file=real_out)
    # CONSISTENCY FIX: the trailing blank line was written to ``out`` (which
    # may be None) rather than the resolved ``real_out`` used everywhere else.
    print(file=real_out)
def test_get_scanner():
    # A full "major.minor.patch" version string should resolve to a scanner.
    assert get_scanner('2.7.13')
def test_if_in_for():
    """Verify jump targets/structures (tuple-version scanner API variant)."""
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION_TRIPLE)
    if (2, 7) <= PYTHON_VERSION_TRIPLE < (3, 1) and not IS_PYPY:
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        ## FIXME: the data below is wrong.
        ## we get different results currenty as well.
        ## We need to probably fix both the code
        ## and the test below
        # assert {15: [3], 69: [66], 63: [18]} == fjt
        # assert scan.structs == \
        #     [{'start': 0, 'end': 72, 'type': 'root'},
        #      {'start': 15, 'end': 66, 'type': 'if-then'},
        #      {'start': 31, 'end': 59, 'type': 'for-loop'},
        #      {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 80, 'type': 'root'},
            {'start': 3, 'end': 64, 'type': 'if-then'},
            {'start': 6, 'end': 15, 'type': 'try'},
            {'start': 19, 'end': 38, 'type': 'except'},
            {'start': 45, 'end': 67, 'type': 'while-loop'},
            {'start': 70, 'end': 64, 'type': 'while-else'},
            # previous bug was not mistaking while-loop for if-then
            {'start': 48, 'end': 67, 'type': 'while-loop'},
        ]
    elif (3, 2) < PYTHON_VERSION_TRIPLE <= (3, 4):
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == [
            {'end': 72, 'type': 'root', 'start': 0},
            {'end': 66, 'type': 'if-then', 'start': 6},
            {'end': 63, 'type': 'if-then', 'start': 18},
            {'end': 59, 'type': 'for-loop', 'start': 31},
            {'end': 63, 'type': 'for-else', 'start': 62},
        ]
    else:
        print("FIXME: should fix for %s" % version_tuple_to_str())
        assert True
    return
def test_grammar():
    """Check grammar coverage: unused rules, right recursion, dup RHS."""

    def check_tokens(tokens, opcode_set):
        # Drop _<n> count suffixes and _CONT suffixes before comparing.
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()

    # We have custom rules that create the below
    expect_lhs = set(['pos_arg', 'get_iter', 'attribute'])
    unused_rhs = set(['list', 'mkfunc', 'mklambda', 'unpack'])
    expect_right_recursive = set([('designList',
                                   ('store', 'DUP_TOP', 'designList'))])

    if PYTHON_VERSION != 3.7:
        unused_rhs.add('call')
    if PYTHON_VERSION > 2.6:
        expect_lhs.add('kvlist')
        expect_lhs.add('kv3')
        unused_rhs.add('dict')

    if PYTHON3:
        expect_lhs.add('load_genexpr')
        unused_rhs = unused_rhs.union(set("""
            except_pop_except generator_exp
        """.split()))
        if PYTHON_VERSION >= 3.0:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            unused_rhs.add("dict_comp")
            unused_rhs.add("classdefdeco1")
            if PYTHON_VERSION >= 3.5:
                expect_right_recursive.add(
                    (('l_stmts', ('lastl_stmt', 'come_froms', 'l_stmts'))))
                pass
            elif 3.0 < PYTHON_VERSION < 3.3:
                expect_right_recursive.add(
                    (('l_stmts', ('lastl_stmt', 'COME_FROM', 'l_stmts'))))
                pass
            pass
        pass
    else:
        expect_lhs.add('kwarg')

    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive

    expect_dup_rhs = frozenset([('COME_FROM', ), ('CONTINUE', ),
                                ('JUMP_ABSOLUTE', ), ('LOAD_CONST', ),
                                ('JUMP_BACK', ), ('JUMP_FORWARD', )])
    reduced_dup_rhs = dict((k, dup_rhs[k]) for k in dup_rhs
                           if k not in expect_dup_rhs)
    for k in reduced_dup_rhs:
        print(k, reduced_dup_rhs[k])
    # assert not reduced_dup_rhs, reduced_dup_rhs

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    # Pseudo-tokens synthesized by the scanner; not real opcodes.
    ignore_set = set("""
        JUMP_BACK CONTINUE
        COME_FROM COME_FROM_EXCEPT COME_FROM_EXCEPT_CLAUSE
        COME_FROM_LOOP COME_FROM_WITH COME_FROM_FINALLY
        ELSE
        LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
        LAMBDA_MARKER
        RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
    """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        if PYTHON_VERSION == 2.6:
            opcode_set.add("THEN")
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def test_grammar():
    """Check grammar coverage (3.7/3.8-aware variant)."""

    def check_tokens(tokens, opcode_set):
        # Drop _<n> count, _CONT, and LOAD_CODE suffixes before comparing.
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
        remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
        remain_tokens = set(
            [re.sub("LOAD_CODE$", "", t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), "Remaining tokens %s\n====\n%s" % (
            remain_tokens,
            p.dump_grammar(),
        )

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()

    # We have custom rules that create the below
    expect_lhs = set(["pos_arg"])
    if PYTHON_VERSION < 3.8:
        if PYTHON_VERSION < 3.7:
            expect_lhs.add("attribute")
            expect_lhs.add("get_iter")
        else:
            expect_lhs.add("async_with_as_stmt")
            expect_lhs.add("async_with_stmt")

    unused_rhs = set(["list", "mkfunc", "mklambda", "unpack"])
    expect_right_recursive = set([("designList",
                                   ("store", "DUP_TOP", "designList"))])

    if PYTHON_VERSION <= 3.7:
        unused_rhs.add("call")
    if PYTHON_VERSION > 2.6:
        expect_lhs.add("kvlist")
        expect_lhs.add("kv3")
        unused_rhs.add("dict")

    if PYTHON3:
        expect_lhs.add("load_genexpr")
        unused_rhs = unused_rhs.union(set("""
            except_pop_except generator_exp
        """.split()))
        if PYTHON_VERSION >= 3.0:
            if PYTHON_VERSION < 3.7:
                expect_lhs.add("annotate_arg")
                expect_lhs.add("annotate_tuple")
                unused_rhs.add("mkfunc_annotate")
            unused_rhs.add("dict_comp")
            unused_rhs.add("classdefdeco1")
            unused_rhs.add("tryelsestmtl")
            if PYTHON_VERSION >= 3.5:
                expect_right_recursive.add(
                    (("l_stmts", ("lastl_stmt", "come_froms", "l_stmts"))))
                pass
            elif 3.0 < PYTHON_VERSION < 3.3:
                expect_right_recursive.add(
                    (("l_stmts", ("lastl_stmt", "COME_FROM", "l_stmts"))))
                pass
            pass
        pass
    else:
        expect_lhs.add("kwarg")

    assert expect_lhs == set(lhs)

    # FIXME
    if PYTHON_VERSION != 3.8:
        assert unused_rhs == set(rhs)

    assert expect_right_recursive == right_recursive

    expect_dup_rhs = frozenset([
        ("COME_FROM", ),
        ("CONTINUE", ),
        ("JUMP_ABSOLUTE", ),
        ("LOAD_CONST", ),
        ("JUMP_BACK", ),
        ("JUMP_FORWARD", ),
    ])
    reduced_dup_rhs = dict((k, dup_rhs[k]) for k in dup_rhs
                           if k not in expect_dup_rhs)
    for k in reduced_dup_rhs:
        print(k, reduced_dup_rhs[k])
    # assert not reduced_dup_rhs, reduced_dup_rhs

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    # Pseudo-tokens synthesized by the scanner; not real opcodes.
    ignore_set = set("""
        JUMP_BACK CONTINUE
        COME_FROM COME_FROM_EXCEPT COME_FROM_EXCEPT_CLAUSE
        COME_FROM_LOOP COME_FROM_WITH COME_FROM_FINALLY
        ELSE
        LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
        LOAD_STR LOAD_CODE
        LAMBDA_MARKER
        RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
    """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        if PYTHON_VERSION == 2.6:
            opcode_set.add("THEN")
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add("LOAD_CLASSNAME")
        ignore_set.add("STORE_LOCALS")
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None,
                       debug_opts=DEFAULT_DEBUG_OPTS, code_objects={},
                       compile_mode='exec'):
    """
    ingests and deparses a given code block 'co'

    :param co:           Code object to deparse.
    :param out:          Stream the AligningWalker writes to.
    :param version:      Bytecode version; defaults to the running interpreter.
    :param is_pypy:      Whether the bytecode is PyPy's; defaults to detection.
    :param debug_opts:   dict with optional 'asm', 'grammar', 'ast' flags.
    :param code_objects: Cache of already-seen code objects.
    :param compile_mode: 'exec', 'eval', or 'single'.
    :return: the AligningWalker holding the generated source.
    :raises SourceWalkerError: if deparsing hit a parse error.
    """
    assert iscode(co)

    if version is None:
        version = float(sys.version[0:3])
    if is_pypy is None:
        is_pypy = IS_PYPY

    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)
    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    show_asm = debug_opts.get('asm', None)
    maybe_show_asm(show_asm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    # BUG FIX: 'grammar' was fetched twice in a row; once is enough.
    show_grammar = debug_opts.get('grammar', None)
    if show_grammar:
        debug_parser['reduce'] = show_grammar
        debug_parser['errorstack'] = True

    # Build a parse tree from tokenized and massaged disassembly.
    show_ast = debug_opts.get('ast', None)
    deparsed = AligningWalker(version, scanner, out, showast=show_ast,
                              debug_parser=debug_parser,
                              compile_mode=compile_mode, is_pypy=is_pypy)
    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'
    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    # What we've been waiting for: Generate Python source from the parse tree!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in sorted(deparsed.mod_globs):
        deparsed.write('# global %s ## Warning: Unused global\n' % g)
    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.

    :param version:   Python bytecode version, as a float.
    :param is_pypy:   Whether the bytecode is PyPy's.
    :param code_obj1: Reference code object.
    :param code_obj2: Code object to compare against the reference.
    :param verify:    Comparison level: "verify" or "strong".
    :param name:      Dotted name prefix used in error reports.
    :raises CmpErrorCode, CmpErrorCodeLen, CmpErrorMember: on mismatch.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            # BUG FIX: was ``scan.JUMP_OPS`` — ``scan`` is undefined here
            # (the local is named ``scanner``), which raised a NameError.
            JUMP_OPS = list(scanner.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                                ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                    tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr, tokens1[i1 + 1].pattr)
                                    == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind in ("RETURN_VALUE",
                                                       "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(
                                tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # import pdb; pdb.set_trace()
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            # BUG FIX: ``dest2`` was used without ever being
                            # assigned here; the resulting NameError was
                            # silently swallowed by the bare except below, so
                            # the deferred jump check never ran.
                            dest2 = int(tokens2[i2].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
                # We also don't care about COROUTINE or GENERATOR for now
                flags1 &= ~0x000000A0
                flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(
                    name,
                    "co_flags",
                    pretty_code_flags(flags1),
                    pretty_code_flags(flags2),
                )
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
pass pass return False class Python36ParserSingle(Python36Parser, PythonParserSingle): pass if __name__ == '__main__': # Check grammar p = Python36Parser() p.check_grammar() from uncompyle6 import PYTHON_VERSION, IS_PYPY if PYTHON_VERSION == 3.6: lhs, rhs, tokens, right_recursive = p.check_sets() from uncompyle6.scanner import get_scanner s = get_scanner(PYTHON_VERSION, IS_PYPY) opcode_set = set(s.opc.opname).union( set("""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST """.split())) remain_tokens = set(tokens) - opcode_set import re remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens]) remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items()))
# Python 3.4+ has more loop optimization that removes # JUMP_FORWARD in some cases, and hence we also don't # see COME_FROM _ifstmts_jump ::= c_stmts_opt """ class Python35ParserSingle(Python35Parser, PythonParserSingle): pass if __name__ == '__main__': # Check grammar p = Python35Parser() p.checkGrammar() from uncompyle6 import PYTHON_VERSION, IS_PYPY if PYTHON_VERSION == 3.5: lhs, rhs, tokens, right_recursive = p.checkSets() from uncompyle6.scanner import get_scanner s = get_scanner(PYTHON_VERSION, IS_PYPY) opcode_set = set(s.opc.opname).union(set( """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME LAMBDA_MARKER RETURN_LAST """.split())) remain_tokens = set(tokens) - opcode_set import re remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items()))