Example #1
def main(file_path):
    dir_and_name = file_path.rsplit('/', 1)
    if len(dir_and_name) > 1:
        tests_dir = dir_and_name[0]
        file_name = dir_and_name[1]
    else:
        tests_dir = "."
        file_name = dir_and_name[0]

    file_content = ""
    lines = open(file_path, "rt").readlines()
    for line in lines:
        if line.startswith("#include"):
            continue
        file_content += line

    parser = CParser()
    generator = CGenerator()

    ast = parser.parse(file_content, file_name, debuglevel=0)

    test_finder = MethodVisitor()
    test_finder.visit(ast)

    include_text = ""

    suite_code = build_main(test_finder.test_functions,
                            file_name,
                            include_text)

    with open("/".join([tests_dir, "suite.c"]), "wt") as suite_file:
        suite_file.write(suite_code)
Example #2
def backward_call(decl):
    parser = CParser()
    decl = parser.parse(decl, filename='<stdin>').ext[0]
    name = decl.name
    args = decl.type.args
    nargs = len(args.params)
    if len(decl.type.type.type.names) > 1:
        assert False
    else:
        rtype = decl.type.type.type.names[0]

    ndecl = rtype + ' ' + name[len('cephes_'):] + '('

    call_expr = name + '('
    for param in args.params:
        if len(param.type.type.names) > 1:
            assert False
        typ = param.type.type.names[0]
        ndecl += typ + ' ' + param.name + ', '
        call_expr += param.name + ', '
    if nargs > 0:
        ndecl = ndecl[:-2]
        call_expr = call_expr[:-2]
    ndecl += ')'
    call_expr += ')'
    ndecl += " { return %s; }" % call_expr
    return ndecl
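
For reference, a minimal usage sketch of the wrapper generator above (the input declaration is hypothetical, and it assumes the 'cephes_' prefix fix noted above):

# Hypothetical cephes-style declaration; backward_call() strips the prefix
# and emits a forwarding definition that calls the original function.
wrapper = backward_call("double cephes_cos(double x);")
assert wrapper == "double cos(double x) { return cephes_cos(x); }"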
Example #3
File: parser.py Project: felipecruz/qc
def parse(file_content, file_name):
    parser = CParser()
    generator = CGenerator()

    ast = parser.parse(file_content, file_name, debuglevel=0)

    test_finder = ASTVisitor()
    test_finder.visit(ast)

    return test_finder
Example #4
File: cparse.py Project: wodelover/mlib
def parse_cstruct(code, cls=None):
    global TYPE_TRL_TABLE
    cp = CParser()
    st = cp.parse(PREPEND_TYPES + '\n' + code)
    decls = list(st.children())
    mystruct = decls.pop()[1]
    if not TYPE_TRL_TABLE:
        TYPE_TRL_TABLE = mk_trltable(decls)

    fields = {'_fields_': get_fields(mystruct)}
    return type(mystruct.name, (cls or c.Structure, ), fields)
Example #5
def parse(text, filename='', parser=None, fake_typedefs=False):
    if parser is None:
        parser = CParser()

    if fake_typedefs:
        text = ''.join((fake.typedefs, f'# 1 "{filename}"\n', text))

    ast = parser.parse(text, filename)
    for i, node in enumerate(ast.ext):
        if isinstance(node, c_ast.Typedef) and node.name == '__end_of_fakes__':
            # Drop the fake typedefs, up to and including the end marker.
            del ast.ext[:i + 1]
            break

    return ast
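
A minimal usage sketch (plain input, so no fake typedefs are needed; the project-specific `fake.typedefs` string is only required when the source references undeclared library types):

# Usage sketch: parse a self-contained snippet and print its AST.
ast = parse('int add(int a, int b) { return a + b; }', filename='demo.c')
ast.show()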
Example #6
    def build(self, data_dir):
        for i in range(1, 105):
            data_subdir = data_dir + "/" + str(i)
            for file_name in os.listdir(data_subdir):
                num = int(file_name[:-4])
                name = data_subdir + "/" + file_name
                with open(name, errors="ignore") as f:
                    code = f.read()
                    parser = CParser()
                    ast = parser.parse(comment_remover(code))
                    visitor = CodeToWordVisitor()
                    visitor.visit(ast)
                    seq = visitor.pre_order
                    self._add_words(seq)
                    self.data[(i, num)] = self._words2data(seq)
            print("Directory {} built".format(i))

        print("Vocabulary Size: {}".format(len(self.dictionary)))
Example #7
def api_fdecls(decl):
    parser = CParser()
    decl = parser.parse(decl, filename='<stdin>').ext[0]
    name = decl.name
    args = decl.type.args
    nargs = len(args.params)
    if len(decl.type.type.type.names) > 1:
        assert False
    else:
        rtype = decl.type.type.type.names[0]
    ndecl = rtype + ' ncephes_' + _rcs(name) + '('
    for param in args.params:
        if len(param.type.type.names) > 1:
            assert False
        typ = param.type.type.names[0]
        ndecl += typ + ' ' + param.name + ', '
    if nargs > 0:
        ndecl = ndecl[:-2]
    return ndecl + ');'
Example #8
def parse_file(filename,
               use_cpp=False,
               cpp_path='cpp',
               cpp_args='',
               parser=None):
    """ Parse a C file using pycparser.

        filename:
            Name of the file you want to parse.

        use_cpp:
            Set to True if you want to execute the C pre-processor
            on the file prior to parsing it.

        cpp_path:
            If use_cpp is True, this is the path to 'cpp' on your
            system. If no path is provided, it attempts to just
            execute 'cpp', so it must be in your PATH.

        cpp_args:
            If use_cpp is True, set this to the command line arguments strings
            to cpp. Be careful with quotes - it's best to pass a raw string
            (r'') here. For example:
            r'-I../utils/fake_libc_include'
            If several arguments are required, pass a list of strings.

        parser:
            Optional parser object to be used instead of the default CParser

        When successful, an AST is returned. ParseError can be
        thrown if the file doesn't parse successfully.

        Errors from cpp will be printed out.
    """
    if use_cpp:
        text = preprocess_file(filename, cpp_path, cpp_args)
    else:
        with io.open(filename) as f:
            text = f.read()

    if parser is None:
        parser = CParser()
    return parser.parse(text, filename)
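
A usage sketch based on the docstring above; the include path follows pycparser's fake-libc convention and is an assumption about your checkout layout:

# Usage sketch: preprocess with cpp, substituting pycparser's fake headers
# for the real libc headers so standard includes parse quickly.
ast = parse_file('example.c', use_cpp=True,
                 cpp_args=r'-I../utils/fake_libc_include')
ast.show()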
Example #9
def compile(code):
	parser = CParser()

	stypes = 'u8 i8 u16 i16 u32 i32 u64 i64 f32 f64 f128'
	code = 'void runner() { ' + code + ' ; }'
	for type in stypes.split(' '):
		code = 'typedef void %s; %s' % (type, code)

	ast = parser.parse(code)
	found = None
	for _, child in ast.children():
		if isinstance(child, FuncDef):
			found = child
			break

	assert found is not None
	assert len(found.body.children()) == 1

	ast = found.body.children()[0][1]
	sexp = AstTranslator().process(ast)

	def run(ctu):
		return bare(SexpRunner(ctu).run(sexp))
	return run
Example #10
def print_header(message):
    generator = CGenerator()
    parser = CParser()

    def del_spaces(name):
        if name.startswith('(extension in '):
            idx = name.index('):')
            name = '_extension_in_' + name[14:idx] + "__" + name[idx + 2:]

        # file private types
        if ' in _' in name:
            idx = name.index(' in _')
            end = name.index(')', idx)
            start = name.rindex('(', None, idx)
            namespace = name[:start]
            if '>' in namespace:
                namespace = mangle_name(namespace[:-1]) + '.'
            name = namespace + name[start + 1:idx] + name[end + 1:]
        return name

    def mangle_name(human):
        if human in ('void*', 'voidp', 'Metadata*'):
            return human
        if human == '()':
            return 'void'

        info = types[human]
        if 'getGenericParams' in info and info['getGenericParams']:
            name = remove_generic(human)
        else:
            name = human

        if name.startswith('?Unknown type of'):
            name = name.replace('?Unknown type of ', 'XXX_unknown_type_of_')

        if name.startswith("Static #"):
            spl = name.split(' ', 4)
            return "_static_no" + spl[1][1:] + "_in_" + spl[
                3] + "__func" + str(hash(spl[4]))[1:]
        name = del_spaces(name)

        outp = f'swift_{info["kind"]}__'

        if info['kind'] == "Tuple":
            elems = []
            for e in info['tupleElements']:
                elem_name = mangle_name(e['type'])
                if e['label']:
                    elem_name += "__as_" + e['label']
                elems.append(elem_name)
            outp += "with__" + "__and__".join(elems)
        elif info['kind'] == "Existential":
            protos = []
            for p in info['protocols']:
                protos.append(
                    del_spaces(script.exports.demangle(p)).replace(".", "__"))
            if info['isClassBounded']:
                protos.append("Swift__AnyObject")
            if protos:
                outp += "conforming_to__" + "__and__".join(protos)
            else:
                outp += "Any"
            if info.get('getSuperclassConstraint'):
                outp += "__inheriting_from_" + mangle_name(
                    info['getSuperclassConstraint'])
        elif info['kind'] == 'Function':
            return "func_" + str(hash(name))[1:]
        else:
            outp += name.replace(".", "_")

        if 'getGenericParams' in info and info['getGenericParams']:
            gen_params = [
                mangle_name(param) for param in info['getGenericParams']
            ]
            outp += "__of__" + "__and__".join(gen_params)

        return outp

    def make_decl(name, offset, type_name):
        nonlocal decls, pad_count, parser, prev_end

        if isinstance(offset, str):
            assert offset[:2] == '0x'
            offset = int(offset, 16)

        if prev_end < offset:
            pad_str = f"char _padding{pad_count}[{offset - prev_end}];"
            decls.append(parser.parse(pad_str).ext[0])
            pad_count += 1

        type_decl = TypeDecl(name.replace(".", "__"), None,
                             IdentifierType([mangle_name(type_name)]))
        decls.append(Decl(None, None, None, None, type_decl, None, None))

        req_graph.setdefault(type_name, set()).add(parent_name)

        if offset != -1:
            size = pointer_size if type_name.endswith('*') else int(
                types[type_name]['size'], 16)
            prev_end = offset + size

    #print("#include <stdint.h>")
    print("#pragma pack(1)")
    print("typedef void *voidp;")
    print("typedef struct Metadata_s Metadata;")
    types = json.loads(message)

    req_graph = {}
    ptr_types = {'void*', 'voidp', 'Metadata*'}
    ctypes = {}

    for name, info in types.items():
        pad_count = 0
        decls = []
        prev_end = 0
        ctype = None
        parent_name = name
        if info['kind'] == "Tuple":
            for i, elem in enumerate(info['tupleElements']):
                make_decl(elem['label'] or f'_{i}', elem['offset'],
                          elem['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] == "ObjCClassWrapper":
            print(
                f'typedef struct {mangle_name(name)}_s *{mangle_name(name)};')
        elif info['kind'] in ("Struct", "Class"):
            if info['kind'] == 'Class':
                make_decl('_isa', '0x0', 'Metadata*')
                #make_decl('_refCounts', hex(pointer_size), 'size_t')

            for i, field in enumerate(info['fields']):
                make_decl(field['name'], field['offset'], field['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)

            if info['kind'] == 'Class':
                ctype = PtrDecl(None, ctype)
        elif info['kind'] == "Existential":
            if info['isClassBounded'] or info.get(
                    'getSuperclassConstraint'):  # class existential container
                make_decl('heap_object', -1, 'void*')
            else:  # opaque existential container
                decls.append(
                    parser.parse("void *heapObjectOrInlineData0;").ext[0])
                for i in range(1, 3):
                    decls.append(
                        parser.parse(
                            "void *nothingOrInlineData{};".format(i)).ext[0])
                make_decl("dynamicType", -1, "Metadata*")
            for i in range(info['witnessTableCount']):
                make_decl(f'_witnessTable{i + 1}', -1, 'void*')
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] in ("Enum", "Optional"):
            if info['enumCases'] and info['enumCases'][0]['name'] is None:
                # C-like enum
                # we don't have case names or values, so just generate a typedef to an int type
                print(
                    f"typedef uint{int(info['size'], 16) * 8}_t {mangle_name(name)};"
                )
            elif len(info['enumCases']) == 0:
                ctype = Struct(mangle_name(name) + "_s", decls)
            elif len(info['enumCases']) == 1 and info['enumCases'][0]['type']:
                make_decl(info['enumCases'][0]['name'], 0,
                          info['enumCases'][0]['type'])
                ctype = Struct(mangle_name(name) + "_s", decls)
            else:
                print(
                    f'typedef struct {mangle_name(name)}_s {{ char _data[{info["size"]}]; }} {mangle_name(name)};'
                )
        elif info['kind'] == 'Opaque':
            if 'getCType' in info:
                ctype_names = {
                    'pointer': 'void*',
                    'int8': 'int8_t',
                    'int16': 'int16_t',
                    'int32': 'int32_t',
                    'int64': 'int64_t',
                }
                print(
                    f'typedef {ctype_names[info["getCType"]]} {mangle_name(name)};'
                )
            elif name == 'Builtin.NativeObject':
                print(f'typedef void *{mangle_name(name)};')
            else:
                print(f'typedef char {mangle_name(name)}[{info["size"]}];')
        elif info['kind'] == 'Function':
            print(f"typedef void *func_{str(hash(name))[1:]};"
                  )  # TODO: proper names
        else:
            print(f'typedef char {mangle_name(name)}[{info["size"]}];')

        if ctype:
            type_decl = TypeDecl(mangle_name(name), None, ctype)
            ctypes[name] = type_decl
            type_decl_forward = Struct(mangle_name(name) + "_s", [])
            if isinstance(ctype, PtrDecl):
                ptr_types.add(name)
                type_decl_forward = PtrDecl(None, type_decl_forward)
                print(
                    generator.visit(
                        Typedef(mangle_name(name), None, ['typedef'],
                                type_decl_forward)) + ";")

    for name in ptr_types:
        req_graph.pop(name, None)

    for name in top_sort(req_graph):
        if name in ctypes:
            print(f"\n// {name}")
            print(
                generator.visit(
                    Typedef(mangle_name(name), None, ['typedef'],
                            ctypes[name])) + ";")
Example #11
from unittest import TestCase

from compat import MagicMock

from pycparser.c_parser import CParser
from pycparser.c_generator import CGenerator

from automock import MockGenerator
from automock import MockInfo, ReturnHint
from automock import ArgInfo, ArgHint

from os import path

# CParser() takes about a second to run on my machine, so create it
# only once instead of in setUp() for every test
cparser = CParser()
cgen = CGenerator()
emptyast = cparser.parse('')
defaulthname = "../mockable.h"


class MockGeneratorTests(TestCase):
    def setUp(self):
        self.maxDiff = None
        self.mpaths = MagicMock()
        self.mpaths.headerpath = defaulthname

    def test_shouldGenerateMockFromOtherwiseEmptyHeader(self):
        # Given
        mgen = MockGenerator(self.mpaths, cgen,
                             cparser.parse("void func1(void);", defaulthname))
        # When
        mocks = mgen.mocks
Example #12
def match_functions(
        repo_info: RepoInfo,
        archive_folder: str,
        temp_folder: str,
        decompile_folder: str,
        use_fake_libc_headers: bool = True,
        preprocess_timeout: Optional[int] = None,
        *,
        progress_bar: Optional[flutes.ProgressBarManager.Proxy] = None
) -> Result:
    # Directions:
    # 1. Clone or extract from archive.
    # 2. For each Makefile, rerun the compilation process with the flag "-E", so only the preprocessor is run.
    #    This probably won't take long as the compiler exits after running the preprocessor, and linking would fail.
    #    Also, consider using "-nostdlib -Ipath/to/fake_libc_include" as suggested by `pycparser`.
    # 3. The .o files are now preprocessed C code. Parse them using `pycparser` to obtain a list of functions.

    start_time = time.time()
    total_files = sum(
        len(makefile) for makefile in repo_info.makefiles.values())
    repo_folder_name = f"{repo_info.repo_owner}_____{repo_info.repo_name}"
    repo_full_name = f"{repo_info.repo_owner}/{repo_info.repo_name}"
    archive_path = (Path(archive_folder) /
                    f"{repo_full_name}.tar.gz").absolute()
    repo_dir = (Path(temp_folder) / repo_folder_name).absolute()
    repo_src_path = repo_dir / "src"
    repo_binary_dir = repo_dir / "bin"
    repo_binary_dir.mkdir(parents=True, exist_ok=True)
    has_error = False

    if progress_bar is not None:
        worker_id = flutes.get_worker_id()
        process_name = f"Worker {worker_id}" if worker_id is not None else "Main Process"
        progress_bar.new(total=total_files,
                         desc=process_name + f" [{repo_full_name}]")

    flutes.log(f"Begin processing {repo_full_name} ({total_files} files)")

    if os.path.exists(archive_path):
        # Extract archive
        flutes.run_command(["tar", "xzf", str(archive_path)],
                           cwd=str(repo_dir))
        (repo_dir / repo_folder_name).rename(repo_src_path)
    else:
        # Clone repo
        if repo_src_path.exists():
            shutil.rmtree(repo_src_path)
        ret = ghcc.clone(repo_info.repo_owner,
                         repo_info.repo_name,
                         clone_folder=str(repo_dir),
                         folder_name="src")
        if ret.error_type not in [None, ghcc.CloneErrorType.SubmodulesFailed]:
            flutes.log(
                f"Failed to clone {repo_full_name}: error type {ret.error_type}",
                "error")
            # Return a dummy result so this repo is ignored in the future.
            return Result(repo_info.repo_owner, repo_info.repo_name, [], {}, 0,
                          0, 0)

    # Write makefile info to pickle
    with (repo_binary_dir / "makefiles.pkl").open("wb") as f_pkl:
        pickle.dump(repo_info.makefiles, f_pkl)

    gcc_flags = "-E"
    directory_mapping = None
    if use_fake_libc_headers:
        gcc_flags = "-E -nostdlib -I/usr/src/libc"
        directory_mapping = {ghcc.parse.FAKE_LIBC_PATH: "/usr/src/libc"}

    if progress_bar is not None:
        progress_bar.update(postfix={"status": "preprocessing"})
    makefiles = ghcc.docker_batch_compile(
        str(repo_binary_dir),
        str(repo_src_path),
        compile_timeout=preprocess_timeout,
        gcc_override_flags=gcc_flags,
        use_makefile_info_pkl=True,
        directory_mapping=directory_mapping,
        user_id=(repo_info.idx % 10000) + 30000,  # user IDs 30000 ~ 39999
        exception_log_fn=functools.partial(exception_handler,
                                           repo_info=repo_info))

    parser = CParser(lexer=ghcc.parse.CachedCLexer)
    lexer = ghcc.parse.LexerWrapper()
    decompile_path = Path(decompile_folder)
    extractor = ghcc.parse.FunctionExtractor()
    matched_functions: List[MatchedFunction] = []
    preprocessed_original_code: Dict[str, str] = {}
    files_found = 0
    functions_found = 0
    for makefile in makefiles:
        mkfile_dir = Path(makefile['directory'])
        for path, sha in zip(makefile["binaries"], makefile["sha256"]):
            # Load and parse preprocessed original code.
            code_path = str(mkfile_dir / path)
            json_path = decompile_path / (sha + ".jsonl")
            preprocessed_code_path = repo_binary_dir / sha
            if progress_bar is not None:
                progress_bar.update(1, postfix={"file": code_path})
            if not json_path.exists() or not preprocessed_code_path.exists():
                continue
            try:
                with preprocessed_code_path.open("r") as f:
                    code = f.read()
                code = LINE_CONTROL_REGEX.sub("", code)
            except UnicodeDecodeError:
                continue  # probably a real binary file
            preprocessed_original_code[sha] = code
            try:
                original_ast: ASTNode = parser.parse(code,
                                                     filename=os.path.join(
                                                         repo_full_name, path))
            except (pycparser.c_parser.ParseError, AssertionError) as e:
                # For some reason `pycparser` uses `assert`s in places where there should have been a check.
                flutes.log(
                    f"{repo_full_name}: Parser error when processing file "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True
                continue  # ignore parsing errors
            original_tokens = ghcc.parse.convert_to_tokens(
                code, parser.clex.cached_tokens)
            files_found += 1
            function_asts = extractor.find_functions(original_ast)
            functions_found += len(function_asts)

            # Collect decompiled functions with matching original code.
            with json_path.open("r") as f:
                decompiled_json = [
                    line for line in f if line
                ]  # don't decode, as we only need the function name
            decompiled_funcs: Dict[str,
                                   str] = {}  # (func_name) -> decompiled_code
            decompiled_var_names: Dict[str, Dict[str, Tuple[str, str]]] = {} \
                # (func_name) -> (var_id) -> (decomp_name, orig_name)

            for line_num, j in enumerate(decompiled_json):
                # Find function name from JSON line without parsing.
                match = JSON_FUNC_NAME_REGEX.search(j)
                assert match is not None
                func_name = match.group(1)
                if func_name not in function_asts:
                    continue

                try:
                    decompiled_data = json.loads(j)
                except json.JSONDecodeError as e:
                    flutes.log(
                        f"{repo_full_name}: Decode error when reading JSON file at {json_path}: "
                        f"{str(e)}", "error")
                    continue
                decompiled_code = decompiled_data["raw_code"]
                # Store the variable names used in the function.
                # We use a random string as the identifier prefix. Sadly, C89 (and `pycparser`) doesn't support Unicode.
                for length in range(3, 10 + 1):
                    var_identifier_prefix = "v" + "".join(
                        random.choices(string.ascii_lowercase, k=length))
                    if var_identifier_prefix not in decompiled_code:
                        break
                else:
                    # No way this is happening, right?
                    flutes.log(
                        f"{repo_full_name}: Could not find valid identifier prefix for "
                        f"{func_name} in {code_path} ({sha})", "error")
                    continue
                variables: Dict[str, Tuple[str, str]] = {
                }  # (var_id) -> (decompiled_name, original_name)
                for match in DECOMPILED_VAR_REGEX.finditer(decompiled_code):
                    var_id, decompiled_name, original_name = match.groups()
                    var_id = f"{var_identifier_prefix}_{var_id}"
                    if var_id in variables:
                        assert variables[var_id] == (decompiled_name,
                                                     original_name)
                    else:
                        variables[var_id] = (decompiled_name, original_name)
                decompiled_var_names[func_name] = variables
                # Remove irregularities in decompiled code to make it parsable:
                # - Replace `@@VAR` with special identifiers (literally any identifier that doesn't clash).
                # - Remove the register allocation indication in `var@<rdi>`.
                decompiled_code = DECOMPILED_VAR_REGEX.sub(
                    rf"{var_identifier_prefix}_\1", decompiled_code)
                decompiled_code = DECOMPILED_REG_ALLOC_REGEX.sub(
                    "", decompiled_code)
                if func_name.startswith("_"):
                    # For some reason, Hexrays would chomp off one leading underscore from function names in their
                    # generated code, which might lead to corrupt code (`_01inverse` -> `01inverse`). Here we
                    # heuristically try to find and replace the changed function name.
                    decompiled_code = re.sub(  # replace all identifiers with matching name
                        r"(?<![a-zA-Z0-9_])" + func_name[1:] +
                        r"(?![a-zA-Z0-9_])", func_name, decompiled_code)
                    # Note that this doesn't fix references of the function in other functions. But really, why would
                    # someone name their function `_01inverse`?
                decompiled_funcs[func_name] = decompiled_code

            # Generate code replacing original functions with decompiled functions.
            replacer = ghcc.parse.FunctionReplacer(decompiled_funcs)
            replaced_code = replacer.visit(original_ast)

            # Obtain AST for decompiled code by parsing it again.
            code_to_preprocess = DECOMPILED_CODE_HEADER + "\n" + replaced_code
            try:
                code_to_parse = ghcc.parse.preprocess(code_to_preprocess)
            except ghcc.parse.PreprocessError as e:
                msg = (
                    f"{repo_full_name}: GCC return value nonzero for decompiled code of "
                    f"{code_path} ({sha})")
                if len(e.args) > 0:
                    msg += ":\n" + str(e)
                flutes.log(msg, "error")
                has_error = True
                continue

            try:
                decompiled_ast, code_to_parse = ghcc.parse.parse_decompiled_code(
                    code_to_parse, lexer, parser)
                decompiled_tokens = ghcc.parse.convert_to_tokens(
                    code_to_parse, parser.clex.cached_tokens)
            except (ValueError, pycparser.c_parser.ParseError) as e:
                flutes.log(
                    f"{repo_full_name}: Could not parse decompiled code for "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True

                # We don't have ASTs for decompiled functions, but we can still dump the code.
                # Use the dummy typedefs to extract functions.
                code_lines = code_to_parse.split("\n")
                func_begin_end: Dict[str, List[Optional[int]]] = defaultdict(
                    lambda: [None, None])
                for idx, line in enumerate(code_lines):
                    name, is_begin = replacer.extract_func_name(line)
                    if name is not None:
                        func_begin_end[name][0 if is_begin else 1] = idx
                for func_name, (begin, end) in func_begin_end.items():
                    if begin is not None and end is not None and func_name in function_asts:
                        decompiled_func_tokens = lexer.lex("\n".join(
                            code_lines[(begin + 1):end]))
                        original_func_ast = function_asts[func_name]
                        original_ast_json, original_func_tokens = serialize(
                            original_func_ast, original_tokens)
                        matched_func = MatchedFunction(
                            file_path=code_path,
                            binary_hash=sha,
                            func_name=func_name,
                            variable_names=decompiled_var_names[func_name],
                            original_tokens=original_func_tokens,
                            decompiled_tokens=decompiled_func_tokens,
                            original_ast_json=original_ast_json,
                            decompiled_ast_json=None)
                        matched_functions.append(matched_func)

            else:
                # We've successfully parsed decompiled code.
                decompiled_func_asts = extractor.find_functions(decompiled_ast)
                for func_name in decompiled_funcs.keys():
                    original_func_ast = function_asts[func_name]
                    if func_name not in decompiled_func_asts:
                        # Maybe there's other Hexrays-renamed functions that we didn't fix, just ignore them.
                        continue
                    decompiled_func_ast = decompiled_func_asts[func_name]
                    original_ast_json, original_func_tokens = serialize(
                        original_func_ast, original_tokens)
                    decompiled_ast_json, decompiled_func_tokens = serialize(
                        decompiled_func_ast, decompiled_tokens)
                    matched_func = MatchedFunction(
                        file_path=code_path,
                        binary_hash=sha,
                        func_name=func_name,
                        variable_names=decompiled_var_names[func_name],
                        original_tokens=original_func_tokens,
                        decompiled_tokens=decompiled_func_tokens,
                        original_ast_json=original_ast_json,
                        decompiled_ast_json=decompiled_ast_json)
                    matched_functions.append(matched_func)

    # Cleanup the folders; if errors occurred, keep the preprocessed code.
    status = ("success" if not has_error and len(matched_functions) > 0 else (
        "warning" if not has_error or len(matched_functions) > 0 else "error"))
    shutil.rmtree(repo_dir)

    end_time = time.time()
    funcs_without_asts = sum(matched_func.decompiled_ast_json is None
                             for matched_func in matched_functions)
    flutes.log(
        f"[{end_time - start_time:6.2f}s] "
        f"{repo_full_name}: "
        f"Files found: {files_found}/{total_files}, "
        f"functions matched: {len(matched_functions)}/{functions_found} "
        f"({funcs_without_asts} w/o ASTs)",
        status,
        force_console=True)
    return Result(repo_owner=repo_info.repo_owner,
                  repo_name=repo_info.repo_name,
                  matched_functions=matched_functions,
                  preprocessed_original_code=preprocessed_original_code,
                  files_found=files_found,
                  functions_found=functions_found,
                  funcs_without_asts=funcs_without_asts)
Example #13
class ForgivingDeclarationParser:
    def __init__(self, source_code, functions, rename_parameters_file=None):
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None
        self.current_file = None

        self.chunks_to_erase = []
        self.bracket_stack = []
        self.source_context = []
        self.typedefs_code = ['typedef int __builtin_va_list;']
        self.typedefs = {}
        self.structs_code = []
        self.structs = []
        self.struct_typedefs = []
        self.includes = []

        self.cparser = CParser()
        self.param_names = None

        if rename_parameters_file is not None:
            self.param_names = load_param_names(rename_parameters_file)

        self.func_names = []
        self.func_signatures = []
        self.file_ast = None
        self.mocked_functions = []
        self.parse()

        if self.functions:
            for function in sorted(functions):
                print(
                    f"error: Mocked function '{function}' undeclared. Add "
                    "missing include in the test file.",
                    file=sys.stderr)

            raise Exception(
                'Unable to find declarations of all mocked functions. Add missing '
                'include(s) in the test file.')

    @classmethod
    def tokenize(cls, source_code):
        for match in RE_TOKEN.finditer(source_code):
            if match.lastgroup not in IGNORED_TOKENS:
                yield Token(match.lastgroup,
                            match.group().strip(), match.span())

    def parse(self):
        while self.next():
            if self.current.is_keyword('typedef'):
                self.parse_typedef()

            parsed = self.parse_function_declaration_or_struct()

            if parsed is not None:
                self.func_names.append(parsed[0])
                self.func_signatures.append(parsed[1])
                self.functions.remove(parsed[0])

            if not self.functions:
                break

            while self.bracket_stack or not self.current.is_punctuation(
                    ';', '}'):
                self.next()

        if self.functions:
            return

        code = '\n'.join(self.typedefs_code + self.structs_code +
                         self.func_signatures)
        self.file_ast = self.cparser.parse(code)
        func_offset = len(self.typedefs_code + self.structs_code)

        for i, func_name in enumerate(self.func_names, func_offset):
            if self.param_names is None:
                func_declaration = self.file_ast.ext[i]
            else:
                func_declaration = rename_parameters(
                    self.file_ast.ext[i], self.param_names.get(func_name))

            self.mocked_functions.append(
                MockedFunction(func_name, func_declaration))

        self.load_typedefs()
        self.load_structs()

    def resolve_type(self, type_):
        if isinstance(type_, c_ast.IdentifierType):
            name = ' '.join(type_.names)

            if name in PRIMITIVE_TYPES or name == '_Bool':
                return PrimitiveType(name)
            elif name == '__builtin_va_list':
                return VaList()
            elif name == 'void':
                return VoidType()
            else:
                return self.resolve_type(self.lookup_typedef(name).type)
        elif isinstance(
                type_,
            (c_ast.Union, c_ast.Struct, c_ast.FuncDecl, c_ast.Enum)):
            return type_
        elif isinstance(type_, c_ast.TypeDecl):
            return self.resolve_type(type_.type)
        elif isinstance(type_, c_ast.ArrayDecl):
            # Both dimensioned and undimensioned arrays resolve to the element type.
            return self.resolve_type(type_.type)
        elif isinstance(type_, c_ast.PtrDecl):
            return self.resolve_type(type_.type)
        else:
            raise Exception(f'Unknown type {type_}.')

    def expand_type(self, type_):
        if isinstance(type_, c_ast.IdentifierType):
            name = ' '.join(type_.names)

            if name in PRIMITIVE_TYPES:
                pass
            elif name in ['__builtin_va_list', 'void', '_Bool']:
                pass
            else:
                type_ = self.expand_type(self.lookup_typedef(name).type)
        elif isinstance(
                type_,
            (c_ast.Union, c_ast.Struct, c_ast.FuncDecl, c_ast.Enum)):
            pass
        elif isinstance(type_, c_ast.TypeDecl):
            type_.type = self.expand_type(type_.type)
        elif isinstance(type_, (c_ast.PtrDecl, c_ast.ArrayDecl)):
            type_.type = self.expand_type(type_.type)
        else:
            raise Exception(f'Unknown type {type_}.')

        return type_

    def lookup_typedef(self, name):
        if name in self.typedefs:
            return self.typedefs[name]

    def load_struct_member(self, member):
        items = []
        expanded_type = self.expand_type(member.type)
        type_ = self.resolve_type(member.type)

        if is_fixed_array(expanded_type):
            items.append(['assert-array-eq', member.name])
        elif is_pointer_or_array(expanded_type):
            pass
        elif isinstance(type_, (PrimitiveType, c_ast.Enum)):
            if member.bitsize is None:
                items.append(['assert-eq', member.name])
            else:
                items.append(['assert-eq-bit-field', member.name, type_.name])
        elif is_struct(expanded_type):
            if type_.name is None:
                for item in self.load_struct_members(type_):
                    item[1] = f'{member.name}.{item[1]}'
                    items.append(item)
            else:
                items.append(['assert-struct', member.name, type_.name])

        return items

    def load_struct_members(self, struct):
        if not struct.decls:
            return []

        items = []

        for member in struct.decls:
            if member.name is None:
                continue

            items += self.load_struct_member(member)

        return items

    def load_structs(self):
        for item in self.file_ast:
            expanded_type = self.expand_type(item.type)

            if not is_struct(expanded_type):
                continue

            type_ = self.resolve_type(expanded_type)

            if isinstance(item, c_ast.Typedef):
                if type_.decls is None:
                    continue

                items = self.load_struct_members(type_)
                self.struct_typedefs.append((item.name, items))
            else:
                items = self.load_struct_members(type_)
                self.structs.append((type_.name, items))

    def load_typedefs(self):
        for item in self.file_ast:
            if isinstance(item, c_ast.Typedef):
                self.typedefs[item.name] = item

    def next(self):
        self.previous = self.current
        self.current = next(self.token_stream, None)

        if not self.current:
            return None

        if self.current.type == 'PUNCTUATION':
            if self.current.value in '({[':
                self.bracket_stack.append(')}]'['({['.index(
                    self.current.value)])
            elif self.bracket_stack and self.current.value == self.bracket_stack[
                    -1]:
                self.bracket_stack.pop()
        elif self.current.type == 'LINEMARKER':
            filename, flags = LINEMARKER.match(self.current.value).groups()

            if not flags and len(self.source_context) == 0:
                self.current_file = filename

            if self.current_file not in ['<built-in>', '<command-line>']:
                if '1' in flags:
                    self.source_context.append(filename)

                    if len(self.source_context) == 1:
                        self.includes.append(
                            IncludeDirective.from_source_context(
                                self.source_context))
                elif '2' in flags:
                    self.source_context.pop()

            self.mark_for_erase(*self.current.span)
            self.next()
        elif self.current.is_keyword('__attribute__'):
            begin = self.current.span[0]
            stack_depth = len(self.bracket_stack)
            self.next()

            while len(self.bracket_stack) > stack_depth:
                self.next()

            self.mark_for_erase(begin, self.current.span[1])
        elif self.current.is_keyword('__extension__', '__restrict',
                                     '__signed__', '__signed', '_Nullable'):
            self.mark_for_erase(*self.current.span)
            self.next()
        elif self.current.type == 'PRAGMA':
            self.mark_for_erase(*self.current.span)
            self.next()

        return self.current

    def parse_typedef(self):
        begin = self.current.span[0]

        while self.bracket_stack or not self.current.is_punctuation(';'):
            self.next()

        code = self.read_source_code(begin, self.current.span[1])
        self.typedefs_code.append(code)

    def parse_struct(self, begin, _name):
        while self.bracket_stack:
            self.next()

        code = self.read_source_code(begin, self.current.span[1]) + ';'

        if self.is_in_header_file():
            self.structs_code.append(code)

    def is_in_header_file(self):
        return len(self.source_context) > 0

    def parse_function_declaration_or_struct(self):
        while self.current.is_prefix:
            self.next()

        begin = self.current.span[0]
        return_type = []

        if self.current.is_keyword('struct'):
            if self.next() and self.current.type == 'IDENTIFIER':
                struct_name = self.current.value

                if self.next() and self.current.value == '{':
                    self.parse_struct(begin, struct_name)

                    return None

        while (not self.current.is_punctuation('(')
               or self.next() and self.current.is_punctuation('*')):
            if not self.bracket_stack and self.current.is_punctuation(';'):
                return None

            return_type.append(self.current.value)
            self.next()

        if not return_type:
            return None

        func_name = return_type.pop()

        if func_name not in self.functions:
            return None

        while (self.bracket_stack
               or self.next() and self.current.is_punctuation('(')):
            self.next()

        code = self.read_source_code(begin, self.previous.span[1]) + ';'

        return func_name, code

    def mark_for_erase(self, begin, end):
        self.chunks_to_erase.append((begin, end))

    def read_source_code(self, begin, end):
        if self.chunks_to_erase:
            chunks = []
            offset = begin

            for chunk_begin, chunk_end in self.chunks_to_erase:
                if chunk_end < offset:
                    continue

                chunks.append(self.source_code[offset:chunk_begin])
                offset = chunk_end

            chunks.append(self.source_code[offset:end])
            self.chunks_to_erase = []
            code = ''.join(chunks)
        else:
            code = self.source_code[begin:end]

        return code.strip()
Example #14

parser = CParser()

buf = r'''
    static void foo()
    {
        char x;
        if ('\x1') {
            x = '\x5';
            x = '\x3';
        }
    }
'''

c_ast = parser.parse(buf, 'x.c')
c_ast.show()
print("#######")

v = CASTVisitor()
bytecode = v.visitMain(c_ast.ext[0])
print('\nbytecode: ' + str(bytecode))

labeled_bytecode = addLabels(bytecode)
print('\nlabeled bytecode:')
print_bytecode(labeled_bytecode)

jump_bytecode = addJumps(labeled_bytecode)
print('\njump bytecode:')
print_bytecode(jump_bytecode)
Example #15
class ForgivingDeclarationParser:
    linemarker = re.compile(r'^# \d+ "((?:\\.|[^\\"])*)"((?: [1234])*)$')

    tokens = {
        "LINEMARKER": r"^#.*$",
        "KEYWORD": (
            "\\b(?:auto|break|case|char|const|continue|default|do|double|else|enum|extern|float"
            "|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch"
            "|typedef|union|unsigned|void|volatile|while|__extension__|__attribute__|__restrict)\\b"
        ),
        "IDENTIFIER": r"\b[a-zA-Z_](?:[a-zA-Z_0-9])*\b",
        "CHARACTER": r"L?'(?:\\.|[^\\'])+'",
        "STRING": r'L?"(?:\\.|[^\\"])*"',
        "INTEGER": r"(?:0[xX][a-fA-F0-9]+|[0-9]+)[uUlL]*",
        "FLOAT": (
            r"(?:[0-9]+[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+(?:[Ee][+-]?[0-9]+)?|[0-9]+\.[0-9]*(?:[Ee][+-]?[0-9]+)?)[fFlL]?"
        ),
        "PUNCTUATION": (
            r"\.\.\.|>>=|<<=|\+=|-=|\*=|/=|%=|&=|\^=|\|=|>>|<<|\+\+|--|->|&&|\|\||<=|>=|"
            r"==|!=|;|\{|\}|,|:|=|\(|\)|\[|\]|\.|&|!|~|-|\+|\*|/|%|<|>|\^|\||\?"
        ),
        "SPACE": r"[ \t\v\n\f]*",
        "IGNORE": r".+?",
    }

    ignored_tokens = "SPACE", "IGNORE"

    regex = re.compile(
        "|".join(f"(?P<{token}>{pattern})" for token, pattern in tokens.items()),
        flags=re.MULTILINE,
    )

    def __init__(self, source_code, functions=None, keep_args=""):
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None

        self.bracket_stack = []
        self.source_context = []
        self.typedefs = ["typedef int __builtin_va_list;"]

        self.cparser = CParser()

        self.keep_args = re.compile(f"^{keep_args}$")

    @classmethod
    def tokenize(cls, source_code):
        for match in cls.regex.finditer(source_code):
            if match.lastgroup not in cls.ignored_tokens:
                yield Token(match.lastgroup, match.group().strip(), match.span())

    def __iter__(self):
        while self.next():
            if self.current.is_keyword("typedef"):
                self.parse_typedef()

            function = self.parse_function_declaration()

            if function is not None:
                yield function

            if self.functions is not None and not self.functions:
                break

            while self.current and not (
                self.current.is_punctuation(";", "}") and not self.bracket_stack
            ):
                self.next()

    def next(self):
        self.previous = self.current
        self.current = next(self.token_stream, None)

        if not self.current:
            return None

        if self.current.type == "PUNCTUATION":
            if self.current.value in "({[":
                self.bracket_stack.append(")}]"["({[".index(self.current.value)])
            elif self.bracket_stack and self.current.value == self.bracket_stack[-1]:
                self.bracket_stack.pop()

        elif self.current.type == "LINEMARKER":
            filename, flags = self.linemarker.match(self.current.value).groups()

            if "1" in flags:
                self.source_context.append(filename)
            elif "2" in flags:
                self.source_context.pop()

            try:
                self.source_context[-1] = filename
            except IndexError:
                self.source_context.append(filename)

            self.erase_code_section(*self.current.span)
            self.next()

        elif self.current.is_keyword("__attribute__"):
            begin = self.current.span[0]

            stack_depth = len(self.bracket_stack)
            self.next()

            while len(self.bracket_stack) > stack_depth:
                self.next()

            self.erase_code_section(begin, self.current.span[1])

        elif self.current.is_keyword("__extension__", "__restrict"):
            self.erase_code_section(*self.current.span)
            self.next()

        return self.current

    def parse_typedef(self):
        start_index = self.current.span[0]

        while self.current and not (
            self.current.is_punctuation(";") and not self.bracket_stack
        ):
            self.next()

        self.typedefs.append(self.source_code[start_index : self.current.span[1]])

    def parse_function_declaration(self):
        if self.bracket_stack:
            return None

        while self.current and self.current.is_prefix:
            self.next()

        start_index = self.current.span[0]
        return_type = []

        while (
            self.current
            and not self.current.is_punctuation("(")
            or self.next()
            and self.current.is_punctuation("*")
        ):
            if not self.bracket_stack and self.current.is_punctuation(";"):
                return None

            return_type.append(self.current.value)
            self.next()

        if not return_type:
            return None

        func_name = return_type.pop()

        if self.functions is not None and func_name not in self.functions:
            return None

        while (
            self.current
            and self.bracket_stack
            or self.next()
            and self.current.is_punctuation("(")
        ):
            self.next()

        signature = self.source_code[start_index : self.previous.span[1]] + ";"
        code = "\n".join(self.typedefs) + "\n" + signature

        try:
            file_ast = self.cparser.parse(code)
        except ParseError:
            return None
        else:
            if self.functions is not None:
                self.functions.remove(func_name)

            return MockedFunction(
                func_name,
                file_ast.ext[-1]
                if self.keep_args.match(func_name)
                else rename_arguments(file_ast.ext[-1]),
                IncludeDirective.from_source_context(self.source_context),
            )

    def erase_code_section(self, begin, end):
        self.source_code = (
            self.source_code[:begin] + " " * (end - begin) + self.source_code[end:]
        )
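
A usage sketch, assuming `preprocessed` holds cpp output (the class depends on the linemarkers cpp emits) and that Token, MockedFunction, IncludeDirective, rename_arguments, and ParseError are defined in the surrounding module:

# Usage sketch: iterate over the declarations of the requested functions.
parser = ForgivingDeclarationParser(preprocessed, functions={"foo", "bar"})
for mock in parser:
    print(mock)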
Example #16
#!/usr/bin/python
""" A demo showing the usage of the preprocessor with pycparsing """

import argparse
import io
from ppci.api import preprocess
from pycparser.c_parser import CParser

if __name__ == '__main__':
    # Argument handling:
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('source', help='C source file')
    args = arg_parser.parse_args()
    filename = args.source

    # Preprocessing:
    f2 = io.StringIO()
    with open(filename, 'r') as f:
        preprocess(f, f2)
    source = f2.getvalue()

    # Parsing:
    parser = CParser()
    ast = parser.parse(source, filename)
    ast.show()
Example #17
File: z.py Project: terry2012/juxta
    def visit(self, node):
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def visit_FuncCall(self, node):
        print("Visiting FuncCall")
        print(node.show())
        print('---- parent ----')
        print(self.current_parent.show())

    def generic_visit(self, node):
        """ Called if no explicit visitor function exists for a
            node. Implements preorder visiting of the node.
        """
        oldparent = self.current_parent
        self.current_parent = node
        for c in node.children():
            self.visit(c)
        self.current_parent = oldparent


if __name__ == "__main__":
    source_code = r'''void foo() {
    L"hi" L"there";
}
    '''

    parser = CParser()
    ast = parser.parse(source_code, filename='zz')
    ast.show(showcoord=True, attrnames=True, nodenames=True)
Example #18
File: parser.py Project: xcode2010/ghcc
def parse_decompiled_code(code: str,
                          lexer: LexerWrapper,
                          parser: CParser,
                          max_type_fix_tries: int = 10) -> Tuple[ASTNode, str]:
    r"""Parse preprocessed decompiled code and heuristically fix errors caused by undefined types.

    If a parse error is encountered, we attempt to fix the code by parsing the error message and checking whether it
    could be an undefined type error. If it is, we prepend a dummy ``typedef`` and retry parsing, until either the code
    parses or we run out of tries.

    :raises ValueError: When we've run out of tries for fixing types, or the issue cannot be resolved by adding a
        ``typedef`` (i.e., getting the same error after adding ``typedef``).
    :raises pycparser.c_parser.ParseError: When we cannot identify the error.

    :param code: The preprocessed code to parse
    :param lexer: The lexer to use while parsing.
    :param parser: The parser to use while parsing.
    :param max_type_fix_tries: Maximum retries to fix type errors.
    :return: A tuple containing the parsed AST and the modified code.
    """
    added_types: Set[str] = set()
    code_lines = code.split("\n")
    for _ in range(max_type_fix_tries):
        try:
            decompiled_ast = parser.parse(code)
            break
        except pycparser.c_parser.ParseError as e:
            error_match = PARSE_ERROR_REGEX.match(str(e))
            if error_match is None or not error_match.group("msg").startswith(
                    "before: "):
                raise
            before_token = remove_prefix(error_match.group("msg"), "before: ")
            error_line = code_lines[int(error_match.group("line")) - 1]
            error_pos = int(error_match.group("col")) - 1
            tokens = list(lexer.lex_tokens(error_line))
            try:
                error_token_idx = next(idx for idx, token in enumerate(tokens)
                                       if token.lexpos == error_pos
                                       and token.value == before_token)
                # There are multiple possible cases here:
                # 1. The type is the first ID-type token before the reported token (`type token`). It might not
                #    be the one immediately in front (for example, `(type) token`, `type *token`).
                # 2. The type is the token itself. This is rare and only happens in a situation like:
                #      `int func(const token var)`  or  `int func(int a, token b)`
                #    Replacing `const` with any combination of type qualifiers also works.
                if (error_token_idx > 0
                        and tokens[error_token_idx - 1].type in [
                            "CONST", "VOLATILE", "RESTRICT", "__CONST",
                            "__RESTRICT", "__EXTENSION__", "COMMA"
                        ]):
                    type_token = tokens[error_token_idx]
                else:
                    type_token = next(tokens[idx]
                                      for idx in range(error_token_idx -
                                                       1, -1, -1)
                                      if tokens[idx].type == "ID")
            except StopIteration:
                # If we don't catch this, it would terminate the for-loop in `main()`. Stupid design.
                raise e from None

            if type_token.value in added_types:
                raise ValueError(
                    f"Type {type_token.value} already added (types so far: {list(added_types)})"
                )
            added_types.add(type_token.value)
            typedef_line = f"typedef int {type_token.value};"
            code = typedef_line + "\n" + code
            code_lines.insert(0, typedef_line)
    else:
        raise ValueError(f"Type fixes exceeded limit ({max_type_fix_tries})")
    return decompiled_ast, code
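
The same retry-with-typedef idea in a minimal standalone form. This is a deliberate simplification of the function above: it naively treats the token reported in the parse error as the missing type instead of scanning backwards for the real type token, and it uses only pycparser:

import re

from pycparser import CParser
from pycparser.c_parser import ParseError


def parse_with_type_fixes(code: str, max_tries: int = 10):
    # On a "before: X" parse error, assume X is an undefined type, prepend a
    # dummy typedef, and retry; give up after max_tries attempts.
    parser = CParser()
    for _ in range(max_tries):
        try:
            return parser.parse(code)
        except ParseError as e:
            match = re.search(r"before: ([A-Za-z_]\w*)$", str(e))
            if match is None:
                raise
            code = f"typedef int {match.group(1)};\n" + code
    raise ValueError(f"type fixes exceeded limit ({max_tries})")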
Example #20
class Parser(object):
    """A class for parsing C headres to python structs. It saves the context and configuration"""
    def __init__(self, conf=gcc_x86_64_le, debuglevel=0):
        super(Parser, self).__init__()
        self.conf = conf
        self.debuglevel = debuglevel

        self.basics = conf.basics
        self.names_to_pycstructs = {}

        self.structs_num = 0
        self.unions_num = 0
        self.arrays_num = 0
        self.enums_num = 0
        self.cdata = ""
        self.last_processed = ""


        funcs = {}
        funcs[pycparser.c_ast.ID]                = self.id_handler
        funcs[pycparser.c_ast.IdentifierType]    = self.type_handler
        funcs[pycparser.c_ast.Struct]            = self.struct_handler
        funcs[pycparser.c_ast.Union]             = self.union_handler
        funcs[pycparser.c_ast.Enum]              = self.enum_handler
        funcs[pycparser.c_ast.EnumeratorList]    = self.enumerator_list_handler
        funcs[pycparser.c_ast.Enumerator]        = self.enumerator_handler
        funcs[pycparser.c_ast.ArrayDecl]         = self.array_handler
        funcs[pycparser.c_ast.PtrDecl]           = self.ptr_handler
        funcs[pycparser.c_ast.Typedef]           = self.typedef_handler
        funcs[pycparser.c_ast.Typename]          = self.typename_handler
        funcs[pycparser.c_ast.TypeDecl]          = self.typedecl_handler
        funcs[pycparser.c_ast.Decl]              = self.decl_handler
        funcs[pycparser.c_ast.FuncDecl]          = self.func_decl_handler
        funcs[pycparser.c_ast.FuncDef]           = self.func_def_handler
        funcs[pycparser.c_ast.Constant]          = self.constant_handler
        funcs[pycparser.c_ast.BinaryOp]          = self.binary_op_handler
        funcs[pycparser.c_ast.UnaryOp]           = self.unary_op_handler
        funcs[pycparser.c_ast.Cast]              = self.cast_handler
        self.funcs = funcs

        self.flush()

    def flush(self):
        self.pre = pcpp.Preprocessor()
        self.pre.line_directive = None

        self.cparse = CParser()

        self.cdata = ""
        self.last_processed = ""

    def __getattr__(self, name):
        if name in self.__dict__ or not self.has_type(name):
            return self.__getattribute__(name)
        return self.get_type(name)

    def has_type(self, val):
        return val in self.names_to_pycstructs or self.conf.has_type(val)

    def get_type(self, val):
        if self.conf.has_type(val):
            return self.conf.get_type(val)
        return self.names_to_pycstructs[val]

    def set_type(self, name, val):
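        # Register under both the bare name and the 1-tuple form, because
        # type_handler looks types up by tuple(node.names).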
        self.names_to_pycstructs[name] = val
        self.names_to_pycstructs[(name, )] = val
        return val

    def id_handler(self, node):
        assert type(node) is pycparser.c_ast.ID
        return self.get_type(node.name)


    def typedef_handler(self, node):
        assert type(node) is pycparser.c_ast.Typedef
        name = node.name
        val = self.parse_node(node.type)
        if name in [
            "uint8_t",
            "uint16_t",
            "uint32_t",
            "uint64_t",
            "int8_t",
            "int16_t",
            "int32_t",
            "int64_t",]:
            return self.get_type(name)
        return self.set_type(name, val)

    def _field_handler(self, node):
        assert type(node) is pycparser.c_ast.Decl
        name = node.name
        typ = self.parse_node(node.type)
        return name, typ

    def enum_handler(self, node):
        assert type(node) == pycparser.c_ast.Enum

        name = node.name
        self.enums_num += 1
        if name is None:
            name = "enum_num_%d" % self.enums_num

        values = self.parse_node(node.values)
        val = MetaPyEnum(name, (), dict(_values=values), self.conf)

        for item in val:
            self.set_type(str(item), item)

        return self.set_type(name, val)

    def enumerator_handler(self, node):
        assert type(node) == pycparser.c_ast.Enumerator
        return self.parse_node(node.value), node.name

    def enumerator_list_handler(self, node):
        assert type(node) == pycparser.c_ast.EnumeratorList
        res = []
        last = -1
        for item in node.enumerators:
            val, name = self.parse_node(item)
            if val is None:
                val = last + 1
            last = val
            self.set_type(name, val)
            res.append((val, name))
        return res

    def struct_handler(self, node):
        assert type(node) == pycparser.c_ast.Struct

        fields = []
        name = node.name
        
        if not node.decls:
            if self.has_type((name, )):
                return self.get_type((name, ))
            else:
                fields = None

        
        if fields is not None:
            for decl in node.decls:
                field_name, field_type = self._field_handler(decl)
                fields.append((field_name, field_type))

        self.structs_num += 1
        if name is None:
            name = "struct_num_%d" % self.structs_num

        if self.has_type((name, )) and isinstance(type(self.get_type((name, ))), MetaPyStruct):
            val = self.get_type((name, ))
            val.assign_fields(fields, self.conf)
        else:
            val = MetaPyStruct(name, (), {"_fields" : fields}, self.conf)
            val.__module__ = None
            self.set_type(name, val)

        return val

    def union_handler(self, node):
        assert type(node) == pycparser.c_ast.Union
        
        fields = []
        name = node.name
        
        if not node.decls:
            if self.has_type((name, )):
                return self.get_type((name, ))
            else:
                fields = None

        if fields is not None:
            for decl in node.decls:
                field_name, field_type = self._field_handler(decl)
                fields.append((field_name, field_type))

        self.unions_num += 1
        if name is None:
            name = "union_num_%d" % self.unions_num

        if self.has_type((name, )) and isinstance(type(self.get_type((name, ))), MetaPyUnion):
            val = self.get_type((name, ))
            val.assign_fields(fields, self.conf)
        else:
            val = MetaPyUnion(name, (), {"_fields" : fields}, self.conf)
            val.__module__ = None
            self.set_type(name, val)
        
        return val

    def array_handler(self, node):
        assert type(node) is pycparser.c_ast.ArrayDecl
        typ = self.parse_node(node.type)
        num = self.parse_node(node.dim)
        assert num is None or type(num) in [long, int]
        self.arrays_num += 1
        val = MetaPyArray("array_num_%d" % self.arrays_num, (), {"_type" : typ, "_count" : num}, self.conf)
        val.__module__ = None
        return val


    def type_handler(self, node):
        assert type(node) is pycparser.c_ast.IdentifierType
        assert self.has_type(tuple(node.names)), str(tuple(node.names))
        return self.get_type(tuple(node.names))

    def typedecl_handler(self, node):
        assert type(node) is pycparser.c_ast.TypeDecl
        return self.parse_node(node.type)

    def typename_handler(self, node):
        assert type(node) is pycparser.c_ast.Typename
        return self.parse_node(node.type)

    def constant_handler(self, node):
        assert type(node) is pycparser.c_ast.Constant
        if node.type == 'char':
            return ord(eval(node.value))
        if node.type == "int":
            return eval(node.value)

        assert 0, "Unknown constant type: %s" % node.type

    def ptr_handler(self, node):
        assert type(node) is pycparser.c_ast.PtrDecl
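        # The pointee type is discarded: every pointer collapses to a generic void*.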
        return self.get_type(("void", "*", ))

    def decl_handler(self, node):
        assert type(node) is pycparser.c_ast.Decl
        return self.parse_node(node.type)

    def func_decl_handler(self, node):
        assert type(node) is pycparser.c_ast.FuncDecl
        return

    def func_def_handler(self, node):
        assert type(node) is pycparser.c_ast.FuncDef
        return

    def cast_handler(self, node):
        assert type(node) is pycparser.c_ast.Cast
        val = self.parse_node(node.expr)
        obj = self.parse_node(node.to_type)()
        obj._val_property = val
        return obj._val_property


    def binary_op_handler(self, node):
        assert type(node) is pycparser.c_ast.BinaryOp

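        # Builds e.g. "self.parse_node(node.left) + self.parse_node(node.right)"
        # and lets Python apply the operator; this assumes C and Python agree
        # for the operators that appear in header constant expressions.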
        return eval("self.parse_node(node.left) %s self.parse_node(node.right)" % node.op)

    def unary_op_handler(self, node):
        assert type(node) is pycparser.c_ast.UnaryOp
        if node.op == "sizeof":
                return sizeof(self.parse_node(node.expr))

        if node.op == "~":
            return ~self.parse_node(node.expr)

        if node.op == "-":
            return -self.parse_node(node.expr)

        assert False, "Unknown unary op: %s" % node.op

    def parse_node(self, node):
        if node is None:
            return node

        if type(node) in self.funcs:
            return self.funcs[type(node)](node)

        node.show()
        assert 0, "Unknown handler for type: %s" % repr(type(node))

    def parse_string(self, data, file_name="<unknown>", include_dirs=[get_dir(__file__)], debuglevel=None):
        if debuglevel is None:
            debuglevel = self.debuglevel

        for i in include_dirs:
            self.pre.add_path(i)
        self.pre.parse(data)
        buff = cStringIO.StringIO()
        self.pre.write(buff)
        processed = buff.getvalue()

        self.last_processed = processed

        not_found = [line for line in processed.splitlines() if "#include" in line]
        if not_found:
            print "There is unresolved includes:"
            for line in not_found:
                print line

        assert "#include " not in processed

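        # Register object-like macros (those without an argument list) as constants.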
        for macro_name, macro in self.pre.macros.items():
            if not macro.arglist:
                self.set_type(macro_name, self.pre.evalexpr(macro.value, get_strings=True))

        types = """
            typedef int uint8_t;
            typedef int uint16_t;
            typedef int uint32_t;
            typedef int uint64_t;
            typedef int int8_t;
            typedef int int16_t;
            typedef int int32_t;
            typedef int int64_t;
            """
        contents = self.cparse.parse(types+processed, file_name)

        self.cdata += processed

        res = []
        for ex in contents.ext:
            if debuglevel:
                ex.show()
            res.append(self.parse_node(ex))
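        # Drop the results of the eight stub typedefs prepended above.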
        res = res[8:]

        return res[0] if len(res) == 1 else res

    def parse_file(self, file_path, include_dirs=None, debuglevel=None):
        if include_dirs is None:
            include_dirs = [get_dir(__file__), get_dir(file_path)]

        with open(file_path, "rb") as f:
            data = f.read()

        return self.parse_string(data, file_path, include_dirs, debuglevel)

    def update_globals(self, g):
        """Enters the new classes to globals.
           You should call that functions like that: p.update_globals(globals())
        """
        self.conf.update_globals(g)
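        # Tuple keys like ("name",) are internal lookup aliases; export only str keys.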
        g.update([(k,v) for k,v in self.names_to_pycstructs.items() if isinstance(k, str)])
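To round out the example, a hypothetical usage sketch: the struct and its fields are invented, and the return value of parse_string is inferred from the code above rather than verified against the original project.

p = Parser()
Point = p.parse_string("""
    struct point {
        uint32_t x;
        uint32_t y;
    };
""")
pt = Point()                  # instantiate the generated struct class
p.update_globals(globals())   # also expose every parsed type by name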