예제 #1
0
def reformat(apis_file, all_sources, all_sinks, json_result_file, outfile):
    """Convert progpilot JSON taint results into a ModuleStatic proto file.

    :param apis_file: path to the AstLookupConfig proto (text format) listing APIs to check
    :param all_sources: all source APIs that were checked
    :param all_sinks: all sink APIs that were checked
    :param json_result_file: path to the progpilot results in JSON format
    :param outfile: path where the combined ModuleStatic proto is written (text format)
    :return: None when the JSON results cannot be loaded
    """
    # use a context manager so the result file handle is always released,
    # and include the actual exception in the error log (it was swallowed before)
    try:
        with open(json_result_file, 'r') as inf:
            results = json.load(inf)
    except Exception as e:
        logging.error("failed to load progpilot results in json %s: %s",
                      json_result_file, e)
        return None

    logging.warning("there are %d sources and %d sinks checked!",
                    len(all_sources), len(all_sinks))
    # load the astgen config from file
    config = AstLookupConfig()
    read_proto_from_file(config, apis_file, binary=False)
    logging.warning("loaded config with %d apis to check!", len(config.apis))

    # fill in result (flows/dangers) and summary (sources/sinks/wrappers)
    result = ModuleResult()
    set_result(result=result,
               apis=config.apis,
               all_sources=all_sources,
               all_sinks=all_sinks,
               flows=results)
    summary = ModuleSummary()
    set_summary(summary=summary,
                apis=config.apis,
                all_sources=all_sources,
                all_sinks=all_sinks,
                new_sources=None,
                new_sinks=None)
    # merge everything into a single ModuleStatic and persist it as text proto
    static = ModuleStatic()
    static.flows.MergeFrom(result.flows)
    static.dangers.MergeFrom(result.dangers)
    static.sources.MergeFrom(summary.sources)
    static.sinks.MergeFrom(summary.sinks)
    static.taint_wrappers.MergeFrom(summary.taint_wrappers)
    write_proto_to_file(proto=static, filename=outfile, binary=False)
예제 #2
0
def run_extractor_worker(infile, outdir, extract_types=('SO',), in_type='APK', store_type="file_with_symlink",
                         skip_processed=False, binary=False):
    """Extract components (e.g. dex/so) from infile and store them in outdir.

    1. extract the dex/so files, and store them to outdir. For each file, create a symbolic link using hash
       (used for deduplication)
    2. also store the ".components" file to the outdir

    :param infile: path of the input file (e.g. an APK)
    :param outdir: directory where extracted files and the components proto are stored
    :param extract_types: names of repo_pb extract types to pull out (tuple avoids the
        mutable-default-argument pitfall; previous default was the list ['SO'])
    :param in_type: name of the repo_pb input type, default 'APK'
    :param store_type: how extracted files are stored; default fixed from the
        typo "file_with_symlink'" (stray trailing quote) to "file_with_symlink"
    :param skip_processed: when True, skip infile if its components file already exists
    :param binary: write the components proto in binary (True) or text (False) format
    :return: path of the written components file, or None when skipped
    """
    logging.info("Processing %s", infile)

    if skip_processed and exists(join(outdir, infile + COMPONENTS_SUFFIX)):
        logging.info("Skipping processed infile %s", infile)
        return

    extract_config = repo_pb.ExtractConfig()
    for extract_type in extract_types:
        extract_config.extract_types.append(getattr(repo_pb, extract_type))
    # hash the input for deduplication; context manager avoids leaking the handle
    with open(infile, 'rb') as inf:
        file_digest = hashfile(inf, hashlib.sha1())
    extract_config.inspect_compressed_files = True
    extract_config.in_path = infile.encode('utf8') if isinstance(infile, unicode) else infile
    extract_config.in_digest = file_digest
    extract_config.store_type = store_type
    extract_config.in_type = getattr(repo_pb, in_type)
    extract_config.out_path = outdir.encode('utf8') if isinstance(outdir, unicode) else outdir

    # extract types of files from infile, use symbol links for deduplication!
    extract_from_file_or_repo(extract_config=extract_config)
    outfile = join(extract_config.out_path, basename(extract_config.in_path) + COMPONENTS_SUFFIX)
    write_proto_to_file(proto=extract_config, filename=outfile, binary=binary)
    logging.info("extracted %d components from %s, and saved output to %s", len(extract_config.components), infile,
                 outfile)
    return outfile
예제 #3
0
    def astgen(self, inpath, outfile, root=None, configpath=None, pkg_name=None, pkg_version=None, evaluate_smt=False):
        """Generate AST results for Java artifacts using the astgen-java jar.

        Dispatches on the suffix of the (sanitized) input path to build the
        proper Soot command line (APK/DEX/SOURCE/CLASS/JAR/AAR), optionally
        evaluates the smt formula on the results, and cleans up residues.

        :param inpath: path to the artifact to analyze
        :param outfile: path where the PkgAstResults proto is written
        :param root: unused here (kept for a uniform astgen interface)
        :param configpath: path to the lookup config passed to the jar
        :param pkg_name: optional package name forwarded to the jar
        :param pkg_version: optional package version forwarded to the jar
        :param evaluate_smt: when True, evaluate the smt formula on the results
        :raises Exception: when the analyze path is a directory (Soot limitation)
        """
        analyze_path, is_decompress_path, outfile, root, configpath = self._sanitize_astgen_args(
            inpath=inpath, outfile=outfile, root=root, configpath=configpath, language=self.language)

        astgen_cmd = ['java', '-jar', 'target/astgen-java-1.0.0-jar-with-dependencies.jar', '-inpath', analyze_path,
                      '-outfile', outfile, '-config', configpath]
        if isdir(analyze_path):
            raise Exception("Soot doesn't take a directory as input: %s", analyze_path)

        if analyze_path.endswith((".apk", ".dex")):
            # processing android apps requires android.jar
            astgen_cmd.extend(['-android_jar_dir', 'platforms/'])
            if analyze_path.endswith(".apk"):
                # FIX: a missing comma here used to concatenate the two literals
                # into the single bogus argument 'APK-process_dir'
                astgen_cmd.extend(['-intype', 'APK', '-process_dir', analyze_path])
            elif analyze_path.endswith(".dex"):
                astgen_cmd.extend(['-intype', 'DEX', '-process_dir', analyze_path])
        elif analyze_path.endswith((".java",)):
            astgen_cmd.extend(['-intype', 'SOURCE', '-process_dir', dirname(analyze_path)])
        elif analyze_path.endswith((".class",)):
            astgen_cmd.extend(['-intype', 'CLASS', '-process_dir', dirname(analyze_path)])
        elif analyze_path.endswith((".jar",)):
            # this is the default input type
            astgen_cmd.extend(['-intype', 'JAR', '-process_dir', analyze_path])
        elif analyze_path.endswith((".aar",)):
            # aar contains /classes.jar
            # https://developer.android.com/studio/projects/android-library
            astgen_cmd.extend(['-android_jar_dir', 'platforms/'])
            aar_file = get_file_with_meta(analyze_path)
            class_jar_content = aar_file.accessor.read('classes.jar')
            analyze_path_jar = join(dirname(analyze_path), splitext(basename(analyze_path))[0] + '.jar')
            # write the embedded classes.jar next to the aar; context manager
            # closes the handle instead of leaking it
            with open(analyze_path_jar, 'wb') as jar_f:
                jar_f.write(class_jar_content)
            astgen_cmd.extend(['-intype', 'JAR', '-process_dir', analyze_path_jar])
        elif analyze_path.endswith((".war",)):
            # war contains lots of jar files in /WEB-INF/lib/
            # http://one-jar.sourceforge.net/
            logging.error("Not handling .war file yet: %s", analyze_path)
        else:
            logging.error("Input path has unexpected suffix: %s", analyze_path)
        # root is not used here
        if pkg_name is not None:
            astgen_cmd.extend(['-package_name', pkg_name])
        if pkg_version is not None:
            astgen_cmd.extend(['-package_version', pkg_version])
        exec_command("java astgen", astgen_cmd, cwd="static_proxy/astgen-java")

        # optionally evaluate smt formula
        if evaluate_smt:
            resultpb = PkgAstResults()
            read_proto_from_file(resultpb, filename=outfile, binary=False)
            satisfied = self._check_smt(astgen_results=[resultpb], configpath=configpath)
            resultpb.pkgs[0].config.smt_satisfied = satisfied
            write_proto_to_file(resultpb, filename=outfile, binary=False)

        # clean up residues
        self._cleanup_astgen(analyze_path=analyze_path, is_decompress_path=is_decompress_path)
예제 #4
0
    def astgen(self,
               inpath,
               outfile,
               root=None,
               configpath=None,
               pkg_name=None,
               pkg_version=None,
               evaluate_smt=False):
        """Run the PHP AST generator on inpath and save results to outfile.

        Converts the text-format lookup config to a binary proto, invokes the
        php helper, re-saves its binary output in text format, optionally
        evaluating the smt formula first, then cleans up residues.
        """
        (analyze_path, is_decompress_path, outfile, root,
         configpath) = self._sanitize_astgen_args(inpath=inpath,
                                                  outfile=outfile,
                                                  root=root,
                                                  configpath=configpath,
                                                  language=self.language)

        # ./vendor/nikic/php-parser/bin/php-parse  -d ../testdata/test-eval-exec.php
        # the php helper consumes the lookup config as a binary proto
        config_proto = AstLookupConfig()
        bin_configpath = configpath + '.bin'
        self._pb_text_to_bin(proto=config_proto,
                             infile=configpath,
                             outfile=bin_configpath)

        cmd = ['php', 'astgen.php', '-c', bin_configpath, '-i', analyze_path,
               '-o', outfile]
        # optional flags, appended in a fixed order when their values are set
        for flag, value in (('-b', root), ('-n', pkg_name),
                            ('-v', pkg_version)):
            if value is not None:
                cmd.extend([flag, value])
        exec_command("php astgen", cmd, cwd="static_proxy")

        # the php helper emits a binary proto; load it back
        result_proto = PkgAstResults()
        read_proto_from_file(result_proto, filename=outfile, binary=True)

        # optionally evaluate smt formula
        if evaluate_smt:
            result_proto.pkgs[0].config.smt_satisfied = self._check_smt(
                astgen_results=[result_proto], configpath=configpath)

        # re-save the results in text format
        write_proto_to_file(result_proto, filename=outfile, binary=False)

        # clean up residues
        self._cleanup_astgen(analyze_path=analyze_path,
                             is_decompress_path=is_decompress_path)
예제 #5
0
def py3_astgen(inpath,
               outfile,
               configpb,
               root=None,
               pkg_name=None,
               pkg_version=None):
    """Parse python3 sources under inpath and write API usage to outfile.

    :param inpath: file or directory to analyze
    :param outfile: path for the resulting PkgAstResults proto (text format)
    :param configpb: AstLookupConfig proto describing APIs to look for
    :param root: optional root used to relativize file paths
    :param pkg_name: package name to record; defaults to basename(inpath)
    :param pkg_version: optional package version to record
    :raises SyntaxError: re-raised when a source file fails to parse
    """
    # get input files
    infiles, root = get_infiles(inpath=inpath, root=root)

    # initialize resultpb
    resultpb = PkgAstResults()
    pkg = resultpb.pkgs.add()
    pkg.config.CopyFrom(configpb)
    pkg.pkg_name = pkg_name if pkg_name is not None else basename(inpath)
    if pkg_version is not None:
        pkg.pkg_version = pkg_version
    pkg.language = ast_pb2.PYTHON
    for infile in infiles:
        # close each source file promptly instead of leaking the handle
        with open(infile, 'r') as srcf:
            all_source = srcf.read()
        try:
            tree = ast.parse(all_source, filename=infile)
        except SyntaxError as se:
            # FIX: message used to claim "python2", but this is the python3 helper
            logging.warning("Syntax error %s parsing file %s in python3!", se,
                            infile)
            # bare raise preserves the original traceback
            raise
        # mark the tree with tokens information
        asttok = asttokens.ASTTokens(source_text=all_source,
                                     tree=tree,
                                     filename=infile)
        visitor = PythonDeclRefVisitor(asttok=asttok, configpb=configpb)
        visitor.visit(tree)
        logging.warning("collected functions: %s",
                        Counter(visitor.get_declrefs()).items())

        filepb = get_filepb(infile, root)
        for base, name, args, source_text, source_range in visitor.get_declrefs():
            api_result = get_api_result(base, name, args, source_text,
                                        source_range, filepb)
            pkg.api_results.add().CopyFrom(api_result)

    # save resultpb
    write_proto_to_file(resultpb, outfile, binary=False)
예제 #6
0
    def taint(self,
              inpath,
              outfile,
              configpath=None,
              pkg_name=None,
              pkg_version=None):
        """Run the jsprime-based taint analysis and store results as text proto.

        Converts the lookup config to binary, runs the node wrapper, reloads
        its binary output, re-saves it in text format, and removes residues.
        """
        (analyze_path, is_decompress_path, outfile, _,
         configpath) = self._sanitize_astgen_args(inpath=inpath,
                                                  outfile=outfile,
                                                  root=None,
                                                  configpath=configpath,
                                                  language=self.language)

        # jsprime expects the lookup config as a binary proto
        lookup_config = AstLookupConfig()
        bin_configpath = configpath + '.bin'
        self._pb_text_to_bin(proto=lookup_config,
                             infile=configpath,
                             outfile=bin_configpath)

        # perform static taint analysis via the node wrapper
        cmd = ['node', 'jsprime_wrapper.js', pkg_name, analyze_path,
               bin_configpath, outfile]
        exec_command("javascript taint", cmd, cwd="static_proxy/jsprime")

        # reload the binary output and re-save it in text format
        pkg_static = ModuleStatic()
        read_proto_from_file(pkg_static, outfile, binary=True)
        logging.warning("taint analysis results: %s", pkg_static)
        write_proto_to_file(pkg_static, filename=outfile, binary=False)

        # clean up residues
        os.remove(bin_configpath)
        self._cleanup_astgen(analyze_path=analyze_path,
                             is_decompress_path=is_decompress_path)
예제 #7
0
    def _gen_combined_configpath(self, configpath, dep_taint_results):
        """Create a new config file combining configpath with dep taint results.

        Appends each dependency's source/sink nodes to the loaded config and
        writes the merged config to a fresh temp file.

        :param configpath: path to the existing AstLookupConfig (text format)
        :param dep_taint_results: iterable of module_pb2.ModuleStatic for deps
        :return: path of the newly written combined config file
        """
        # load the old config
        configpb = AstLookupConfig()
        read_proto_from_file(configpb, configpath, binary=False)

        # iterate through the taint results to update configpb
        num_new_sources = 0
        num_new_sinks = 0
        for dep_taint_result in dep_taint_results:
            # dep_taint_result is of type module_pb2.ModuleStatic
            for new_source in dep_taint_result.sources:
                configpb.apis.append(new_source.node)
                num_new_sources += 1
            for new_sink in dep_taint_result.sinks:
                configpb.apis.append(new_sink.node)
                num_new_sinks += 1
        if num_new_sources + num_new_sinks > 0:
            logging.warning("added %d new sources and %d new sinks!",
                            num_new_sources, num_new_sinks)

        # generate the new config file; delete=False keeps it for the caller
        outf = tempfile.NamedTemporaryFile(prefix='configpath-', delete=False)
        # FIX: close the handle before writing by name -- the old code leaked
        # the open descriptor (and concurrent opens fail on some platforms)
        outf.close()
        write_proto_to_file(proto=configpb, filename=outf.name, binary=False)
        return outf.name
예제 #8
0
def secure_extract_from_file_or_repo(tuple_input):
    """A wrapper around extract_from_file_or_repo, simply configures the extract configuration

    FIX: this docstring used to sit after the first statement, so it was not a
    real docstring; it is now in the proper position.

    :param tuple_input: (inpath, app_digest_set, component_digest_set, new_component_digest_set)
        where inpath is the input path specified by command line,
        app_digest_set holds all the app digests, and
        component_digest_set holds all the component digests
    :raises Exception: for the deprecated 'database' store type, or any
        unhandled store type
    """
    inpath, app_digest_set, component_digest_set, new_component_digest_set = tuple_input
    msg = "Processing %s" % inpath
    logging.info(msg)
    print(msg)
    extract_config = repo_pb.ExtractConfig()
    for extract_type in FLAGS.extract_types:
        extract_config.extract_types.append(getattr(repo_pb, extract_type))
    if FLAGS.dbfile:
        extract_config.db_path = FLAGS.dbfile
    # hash the input file for dedup; context manager closes the handle
    if os.path.isfile(inpath):
        with open(inpath, 'rb') as inf:
            file_digest = hashfile(inf, hashlib.sha1())
    else:
        # NOTE(review): for non-file inputs the digest stays None and is
        # assigned to in_digest below -- confirm the proto/extractor accepts that
        file_digest = None
    extract_config.inspect_compressed_files = True
    extract_config.in_path = inpath.encode('utf8') if isinstance(inpath, unicode) else inpath
    extract_config.in_digest = file_digest
    extract_config.store_type = FLAGS.store_type
    # sub_in_type is the input type for workers
    extract_config.in_type = getattr(repo_pb, FLAGS.sub_in_type)
    # outpath is generic for all files, and files are named with digests
    extract_config.out_path = FLAGS.outdir.encode('utf8') if isinstance(FLAGS.outdir, unicode) else FLAGS.outdir

    if extract_config.store_type == 'database':
        # Store using database.
        raise Exception("deprecated")
        # extract_config.summary_table_name = FLAGS.summary_table
        # extract_config.detail_table_name = FLAGS.detail_table
        # db_obj_for_summary = SQLiteDatabase(dbpath=FLAGS.dbfile)
        # processed = db_obj_for_summary.exists_table(table_name=FLAGS.summary_table,
        #                                             where_name_value_dict={'app_digest': file_digest, 'processed': 1})
        # if not processed:
        #     try:
        #         extract_from_file_or_repo(extract_config)
        #         db_obj_for_summary.update_table(table_name=FLAGS.summary_table,
        #                                         # TODO: sharedlib_count is non-trivial to get, skipping for now!
        #                                         set_name_value_dict={'processed': 1},
        #                                         where_name_value_dict={'app_digest': file_digest})
        #     except Exception as e:
        #         db_obj_for_summary.update_table(table_name=FLAGS.summary_table,
        #                                         # 0, non-processed, 1, processed, -1, error processing
        #                                         set_name_value_dict={'processed': -1},
        #                                         where_name_value_dict={'app_digest': file_digest})
        #         msg = "Error processing %s: %s" % (inpath, e)
        #         logging.error(msg)
        #         print (msg)
    elif extract_config.store_type == 'file':
        if file_digest not in app_digest_set:  # not processed
            extract_from_file_or_repo(extract_config=extract_config,
                                      component_digest_set=component_digest_set,
                                      new_component_digest_set=new_component_digest_set,
                                      summarize_size=FLAGS.summarize_size)
            app_digest_set[file_digest] = True
            if len(extract_config.components) > 0:  # Something was extracted
                write_proto_to_file(proto=extract_config, filename=os.path.join(
                    extract_config.out_path, basename(extract_config.in_path) + COMPONENTS_SUFFIX), binary=False)
                # lazy %-args instead of eager string formatting
                logging.debug("%s write analyzed components to file successful, extract_config:\n%s",
                              extract_config.in_path, extract_config)
            else:
                logging.info("No extracted components:%s", extract_config)
        else:
            logging.info("Skipping processed item: %s", inpath)
    else:
        # file_with_symlink is not used here!
        raise Exception("Unhandled store type")
예제 #9
0
    def astfilter(self,
                  pkg_name,
                  outdir,
                  cache_dir=None,
                  configpath=None,
                  pkg_version=None,
                  pkg_manager=None,
                  ignore_dep_version=False,
                  ignore_dep=False):
        """
        Filters packages and their dependencies, based on sensitive APIs and their combinations

        This helps narrow down packages for further analysis.
        """
        # refuse to run without a known language
        if self.language is None:
            raise Exception("Invoking astfilter on invalid language: %s" %
                            self.language)

        # pick the package-manager proxy: explicit pm wins over language default
        if pkg_manager is None:
            pm_proxy = get_pm_proxy_for_language(language=self.language,
                                                 cache_dir=cache_dir,
                                                 isolate_pkg_info=True)
        else:
            pm_proxy = get_pm_proxy(pm=pkg_manager,
                                    cache_dir=cache_dir,
                                    isolate_pkg_info=True)

        # reuse a cached astfilter result if one already exists
        cached_fname = pm_proxy.get_astfilter_fname(pkg_name=pkg_name,
                                                    pkg_version=pkg_version)
        astfilter_file = join(outdir, cached_fname)
        if exists(astfilter_file):
            logging.warning("skipping cached astfilter_file %s!",
                            astfilter_file)
            return

        # astgen result of the main package is mandatory; deps are best-effort
        main_astgen_result = self.get_astgen_result(pm_proxy=pm_proxy,
                                                    pkg_name=pkg_name,
                                                    outdir=outdir,
                                                    configpath=configpath,
                                                    pkg_version=pkg_version)
        if not main_astgen_result:
            logging.error("fail to run astfilter on pkg %s ver %s", pkg_name,
                          pkg_version)
            return
        astgen_results = [main_astgen_result]

        if not ignore_dep:
            # flatten the dependency tree and collect astgen results for deps
            try:
                flat_deps = pm_proxy.get_dep(pkg_name=pkg_name,
                                             pkg_version=pkg_version,
                                             flatten=True)
            except Exception as gde:
                logging.error("fail to get_dep on pkg %s ver %s: %s", pkg_name,
                              pkg_version, gde)
                return

            for dep_name, dep_version in flat_deps.items():
                dep_result = self.get_astgen_result(
                    pm_proxy=pm_proxy,
                    pkg_name=dep_name,
                    outdir=outdir,
                    configpath=configpath,
                    pkg_version=None if ignore_dep_version else dep_version)
                if dep_result:
                    astgen_results.append(dep_result)

        # check satisfiability of the specified smt formula and record it on
        # the main package's result
        satisfied = StaticAnalyzer._check_smt(astgen_results=astgen_results,
                                              configpath=configpath)
        main_astgen_result.pkgs[0].config.smt_satisfied = satisfied

        # TODO: maybe record the suspicious API usage in each dependent package as well
        # dump the astfilter result to file
        write_proto_to_file(proto=main_astgen_result,
                            filename=astfilter_file,
                            binary=False)
예제 #10
0
 def _pb_text_to_bin(proto, infile, outfile):
     """Round-trip a protobuf: load text-format infile, save binary outfile."""
     read_proto_from_file(proto, infile, binary=False)
     write_proto_to_file(proto, outfile, binary=True)
예제 #11
0
    def astgen(self, inpath, outfile, root=None, configpath=None, pkg_name=None, pkg_version=None, evaluate_smt=False):
        """Generate AST-based API usage results for a python package.

        Parses sources with the running (python2) interpreter first; on
        SyntaxError falls back to the python3 helper script. Optionally
        evaluates the smt formula over the results, then cleans up residues.

        :param inpath: file or directory to analyze
        :param outfile: path for the resulting PkgAstResults proto (text format)
        :param root: optional root used to relativize file paths
        :param configpath: path to the AstLookupConfig (text format)
        :param pkg_name: package name to record; defaults to basename(analyze_path)
        :param pkg_version: optional package version to record
        :param evaluate_smt: when True, evaluate the smt formula on the results
        """
        analyze_path, is_decompress_path, outfile, root, configpath = self._sanitize_astgen_args(
            inpath=inpath, outfile=outfile, root=root, configpath=configpath, language=self.language)

        # try python2
        try:
            # load the config proto
            configpb = AstLookupConfig()
            read_proto_from_file(configpb, configpath, binary=False)
            logging.debug("loaded lookup config from %s:\n%s", configpath, configpb)
            # invoke the language specific ast generators to call functions

            # get input files
            infiles, root = self._get_infiles(inpath=analyze_path, root=root, language=self.language)

            # initialize resultpb
            resultpb = PkgAstResults()
            pkg = resultpb.pkgs.add()
            pkg.config.CopyFrom(configpb)
            pkg.pkg_name = pkg_name if pkg_name is not None else basename(analyze_path)
            if pkg_version is not None:
                pkg.pkg_version = pkg_version
            pkg.language = ast_pb2.PYTHON
            for infile in infiles:
                # close each source file promptly instead of leaking the handle
                with open(infile, 'r') as srcf:
                    all_source = srcf.read()
                try:
                    tree = ast.parse(all_source, filename=infile)
                except SyntaxError as se:
                    logging.warning("Syntax error %s parsing file %s in python2!", se, infile)
                    # bare raise preserves the traceback; caught below to
                    # trigger the python3 fallback
                    raise
                # mark the tree with tokens information
                asttok = asttokens.ASTTokens(source_text=all_source, tree=tree, filename=infile)
                visitor = PythonDeclRefVisitor(asttok=asttok, configpb=configpb)
                visitor.visit(tree)
                logging.warning("collected functions: %s", Counter(visitor.get_declrefs()).items())

                filepb = self._get_filepb(infile, root)
                for base, name, args, source_text, source_range in visitor.get_declrefs():
                    api_result = self._get_api_result(base, name, args, source_text, source_range, filepb)
                    pkg.api_results.add().CopyFrom(api_result)

            # save resultpb
            write_proto_to_file(resultpb, outfile, binary=False)

        # try python3
        except SyntaxError as se:
            logging.error("Syntax error %s, now trying to parse %s again in python3!", se, analyze_path)
            astgen_py3_cmd = ['python3', 'astgen_py3.py', analyze_path, outfile, '-c', configpath]
            if root is not None:
                astgen_py3_cmd.extend(['-b', root])
            if pkg_name is not None:
                astgen_py3_cmd.extend(['-n', pkg_name])
            if pkg_version is not None:
                astgen_py3_cmd.extend(['-v', pkg_version])
            exec_command("python3 astgen", astgen_py3_cmd, cwd="static_proxy")
        except Exception as e:
            logging.error("Fatal error %s running astgen for %s!", e, analyze_path)

        # optionally evaluate smt formula
        if evaluate_smt:
            resultpb = PkgAstResults()
            read_proto_from_file(resultpb, filename=outfile, binary=False)
            satisfied = self._check_smt(astgen_results=[resultpb], configpath=configpath)
            resultpb.pkgs[0].config.smt_satisfied = satisfied
            write_proto_to_file(resultpb, filename=outfile, binary=False)

        # clean up residues
        self._cleanup_astgen(analyze_path=analyze_path, is_decompress_path=is_decompress_path)
예제 #12
0
def reformat(apis_file, json_result_file, outfile):
    """Convert pyt JSON taint results into a ModuleStatic proto file.

    :param apis_file: path to the AstLookupConfig proto (text format) listing APIs to check
    :param json_result_file: path to the pyt results in JSON format
    :param outfile: path where the combined ModuleStatic proto is written (text format)
    :return: None when the JSON results cannot be loaded
    """
    # use a context manager so the result file handle is always released,
    # and include the actual exception in the error log (it was swallowed before)
    try:
        with open(json_result_file, 'r') as inf:
            results = json.load(inf)
    except Exception as e:
        logging.error("failed to load pyt results in json %s: %s",
                      json_result_file, e)
        return None

    # load the astgen config from file
    config = AstLookupConfig()
    read_proto_from_file(config, apis_file, binary=False)
    logging.warning("loaded config with %d apis to check!", len(config.apis))

    # convert list of apis into dictionary with key=id, value=full_name for easier identification
    source_dict = {}
    sink_dict = {}
    for entry in config.apis:
        # FIXME: should we support func_only mode
        if entry.functionality == ast_pb2.SOURCE:
            source_dict[entry.id] = entry.full_name
        elif entry.functionality in (ast_pb2.SINK, ast_pb2.DANGER):
            sink_dict[entry.id] = entry.full_name

    nodes = []
    # dictionary with key=name of file within package found to contain vulnerabilities and value=tuple of (tree, asttok) for that file
    vuln_files_ASTs = {}
    for entry in results['vulnerabilities']:
        source = entry['source']
        # source['label'], source['line_number'], source['path']
        source_trigger_word = entry['source_trigger_word']
        sink = entry['sink']
        # sink['label'], sink['line_number'], sink['path']
        sink_trigger_word = entry['sink_trigger_word']
        api_type = entry['type']
        reassignment_nodes = entry['reassignment_nodes']
        # of type dict
        vuln_files_ASTs[source['path']] = ()
        vuln_files_ASTs[sink['path']] = ()
        nodes.append(
            Vulnerability(source, source_trigger_word, sink, sink_trigger_word,
                          api_type, reassignment_nodes))

    # initiate AST visitors (one tree per vulnerable file within package)
    # NOTE: loop variable renamed from 'file' to avoid shadowing the builtin
    for vuln_file in vuln_files_ASTs:
        with open(vuln_file, 'r') as srcf:
            src_ast = srcf.read()
        tree = ast.parse(src_ast, filename=vuln_file)
        asttok = asttokens.ASTTokens(source_text=src_ast,
                                     tree=tree,
                                     filename=vuln_file)
        # visitor = PythonVisitor(asttok=asttok)
        vuln_files_ASTs[vuln_file] = (tree, asttok)

    # initialize result and summary
    result = ModuleResult()
    set_result(result, config.apis, source_dict, sink_dict, nodes,
               vuln_files_ASTs)
    summary = ModuleSummary()
    set_summary(summary, config.apis, source_dict, sink_dict, nodes,
                vuln_files_ASTs)
    # merge everything into a single ModuleStatic and persist it as text proto
    static = ModuleStatic()
    static.flows.MergeFrom(result.flows)
    static.dangers.MergeFrom(result.dangers)
    static.sources.MergeFrom(summary.sources)
    static.sinks.MergeFrom(summary.sinks)
    static.taint_wrappers.MergeFrom(summary.taint_wrappers)
    write_proto_to_file(proto=static, filename=outfile, binary=False)
예제 #13
0
    def astgen(self,
               inpath,
               outfile,
               root=None,
               configpath=None,
               pkg_name=None,
               pkg_version=None,
               evaluate_smt=False):
        """
        There are two ways to implement the javascript ast parsing, each of them has their cons and pros.
        One is to directly use the npm esprima module, the other is to use the pypi esprima module.

        1. The npm module is the latest version and has lots of features to use directly. But it doesn't have a visitor
        and requires manually implementation.
        2. The pypi module is claimed to be a line by line translation of esprima in python, but it may be outdated and
        inactively maintained. However, it contains a visitor similar to python ast.NodeVisitor that we can directly use.

        To minimize the efforts, I currently choose the latter.
        """
        analyze_path, is_decompress_path, outfile, root, configpath = self._sanitize_astgen_args(
            inpath=inpath,
            outfile=outfile,
            root=root,
            configpath=configpath,
            language=self.language)

        # load the config proto
        configpb = AstLookupConfig()
        read_proto_from_file(configpb, configpath, binary=False)
        logging.debug("loaded lookup config from %s:\n%s", configpath,
                      configpb)
        # invoke the language specific ast generators to call functions

        # FIXME: current testdata sometimes fails the analyzer, inspect it!
        # get input files
        infiles, root = self._get_infiles(inpath=analyze_path,
                                          root=root,
                                          language=self.language)

        # initialize resultpb
        resultpb = PkgAstResults()
        pkg = resultpb.pkgs.add()
        pkg.config.CopyFrom(configpb)
        pkg.pkg_name = pkg_name if pkg_name is not None else basename(
            analyze_path)
        if pkg_version is not None:
            pkg.pkg_version = pkg_version
        pkg.language = ast_pb2.JAVASCRIPT
        for infile in infiles:
            # close each source file promptly instead of leaking the handle
            with open(infile, 'r') as srcf:
                all_source = srcf.read()
            try:
                # tree = esprima.parseModule(), esprima.parseScript()
                tree = esprima.parse(all_source, options={'loc': True})
            except Exception as e:
                logging.error(
                    "Fatal error %s parsing file %s! Skipping this file!", e,
                    infile)
                continue
            visitor = JavaScriptDeclRefVisitor(source=all_source,
                                               configpb=configpb)
            visitor.visit(tree)
            logging.warning("collected functions: %s",
                            Counter(visitor.get_declrefs()).items())

            filepb = self._get_filepb(infile, root)
            for base, name, args, source_text, source_range in visitor.get_declrefs():
                api_result = self._get_api_result(base, name, args,
                                                  source_text, source_range,
                                                  filepb)
                pkg.api_results.add().CopyFrom(api_result)

        # optionally evaluate smt formula
        if evaluate_smt:
            satisfied = self._check_smt(astgen_results=[resultpb],
                                        configpath=configpath)
            resultpb.pkgs[0].config.smt_satisfied = satisfied

        # save resultpb
        write_proto_to_file(resultpb, outfile, binary=False)

        # clean up residues
        self._cleanup_astgen(analyze_path=analyze_path,
                             is_decompress_path=is_decompress_path)