Example #1
import argparse
import os

from apiscout.ApiScout import ApiScout

# get_base_addr(), get_all_db_files(), and get_winapi1024_path() are helpers
# defined elsewhere in the originating script.

def main():
    parser = argparse.ArgumentParser(description='Demo: Use apiscout with a prepared api database (created using DatabaseBuilder.py) to crawl a dump for imports and render the results.')
    parser.add_argument('-f', '--filter', type=int, default=0, help='Filter out APIs that do not have a neighbour within N bytes.')
    parser.add_argument('-i', '--ignore_aslr', action='store_true', help='Do not apply the per-module ASLR offset potentially contained in an API DB file.')
    parser.add_argument('-c', '--collection_file', type=str, default='', help='Optionally match the output against a WinApi1024 vector collection file.')
    parser.add_argument('-b', '--base_addr', type=str, default='', help='Set base address to given value (int or 0x-hex format).')
    parser.add_argument('-t', '--import_table_only', action='store_true', help='Do not crawl for API references but only parse the import table instead - assumes an unmapped PE file as input.')
    parser.add_argument('binary_path', type=str, default='', help='Path to the memory dump to crawl.')
    parser.add_argument('db_path', type=str, nargs='*', help='Path to the DB(s). If no argument is given, use all files found in "./dbs"')

    args = parser.parse_args()
    if args.binary_path:
        binary = b""
        if os.path.isfile(args.binary_path):
            with open(args.binary_path, "rb") as f_binary:
                binary = f_binary.read()
        scout = ApiScout()
        base_addr = get_base_addr(args)
        print("Using base adress 0x{:x} to infer reference counts.".format(base_addr))
        scout.setBaseAddress(base_addr)
        # override potential ASLR offsets that are stored in the API DB files.
        scout.ignoreAslrOffsets(args.ignore_aslr)
        # load DB file
        db_paths = []
        if args.db_path:
            db_paths = args.db_path
        elif not args.import_table_only:
            db_paths = get_all_db_files()
        for db_path in db_paths:
            scout.loadDbFile(db_path)
        # load WinApi1024 vector
        scout.loadWinApi1024(get_winapi1024_path())
        # scout the binary
        results = {}
        if args.import_table_only:
            print("Parsing Import Table for\n  {}.".format(args.binary_path))
            results = scout.evaluateImportTable(binary, is_unmapped=True)
        else:
            print("Using \n  {}\nto analyze\n  {}.".format("\n  ".join(db_paths), args.binary_path))
            num_apis_loaded = scout.getNumApisLoaded()
            filter_info = " - neighbour filter: 0x%x" % args.filter if args.filter else ""
            print("Buffer size is {} bytes, {} APIs loaded{}.\n".format(len(binary), num_apis_loaded, filter_info))
            results = scout.crawl(binary)
        filtered_results = scout.filter(results, 0, 0, args.filter)
        print(scout.render(filtered_results))
        print(scout.renderVectorResults(filtered_results))
        if args.collection_file:
            print(scout.renderResultsVsCollection(filtered_results, args.collection_file))
    else:
        parser.print_help()
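
For reference, a minimal programmatic sketch of the same flow main() drives from the command line; it assumes apiscout is installed, and the DB path below is a hypothetical placeholder.

from apiscout.ApiScout import ApiScout

def quick_scan(dump_path, db_path="dbs/win7_sp1.json"):
    # Read the memory dump and crawl it for API references.
    with open(dump_path, "rb") as f_dump:
        binary = f_dump.read()
    scout = ApiScout()
    scout.setBaseAddress(0)    # no base address known in this sketch
    scout.loadDbFile(db_path)  # API DB created with DatabaseBuilder.py
    results = scout.crawl(binary)
    return scout.render(scout.filter(results, 0, 0, 0))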
Example #2
import hashlib
import json
import os
import re

import magic
import pefile
import peutils
import pyimpfuzzy
from py2neo import Graph

from apiscout.ApiScout import ApiScout
from pylouvain import PyLouvain  # module path for PyLouvain may differ locally


class Api():
    """Stores and analyze malwares info in neo4j"""
    def __init__(self,
                 host,
                 port,
                 user,
                 password,
                 threshold=40,
                 secure=False,
                 filepath=None,
                 filename=None,
                 folder_path=None):
        """Connects to neo4j database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        self.threshold = int(threshold)
        self.graph = Graph(host=host,
                           user=user,
                           password=password,
                           secure=secure,
                           port=port)
        self.filepath = filepath
        self.filename = filename
        self.folder_path = folder_path
        self.scout = ApiScout()
        self.scout.setBaseAddress(0)
        self.scout.loadWinApi1024(
            os.path.join(os.path.abspath(os.path.dirname(__file__)),
                         "data", "winapi1024v1.txt"))

        self.magictest = magic.Magic(uncompress=True)
        CWD = os.path.abspath(os.path.dirname(__file__))
        USERDB = os.path.join(CWD, os.path.normpath("data/UserDB.TXT"))
        with open(USERDB, 'rt') as f:
            sig_data = f.read()
            self.signatures = peutils.SignatureDatabase(data=sig_data)

        if self.folder_path:
            self.files = self.get_files(folder_path)

    def check_file(self, f):
        # Only process plain PE32 files; skip self-extracting archives.
        file_type = magic.from_file(f)
        if file_type.find('PE32') == -1:
            return False
        if file_type.find('self-extracting') != -1:
            return False
        try:
            pe = pefile.PE(f)
            # Skip files whose entry point matches a known packer signature.
            matches = self.signatures.match_all(pe, ep_only=True)
            if matches:
                return False
            return True
        except Exception:
            return False

    def get_files(self, folder_path):
        files_end = []

        files = []
        for root, dirnames, filenames in os.walk(folder_path):
            for filename in filenames:
                files.append(os.path.join(root, filename))

        for filepath in files:
            if not self.check_file(filepath):
                continue
            # The family label lives in a JSON file named after its containing
            # directory, either one or two levels above the sample.
            parts = filepath.split("/")
            json_path = "/".join(parts[:-2]) + "/" + parts[-3] + ".json"
            if not os.path.exists(json_path):
                json_path = "/".join(parts[:-1]) + "/" + parts[-2] + ".json"
            if not os.path.exists(json_path):
                continue
            with open(json_path, 'r') as f:
                file_family = json.load(f).get('common_name').replace(" ", "_")
            files_end.append((filepath, file_family))

        print(len(files_end), "files to load")
        return files_end
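
    # Hedged illustration (names are placeholders) of the layout get_files()
    # expects: a JSON file named after its containing directory, holding a
    # "common_name" key, sits next to the samples or one level above them:
    #
    #   corpus/
    #       emotet/
    #           emotet.json        # {"common_name": "Emotet"}
    #           unpacked/
    #               a1b2c3.bin     # family file found one level above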

    def get_digest(self, file):
        """ return hash, impuzzy and scout """
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()

        try:
            impfuzzy = pyimpfuzzy.get_impfuzzy(file)
        except Exception:
            impfuzzy = ""

        binary = b""
        if os.path.isfile(file):
            with open(file, "rb") as f_binary:
                binary = f_binary.read()
        try:
            scout_ev = self.scout.evaluateImportTable(binary, is_unmapped=True)
            scout_result = self.scout.getWinApi1024Vectors(scout_ev).get(
                'import_table', {}).get('vector', None)
            scout_confidence = self.scout._apivector.getVectorConfidence(
                scout_result)
        except Exception:
            # Keep track of samples whose import table could not be vectorized.
            with open('fail_list.txt', 'a') as f:
                f.write(file + "\n")
            scout_result = None
            scout_confidence = None

        with open(file, "rb") as f:
            while True:
                buf = f.read(2047)
                if not buf:
                    break
                md5.update(buf)
                sha1.update(buf)
                sha256.update(buf)

        return (scout_result, impfuzzy, md5.hexdigest(), sha1.hexdigest(),
                sha256.hexdigest(), scout_confidence)
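
    # Hedged usage sketch (sample path is a placeholder): get_digest() returns
    # six values in this order:
    #
    #   vector, impfuzzy, md5, sha1, sha256, confidence = api.get_digest(
    #       "/samples/a1b2c3.bin")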

    def impfuzzy_comp(self, list_old, list_new):
        # Matches a well-formed ssdeep hash: blocksize:hash:hash.
        ssdeep = re.compile(r"^[0-9]{1,5}:[0-9a-zA-Z/+]+:[0-9a-zA-Z/+]+$",
                            re.DOTALL)
        complist = []
        list_len = len(list_new)
        i = 0
        # Compare every new item against the remaining new items.
        for item_new in list_new:
            i += 1
            if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                for j in range(i, list_len):
                    if re.search(ssdeep,
                                 list_new[j][2]) and len(list_new[j][2]) < 150:
                        complist.append([
                            item_new[0], list_new[j][0],
                            pyimpfuzzy.hash_compare(item_new[2],
                                                    list_new[j][2])
                        ])

        # Compare every new item against the items already in the database.
        if list_old:
            for item_new in list_new:
                if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                    for item in list_old:
                        if re.search(ssdeep, item[2]) and len(item[2]) < 150:
                            complist.append([
                                item_new[0], item[0],
                                pyimpfuzzy.hash_compare(item_new[2], item[2])
                            ])

        return complist
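
    # The core call impfuzzy_comp() builds on: pyimpfuzzy.hash_compare scores
    # two impfuzzy (ssdeep-over-import-table) hashes on a 0-100 scale, e.g.:
    #
    #   score = pyimpfuzzy.hash_compare(hash_a, hash_b)  # placeholder hashes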

    def scout_comp(self, list_old, list_new):
        complist = []
        list_len = len(list_new)
        i = 0
        # Pairwise similarity among the new items (upper triangle only).
        for item_new in list_new:
            i += 1
            for j in range(i, list_len):
                complist.append([
                    item_new[0], list_new[j][0],
                    int(
                        self.scout.matchVectors(item_new[3], list_new[j][3]) *
                        100)
                ])
        # Similarity of each new item against the existing database entries.
        for item_new in list_new:
            for item in list_old:
                complist.append([
                    item_new[0], item[0],
                    int(self.scout.matchVectors(item_new[3], item[3]) * 100)
                ])
        return complist
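
    # Likewise, the similarity primitive scout_comp() wraps: matchVectors()
    # appears to return a float in [0, 1], scaled here to an integer
    # percentage (vector strings below are placeholders):
    #
    #   score = int(self.scout.matchVectors(vector_a, vector_b) * 100)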

    def process(self):

        hashlist = []
        hashlist_new = []
        nodes = []
        edges = []
        relationships = []

        # Recover all malware nodes currently stored in the graph.
        database = self.graph.run(
            "MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.scout_result, m.scout_confidence, m.md5, m.sha1, m.sha256, m.tag"
        ).data()
        if database:
            for d in database:
                hashlist.append([
                    d["m.id"], d["m.name"], d["m.impfuzzy"],
                    d["m.scout_result"], d["m.scout_confidence"], d["m.md5"],
                    d["m.sha1"], d["m.sha256"], d["m.tag"]
                ])

        nodes_count = len(database)
        i = nodes_count

        relation_data = self.graph.run(
            "MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id, m2.id, s.value"
        ).data()
        if relation_data:
            for r in relation_data:
                relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])
        for x in range(nodes_count):
            nodes.append(x)

        # Batch mode: run the check for each collected file.
        if self.folder_path:
            for item in self.files:
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    item[0])
                # Skip samples without a usable vector ('A171' encodes an
                # all-zero WinApi1024 vector).
                if scout_result in ("", 'A171', None):
                    continue

                query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
                objs = self.graph.run(query).data()
                if not objs and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([
                        i, item[0].split("/")[-1], impfuzzy, scout_result,
                        scout_confidence, md5, sha1, sha256, item[1]
                    ])
                    i += 1
                else:
                    continue
        else:
            # Single-file mode: we are in the reporting module, and the file
            # needs a valid apiscout vector to be processed.
            if self.check_file(self.filepath):
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    self.filepath)
                if scout_result in ("", 'A171', None):
                    return {}
            else:
                return {}

            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256

            objs = self.graph.run(query).data()
            if not objs:
                nodes.append(nodes_count)
                hashlist_new.append([
                    nodes_count, self.filename, impfuzzy, scout_result,
                    scout_confidence, md5, sha1, sha256, None
                ])
            else:
                return self.search_hash(sha256)

        # Calculate apiscout correlation
        result_list = self.scout_comp(hashlist, hashlist_new)

        # Re-cluster only when new nodes were added; edges below the threshold
        # are added with weight 0 rather than dropped.
        partition = []
        if len(database) != len(nodes):
            for edge in result_list + relationships:
                if edge[2] > self.threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()

        # Create nodes; statement_c is a Cypher template defined elsewhere in
        # the originating module.
        tx = self.graph.begin()

        for entry in hashlist_new + hashlist:
            i = 0
            for a in partition:
                i += 1
                if entry[0] in a:
                    tx.append(
                        statement_c, {
                            "id": entry[0],
                            "name": entry[1],
                            "impfuzzy": entry[2],
                            "scout_result": entry[3],
                            "scout_confidence": entry[4],
                            "md5": entry[5],
                            "sha1": entry[6],
                            "sha256": entry[7],
                            "tag": entry[8],
                            "cluster": i
                        })

        # Create relationships; statement_r is likewise defined elsewhere.
        for result in result_list:
            if result[2] > self.threshold:
                tx.append(statement_r, {
                    "id1": result[0],
                    "id2": result[1],
                    "value_scout": result[2]
                })

        tx.process()
        tx.commit()

        # In the single-file case, return the report for the new sample.
        if self.filename:
            return self.search_hash(sha256)
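
    # For context, a standalone sketch of the Louvain step used above (toy
    # graph; edge format is [[node_a, node_b], weight] as passed by process):
    #
    #   nodes = [0, 1, 2, 3]
    #   edges = [[[0, 1], 80], [[2, 3], 75], [[1, 2], 0]]
    #   pyl = PyLouvain(nodes, edges)
    #   partition, modularity = pyl.apply_method()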

    def process_file(self, filepath, filename):
        self.filepath = filepath
        self.filename = filename
        return self.process()

    def search_hash(self, data):

        return_dict = {}

        # identify hash type
        HASHES = (
            ("md5", "^[a-fA-F0-9]{32}$"),
            ("sha1", "^[a-fA-F0-9]{40}$"),
            ("sha256", "^[a-fA-F0-9]{64}$"),
        )
        res = None
        for hash_type, pattern in HASHES:
            if re.match(pattern, data):
                res = hash_type
                break
        # No hash type matched; nothing to search for.
        if res is None:
            return {}

        family_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN distinct m2.tag as tag, max(s.value) as max order by max(s.value) desc" % (
            res, data)
        file_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN m2.tag as tag, m2.sha256 as sha256, max(s.value) as max order by max(s.value) desc LIMIT 10" % (
            res, data)
        cluster_count_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN count(p.value) as total" % (
            res, data)
        cluster_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN m2.sha256, m2.tag, p.value, m3.sha256, m3.cluster, m3.tag" % (
            res, data)
        item_query = "MATCH (m:Malware) WHERE m.%s=\"%s\" RETURN m" % (res,
                                                                       data)
        item_data = self.graph.run(item_query).data()
        cluster_count_list = self.graph.run(cluster_count_query).data()

        cluster_count = cluster_count_list[0]['total'] if len(
            cluster_count_list) > 0 else None
        return_dict['info'] = item_data[0]['m'] if len(item_data) > 0 else None

        family_objs = self.graph.run(family_query).data()
        if family_objs:
            return_dict['families'] = family_objs
            return_dict['files'] = self.graph.run(file_query).data()
            if cluster_count and cluster_count < 100:
                return_dict['cluster'] = self.graph.run(cluster_query).data()
            else:
                return_dict['cluster_count'] = cluster_count
        return return_dict
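
A hedged end-to-end sketch of driving the class above; the connection parameters and file paths are placeholders and assume a reachable neo4j instance.

api = Api(host="localhost", port=7687, user="neo4j", password="secret",
          threshold=40)
report = api.process_file("/samples/a1b2c3.bin", "a1b2c3.bin")
print(report.get('families'))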