Exemplo n.º 1
0
 def testLoadFingerprintCollectionFile(self):
     """Check that the example collection loads and contains the "win.urlzone" entry."""
     # Two ".." hops from this file's own path resolve to the parent of the
     # directory containing this file, which is used as the project root.
     this_file_path = os.path.abspath(__file__)
     project_root = os.path.abspath(os.path.join(this_file_path, "..", ".."))
     collection_path = os.path.join(project_root, "dbs", "collection_example.csv")
     apivector = ApiVector()
     loaded_vectors = apivector._loadCollectionData(collection_path)
     # assertIn shows the container in the failure message, unlike assertTrue(x in y).
     self.assertIn("win.urlzone", loaded_vectors)
def vectorize(dataset, logs_folder, base, imports_type, data_dir, dataset_file=None, out_file=None):
    """Vectorize every sample of `dataset` and dump the vectors plus coverage stats as JSON.

    For each sample, `<logs_folder>/<sample>.json` is loaded, the API dictionary selected by
    `imports_type` is turned into a vector through `ApiVector`, and the vector together with
    its coverage percentage is stored in the output document.

    Args:
        dataset: iterable of sample identifiers; each one names a `<sample>.json` log file.
        logs_folder: directory containing the per-sample JSON logs.
        base: path of the vector base file passed to `ApiVector`.
        imports_type: key of the API dictionary to read from each log, or "all" to merge
            the "it" and "dynamic" dictionaries.
        data_dir: directory under which the default "vectors" output folder is created
            (only used when `out_file` is None).
        dataset_file: optional path of the dataset file; stored in the output relative to
            the output directory, or as None when not given.
        out_file: optional output file path; when None a name is derived from
            `imports_type`, the detected weights and the vector size.

    Raises:
        OSError: if the output directory cannot be created.
    """
    api_vector = ApiVector(base)
    vector_base = api_vector.getWinApi1024()
    vector_weights = detect_weights(os.path.basename(base), vector_base)
    vector_size = len(vector_base)

    if out_file is None:
        out_path = os.path.join(data_dir, "vectors")
        out_file = os.path.join(out_path, "vectors_{}_{}_{}.json".format(imports_type, vector_weights, vector_size))
    else:
        # A bare file name has an empty dirname; fall back to the current directory so
        # that os.makedirs("") is never attempted.
        out_path = os.path.dirname(out_file) or os.curdir

    if not os.path.exists(out_path):
        try:
            os.makedirs(out_path)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    vectors_dict = {
        "dataset": unix_relpath(dataset_file, out_path) if dataset_file is not None else None,
        "base": {
            "path": unix_relpath(base, out_path),
            "imports_type": imports_type,
            "weights": vector_weights,
            "size": vector_size
        },
        "vectors": {},
        "coverage": {
            "average": 0,
            "median": 0,
            "details": {}
        }
    }
    coverage_details = vectors_dict["coverage"]["details"]
    coverage_sum = 0
    for sample in dataset:
        sample_log_path = os.path.join(logs_folder, "{}.json".format(sample))
        # Only the parsing needs the file handle; release it before vectorizing.
        with open(sample_log_path, mode="r") as log_file:
            apis = json.load(log_file, cls=decoder, list_type=set)

        if imports_type == "all":
            apis = merge_two_dicts(apis["it"], apis["dynamic"])
        else:
            apis = apis[imports_type]

        result = api_vector.getApiVectorFromApiDictionary(apis)["user_list"]
        coverage = result["percentage"]
        vectors_dict["vectors"][sample] = result["vector"]
        coverage_details[sample] = coverage
        coverage_sum += coverage

    # With an empty dataset there is nothing to average; keep the zero defaults instead
    # of dividing by zero or asking for the median of an empty list.
    if coverage_details:
        vectors_dict["coverage"]["average"] = coverage_sum / len(coverage_details)
        vectors_dict["coverage"]["median"] = median(list(coverage_details.values()))

    with open(out_file, mode="w") as vectors_file:
        json.dump(vectors_dict, vectors_file, indent=4)
Exemplo n.º 3
0
 def setUpClass(cls):
     """Install a ten-entry fixture on a shared ApiVector instance.

     The DLL/API pairs and the rank list are projections of the same
     fixture rows, so they are derived from it rather than repeated.
     """
     super(ApiVectorTestSuite, cls).setUpClass()
     # Each row is (dll, api, category, rank).
     fixture_rows = [
         ("kernel32.dll", "CreateActCtx", "execution", 928),
         ("kernel32.dll", "DeactivateActCtx", "execution", 927),
         ("kernel32.dll", "ReleaseActCtx", "execution", 922),
         ("ole32.dll", "CoCreateInstance", "execution", 105),
         ("ole32.dll", "CoCreateInstanceEx", "execution", 547),
         ("ole32.dll", "CoGetClassObject", "execution", 644),
         ("kernel32.dll", "Module32First", "execution", 445),
         ("kernel32.dll", "Module32Next", "execution", 560),
         ("advapi32.dll", "CreateService", "execution", 360),
         ("advapi32.dll", "DeleteService", "execution", 299),
     ]
     cls.vector = ApiVector()
     cls.vector._winapi1024 = fixture_rows
     cls.vector._dllapi_only = [(row[0], row[1]) for row in fixture_rows]
     cls.vector._vector_ranks_only = [row[3] for row in fixture_rows]
Exemplo n.º 4
0
def main(args):
    """Compute a score for every pair of vectors in a vectors file and dump them as JSON.

    The vectors file is loaded, all 2-combinations of its vector keys are split into
    `args.threads` job lists, and each list is handed to `worker_function` in a process
    pool. The merged scores are written together with the vector base description and
    the dataset path, both stored relative to the output file's directory.

    Args:
        args: parsed command-line namespace. Attributes read here:
            vectors_file: path of the JSON vectors file to score.
            threads: number of pool workers (and of job partitions).
            verbose: forwarded unchanged to `worker_function`.
            out_file: output path, or None to derive
                `<parent of vectors dir>/scores/scores_<type>_<weights>_<size>.json`;
                the derived path is written back to `args.out_file`.

    Exits with status 1 when the vector base referenced by the vectors file is missing.
    """
    # Only the JSON parsing needs the handle; close it before the long-running work.
    with open(args.vectors_file, mode="r") as fp:
        vectors_file = json.load(fp)

    vectors = vectors_file["vectors"]
    vectors_dir = os.path.dirname(args.vectors_file)

    jobs_list = list(combinations(vectors.keys(), 2))
    splitted_jobs = split_jobs(jobs_list, args.threads)
    logging.debug("Jobs count: {}".format(len(jobs_list)))
    logging.debug("  For each worker: {}".format([len(splitted) for splitted in splitted_jobs]))

    # The base path is stored relative to the vectors file's directory.
    vectorbase_path = os.path.normpath(os.path.join(vectors_dir, vectors_file["base"]["path"]))
    if not os.path.isfile(vectorbase_path):
        logging.error("VectorBase file '{}' does not exist".format(vectorbase_path))
        sys.exit(1)

    freeze_support()  # for Windows support
    with Pool(args.threads, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        api_vector = ApiVector(vectorbase_path)
        logging.info("Calculating the scores...")
        results = []
        for i in range(args.threads):
            task_args = (i, args.threads, api_vector, splitted_jobs[i], vectors, args.verbose)
            results.append(pool.apply_async(worker_function, args=task_args))
        pool.close()
        pool.join()
        logging.info("  COMPLETED")
        logging.info("Merging the results... ")
        scores = merge_dictionaries([res.get() for res in results])
        logging.info("  COMPLETED")

    base = vectors_file["base"]
    if args.out_file is None:
        out_dir = os.path.join(os.path.dirname(vectors_dir), "scores")
        args.out_file = os.path.join(out_dir, "scores_{}_{}_{}.json".format(base["imports_type"],
                                                                            base["weights"],
                                                                            base["size"]))
    else:
        # Relative paths in the output must be anchored at the directory the file is
        # really written to. A bare file name has an empty dirname, so fall back to the
        # current directory instead of calling os.makedirs("").
        out_dir = os.path.dirname(args.out_file) or os.curdir

    if not os.path.exists(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # The vectors file may carry a null dataset entry; propagate it instead of
    # failing on os.path.join(vectors_dir, None).
    dataset_entry = vectors_file["dataset"]
    if dataset_entry is not None:
        dataset_path = os.path.normpath(os.path.join(vectors_dir, dataset_entry))
        dataset_entry = os.path.relpath(dataset_path, out_dir)

    # `base` is the same dict object as vectors_file["base"]; its path is re-anchored
    # to the scores directory before being written out.
    base["path"] = os.path.relpath(vectorbase_path, out_dir)
    scores_dict = {
        "dataset": dataset_entry,
        "base": base,
        "scores": scores
    }

    with open(args.out_file, mode="w") as opf:
        logging.info("Dumping scores to '{}'... ".format(os.path.basename(args.out_file)))
        json.dump(scores_dict, opf, indent=4)
        logging.info("  COMPLETED")
Exemplo n.º 5
0
 def testVectorCompression(self):
     """Compress random 0/1 vectors of length 2**3 to 2**12 and check both decompressors restore the length."""
     apivector = ApiVector()
     for exponent in range(3, 13):
         size = 2 ** exponent
         # The vector definition is swapped for a dummy one of matching length.
         apivector._winapi1024 = list(range(size))
         bits = [random.randint(0, 1) for _ in range(size)]
         packed = apivector.compress(bits)
         restored = apivector.decompress(packed)
         n_restored = apivector.n_decompress(packed)
         self.assertEqual(size, len(restored))
         self.assertEqual(size, len(n_restored))
Exemplo n.º 6
0
 def testLoadWinApi1024Definition(self):
     """Load the WinApi1024 definition shipped in the apiscout package and check it has 1024 entries."""
     # Locate the data file relative to the installed apiscout module.
     module_path = os.path.dirname(os.path.realpath(apiscout.__file__))
     LOG.info("Using module path %s" % module_path)
     winapi_path = os.path.join(module_path, "data", "winapi1024v1.txt")
     apivector = ApiVector(winapi_path)
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(len(apivector._winapi1024), 1024)
Exemplo n.º 7
0
 def testLoadWinApi1024Definition(self):
     """Load the WinApi1024 definition from the project's data folder and check it has 1024 entries."""
     # Two ".." hops from this file's own path resolve to the parent of the
     # directory containing this file, which is used as the project root.
     this_file_path = os.path.abspath(__file__)
     project_root = os.path.abspath(os.path.join(this_file_path, "..", ".."))
     winapi_path = os.path.join(project_root, "data", "winapi1024v1.txt")
     apivector = ApiVector(winapi_path)
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(len(apivector._winapi1024), 1024)