Пример #1
0
def PCA_on_training_model():
    """Fit a PCA model on features extracted from every available sample.

    Features are extracted from each file listed by
    ``interface.get_available_sha256()``, rescaled with ``MinMaxImp`` and
    passed to ``PCA(n_components=0.99)``. The raw and rescaled feature
    matrices, the three arrays returned by ``pca._fit``, the scaling
    parameters and the retained component count are written under
    ``pca_models/`` (created if missing).

    Returns:
        tuple: ``(ex_list, nor_list, U, S, V)`` -- the raw feature matrix,
        the rescaled feature matrix, and the three values returned by
        ``pca._fit``.
    """
    import os

    output_dir = "pca_models"

    file_list = interface.get_available_sha256()
    # One extractor serves every file; constructing a new one per sample
    # only repeats the constructor work.
    extractor = pefeatures.PEFeatureExtractor()
    ex_list = np.array([
        extractor.extract(interface.fetch_file(b)) for b in file_list
    ])
    print("all_samples: ", ex_list.shape)

    # Only the rescaled data and the scale/offset pair are used below; the
    # per-feature min/max returned alongside them are ignored.
    nor_list, _data_min, _data_max, scale_, min_ = MinMaxImp(ex_list)

    pca = PCA(n_components=0.99).fit(nor_list)
    # NOTE(review): ``_fit`` is a private method and presumably repeats the
    # decomposition that ``fit`` already performed; it is kept because the
    # U/S/V factors are taken from its return value. The number of values it
    # returns may depend on the installed library version -- confirm.
    U, S, V = pca._fit(nor_list)
    dic_elements = {"n_component": pca.n_components_}

    # np.save does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    np.save("pca_models/features.npy", ex_list)
    np.save("pca_models/nor_features.npy", nor_list)
    np.save("pca_models/U.npy", U)
    np.save("pca_models/S.npy", S)
    np.save("pca_models/V.npy", V)
    np.save("pca_models/scale.npy", scale_)
    np.save("pca_models/min.npy", min_)
    createDictCSV("pca_models/dic_elements.csv", dic_elements)
    print("reduced dimension: ", pca.n_components_)
    return ex_list, nor_list, U, S, V
Пример #2
0
from gym_malware.envs.utils import interface
from gym_malware.envs.utils.pefeatures import PEFeatureExtractor

# bytez = interface.fetch_file("Backdoor.Win32.Hupigon.zay")
# features = PEFeatureExtractor().extract2(bytez)
# features2 = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features2.__len__())

# bytez = interface.fetch_file("VirusShare_0b3c009aa4e461a00c0b3755976b485e")
# # print(bytez)
# features = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features)

# Smoke test: print the ``extract2`` feature vector and its shape for the
# first two available samples.
file_list = interface.get_available_sha256()
# Raise the summarisation threshold so printed arrays are shown in full.
np.set_printoptions(threshold=1e6)

# run the tests
extractor = PEFeatureExtractor()
for index, sha256 in enumerate(file_list, start=1):
    print("{}:[file]:{}".format(index, sha256))
    bytez = interface.fetch_file(sha256)

    # Extract once and reuse the result for both prints instead of running
    # the extractor twice on the same bytes.
    features = extractor.extract2(bytez)
    print(features)
    print(features.shape)

    # Stop after the second sample.
    if index > 1:
        break
Пример #3
0
from gym.envs.registration import register

# Sample identifiers that back every environment registered below.
from gym_malware.envs.utils import interface, pefeatures
sha256 = interface.get_available_sha256()
# Reserve 200 identifiers as a holdout set. NumPy's global seed is fixed
# first, presumably so the split is reproducible between runs.
from sklearn.model_selection import train_test_split
import numpy as np
np.random.seed(123)
sha256_train, sha256_holdout = train_test_split(sha256, test_size=200)

# Turn limit handed to each environment through its kwargs.
MAXTURNS = 80

# Environment over the training identifiers, with random_sample enabled.
register(
    id='malware-v0',
    entry_point='gym_malware.envs:MalwareEnv',
    kwargs=dict(
        random_sample=True,
        maxturns=MAXTURNS,
        sha256list=sha256_train,
    ),
)

# Environment over the holdout identifiers, with random_sample disabled.
register(
    id='malware-test-v0',
    entry_point='gym_malware.envs:MalwareEnv',
    kwargs=dict(
        random_sample=False,
        maxturns=MAXTURNS,
        sha256list=sha256_holdout,
    ),
)

register(id='malware-score-v0',
         entry_point='gym_malware.envs:MalwareScoreEnv',
Пример #4
0
from gym.envs.registration import register

# Sample identifiers that back every environment registered below.
from gym_malware.envs.utils import interface, pefeatures
sha256 = interface.get_available_sha256()
# Reserve 200 identifiers as a holdout set. NumPy's global seed is fixed
# first, presumably so the split is reproducible between runs.
from sklearn.model_selection import train_test_split
import numpy as np
np.random.seed(123)
sha256_train, sha256_holdout = train_test_split(sha256, test_size=200)

# Turn limit handed to each environment through its kwargs.
MAXTURNS = 10

# Environment over the training identifiers, with random_sample enabled.
register(
    id='malware-v0',
    entry_point='gym_malware.envs:MalwareEnv',
    kwargs={
        'random_sample': True,
        'maxturns': MAXTURNS,
        'sha256list': sha256_train,
    },
)

# Environment over the holdout identifiers, with random_sample disabled.
register(
    id='malware-test-v0',
    entry_point='gym_malware.envs:MalwareEnv',
    kwargs={
        'random_sample': False,
        'maxturns': MAXTURNS,
        'sha256list': sha256_holdout,
    },
)

# Score-based environment class over the training identifiers.
register(
    id='malware-score-v0',
    entry_point='gym_malware.envs:MalwareScoreEnv',
    kwargs={
        'random_sample': True,
        'maxturns': MAXTURNS,
        'sha256list': sha256_train,
    },
)