Exemplo n.º 1
0
    def _reset(self):
        """Begin a new episode on a freshly selected sample.

        The turn counter is cleared, then sample hashes are tried one after
        another until one can be loaded and is not already labelled 0
        (benign) by ``label_function``.  The accepted sample's bytes end up
        in ``self.bytez`` and its extracted features in
        ``self.observation_space``.

        Returns:
            The extracted features of the accepted sample as a NumPy array.
        """
        self.turns = 0
        accepted = False
        while not accepted:
            # choose the next candidate sample
            if not self.random_sample:
                # walk the list in order, wrapping around at the end
                position = self.sample_iteration_index % len(
                    self.available_sha256)
                self.sha256 = self.available_sha256[position]
                self.sample_iteration_index += 1
            else:
                self.sha256 = random.choice(self.available_sha256)

            # a fresh history record is written for every candidate,
            # including candidates rejected further down
            self.history[self.sha256] = dict(actions=[], evaded=False)

            if not self.cache:
                try:
                    self.bytez = interface.fetch_file(self.sha256)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # storage cannot provide this one; pick another
            else:
                self.bytez = self.samples[self.sha256]

            if label_function(self.bytez) == 0:
                # already benign, so the agent would learn nothing from it
                continue

            print("new sha256: {}".format(self.sha256))
            self.observation_space = self.feature_extractor.extract(self.bytez)
            accepted = True

        return np.asarray(self.observation_space)
Exemplo n.º 2
0
    def __init__(self,
                 sha256list,
                 random_sample=True,
                 maxturns=3,
                 output_path='evaded/blackbox/',
                 cache=False):
        """Configure the environment and load the first sample.

        Args:
            sha256list: identifiers of the samples the environment may use.
            random_sample: stored as ``self.random_sample``.
            maxturns: stored as ``self.maxturns``.
            output_path: directory name, joined onto the path obtained by
                applying ``os.path.dirname`` three times to this file's
                absolute path; the result is stored as ``self.output_path``
                and created if missing.
            cache: when true, every sample in ``sha256list`` is fetched up
                front into ``self.samples``; samples that cannot be
                retrieved are reported and left out of the cache.
        """
        self.cache = cache
        self.available_sha256 = sha256list
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        self.maxturns = maxturns
        self.feature_extractor = pefeatures.PEFeatureExtractor()
        self.random_sample = random_sample
        self.sample_iteration_index = 0
        self.output_path = os.path.join(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            output_path)
        # Create the resolved directory that is stored on the instance.  The
        # previous code tested and created the raw ``output_path`` argument,
        # i.e. a path relative to the current working directory.
        os.makedirs(self.output_path, exist_ok=True)

        self.history = OrderedDict()

        self.samples = {}
        if self.cache:
            for sha256 in self.available_sha256:
                try:
                    # Fetch by the loop variable: ``self.sha256`` has not
                    # been assigned at this point in ``__init__``.
                    self.samples[sha256] = interface.fetch_file(sha256)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # try a new sha256...this one can't be retrieved from storage

        self._reset()
Exemplo n.º 3
0
    def __init__(self, sha256list, random_sample=True, maxturns=3, output_path='evaded/blackbox/', cache=False):
        """Configure the environment and load the first sample.

        Args:
            sha256list: identifiers of the samples the environment may use.
            random_sample: stored as ``self.random_sample``.
            maxturns: stored as ``self.maxturns``.
            output_path: directory name, joined onto the path obtained by
                applying ``os.path.dirname`` three times to this file's
                absolute path; the result is stored as ``self.output_path``
                and created if missing.
            cache: when true, every sample in ``sha256list`` is fetched up
                front into ``self.samples``; samples that cannot be
                retrieved are reported and left out of the cache.
        """
        self.cache = cache
        self.available_sha256 = sha256list
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        self.maxturns = maxturns
        self.feature_extractor = pefeatures.PEFeatureExtractor()
        self.random_sample = random_sample
        self.sample_iteration_index = 0

        base_dir = os.path.abspath(__file__)
        for _ in range(3):  # strip the file name, then two directory levels
            base_dir = os.path.dirname(base_dir)
        self.output_path = os.path.join(base_dir, output_path)
        # Ensure the resolved directory exists.  The previous code tested and
        # created the raw ``output_path`` argument instead, which is relative
        # to the current working directory.
        os.makedirs(self.output_path, exist_ok=True)

        self.history = OrderedDict()

        self.samples = {}
        if self.cache:
            for sha256 in self.available_sha256:
                try:
                    # Fetch by the loop variable: ``self.sha256`` has not
                    # been assigned at this point in ``__init__``.
                    self.samples[sha256] = interface.fetch_file(sha256)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # try a new sha256...this one can't be retrieved from storage

        self._reset()
Exemplo n.º 4
0
def evaluate(action_function):
    """Apply an action policy to every holdout sample and record evasions.

    For each hash in ``sha256_holdout`` the sample is fetched and labelled
    with ``interface.get_label_windefender``.  Samples already labelled 0.0
    are recorded as misclassified and skipped.  Otherwise up to ``MAXTURNS``
    actions chosen by ``action_function`` are applied one at a time; as soon
    as the modified bytes are labelled 0.0, the original and modified bytes
    are written under ``PATH_SAVE/<sha256>/`` and the sample is finished.

    Args:
        action_function: callable taking the current bytes and returning the
            action to apply next.

    Returns:
        A ``(success, misclassified)`` tuple.  ``success`` holds, for every
        evaded sample, the list of actions that were applied;
        ``misclassified`` holds the hashes labelled 0.0 before any change.
    """
    success = []
    misclassified = []
    for sha256 in sha256_holdout:
        print("#########{}#########".format(sha256))
        bytez_o = interface.fetch_file(sha256)
        if interface.get_label_windefender(bytez_o) == 0.0:
            misclassified.append(sha256)
            continue  # already misclassified, move along

        actions = []
        bytez = bytez_o
        for _ in range(MAXTURNS):
            action = action_function(bytez)
            print(action)
            actions.append(action)
            bytez = manipulate.modify_without_breaking(bytez, [action])
            if interface.get_label_windefender(bytez) != 0.0:
                continue  # still detected, try another action

            success.append(actions)
            sample_dir = os.path.join(PATH_SAVE, sha256)
            # makedirs + exist_ok: a re-run, or a missing PATH_SAVE, must not
            # abort the evaluation the way a bare os.mkdir would.
            os.makedirs(sample_dir, exist_ok=True)
            # 'orign' is the existing on-disk name; kept as is so anything
            # reading these files keeps working.
            with open(os.path.join(sample_dir, 'orign'), 'wb') as f:
                f.write(bytez_o)
            with open(os.path.join(sample_dir, 'modified'), 'wb') as f:
                f.write(bytez)
            break
    return success, misclassified  # evasion accuracy is len(success) / len(sha256_holdout)
Exemplo n.º 5
0
    def _reset(self):
        """Reset the episode state and load the next usable sample.

        Candidates are taken either at random or in list order (wrapping
        around), depending on ``self.random_sample``.  A candidate is
        rejected when it cannot be fetched or when ``label_function`` already
        labels it 0 (benign).

        Returns:
            NumPy array with the features extracted from the accepted sample.
        """
        self.turns = 0
        while True:
            # select the candidate for this attempt
            if self.random_sample:
                candidate = random.choice(self.available_sha256)
            else:
                # sequential pass over the list, wrapping around at the end
                slot = self.sample_iteration_index % len(self.available_sha256)
                candidate = self.available_sha256[slot]
            self.sha256 = candidate
            if not self.random_sample:
                self.sample_iteration_index += 1

            self.history[candidate] = {'actions': [], 'evaded': False}

            if self.cache:
                self.bytez = self.samples[candidate]
            else:
                try:
                    self.bytez = interface.fetch_file(candidate)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # not retrievable from storage; try another one

            if label_function(self.bytez) == 0:
                # nothing to learn from a sample that is already benign
                continue

            print("new sha256: {}".format(self.sha256))
            self.observation_space = self.feature_extractor.extract(self.bytez)
            return np.asarray(self.observation_space)
Exemplo n.º 6
0
def PCA_on_training_model():
    """Fit a PCA model on the features of all available samples and save it.

    Features are extracted from every sample returned by
    ``interface.get_available_sha256()``, rescaled with ``MinMaxImp`` and
    decomposed with a PCA keeping 99% of the variance
    (``n_components=0.99``).  The raw and rescaled features, the three
    matrices returned by the decomposition, and the scaling parameters are
    written as ``.npy`` files under ``pca_models/``; the number of retained
    components goes to ``pca_models/dic_elements.csv``.

    Returns:
        ``(ex_list, nor_list, U, S, V)``: raw features, rescaled features and
        the first three values returned by the PCA fit.
    """
    import os  # local import: only needed to create the output directory

    file_list = interface.get_available_sha256()
    # One extractor is enough; building a new one per sample is wasted work.
    extractor = pefeatures.PEFeatureExtractor()
    ex_list = np.array(
        [extractor.extract(interface.fetch_file(b)) for b in file_list])
    print("all_samples: ", ex_list.shape)
    # nor_list = normalize(ex_list, axis=0)
    # nor_list = MinMaxScaler().fit_transform(ex_list)

    # The data min/max returned by MinMaxImp are not needed here.
    nor_list, _data_min, _data_max, scale_, min_ = MinMaxImp(ex_list)

    # Fit once.  The previous code called ``fit`` and then ``_fit`` on the
    # same data, running the decomposition twice.
    # NOTE(review): ``_fit`` is a private scikit-learn method; the ``[:3]``
    # keeps the unpacking valid should a release return extra values after
    # U, S, V -- confirm against the installed scikit-learn version.
    pca = PCA(n_components=0.99)
    U, S, V = pca._fit(nor_list)[:3]
    # dic_elements = {"n_component":pca.n_components_, "scale_":scale_, "min_":min_}
    dic_elements = {"n_component": pca.n_components_}

    # np.save does not create missing directories.
    os.makedirs("pca_models", exist_ok=True)
    np.save("pca_models/features.npy", ex_list)
    np.save("pca_models/nor_features.npy", nor_list)
    np.save("pca_models/U.npy", U)
    np.save("pca_models/S.npy", S)
    np.save("pca_models/V.npy", V)
    np.save("pca_models/scale.npy", scale_)
    np.save("pca_models/min.npy", min_)
    createDictCSV("pca_models/dic_elements.csv", dic_elements)
    print("reduced dimension: ", pca.n_components_)
    return ex_list, nor_list, U, S, V
Exemplo n.º 7
0
    def _reset(self):
        """Advance to the next episode and load a usable sample.

        Clears the turn counter, increments ``self.episode`` and then tries
        candidates until one can be loaded and is not already labelled 0
        (benign) by ``label_function``.  In test mode, once
        ``self.episode`` is above 0, each loaded candidate is also written
        under ``self.original_path`` and the accepted one is logged to
        ``test_log.txt``.

        Returns:
            NumPy array holding the observation computed for the accepted
            sample.
        """
        self.turns = 0
        self.episode += 1
        divider = "--------------------------------------------------------------------------------"

        while True:
            # choose the next candidate sample
            if not self.random_sample:
                # walk the list in order, wrapping around at the end
                position = self.sample_iteration_index % len(
                    self.available_sha256)
                self.sha256 = self.available_sha256[position]
                self.sample_iteration_index += 1
            else:
                self.sha256 = random.choice(self.available_sha256)

            self.history[self.sha256] = dict(actions=[], evaded=False)

            if not self.cache:
                try:
                    self.bytez = interface.fetch_file(self.sha256, self.test)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # storage cannot provide this one; pick another
            else:
                self.bytez = self.samples[self.sha256]

            # In test mode keep a copy of the untouched sample.  This happens
            # before the label check, so rejected candidates are written too.
            if self.test and self.episode > 0:
                copy_path = os.path.join(
                    self.original_path,
                    str(self.sample_iteration_index - 2) + self.sha256)
                with open(copy_path, 'wb') as outfile:
                    outfile.write(self.bytez)

            if label_function(self.bytez) == 0:
                # already benign, so the agent would learn nothing from it
                continue

            self.tips = 'test ' if self.test else ' '

            if self.episode > 0:
                print(divider)
                print("{}episode {} select training sample: {}".format(
                    self.tips, self.episode, self.sha256))
                if self.test:
                    log_line = "Process {} select sample: {}\n".format(
                        self.sample_iteration_index - 2, self.sha256)
                    with open("test_log.txt", 'a+') as f:
                        f.write(log_line)
                print(divider)

            # the observation comes from compute_observation rather than
            # directly from the feature extractor
            self.observation_space = self.compute_observation(self.bytez)
            break

        return np.asarray(self.observation_space)
Exemplo n.º 8
0
    def _reset(self):
        """Advance to the next episode and load a sample scored as malicious.

        Clears the turn counter, increments ``self.episode`` and then tries
        candidates until one can be loaded and its ``score_function`` result
        is not below ``malicious_threshold``.  The score of the last
        candidate examined is kept in ``self.original_score``.

        Returns:
            NumPy array with the features extracted from the accepted sample.
        """
        self.turns = 0
        self.episode += 1
        divider = "--------------------------------------------------------------------------------"

        while True:
            # select the candidate for this attempt
            if self.random_sample:
                self.sha256 = random.choice(self.available_sha256)
            else:
                # sequential pass over the list, wrapping around at the end
                slot = self.sample_iteration_index % len(self.available_sha256)
                self.sha256 = self.available_sha256[slot]
                self.sample_iteration_index += 1

            self.history[self.sha256] = {'actions': [], 'evaded': False}

            if self.cache:
                self.bytez = self.samples[self.sha256]
            else:
                try:
                    self.bytez = interface.fetch_file(self.sha256)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # not retrievable from storage; try another one

            self.original_score = score_function(self.bytez)
            if self.original_score < malicious_threshold:
                # scored as benign already; nothing for the agent to learn
                continue

            if self.episode > 0:
                print(divider)
                print("episode {} select training sample: {}".format(
                    self.episode, self.sha256))
                print(divider)

            self.observation_space = self.feature_extractor.extract(self.bytez)
            return np.asarray(self.observation_space)
def evaluate(action_function):
    """Run an action policy over the holdout samples and collect evasions.

    Every sample in ``sha256_holdout`` is fetched and labelled with
    ``interface.get_label_local``.  Samples already labelled 0.0 are listed
    as misclassified.  For the others, up to ``MAXTURNS`` actions chosen by
    ``action_function`` are applied in turn, stopping at the first
    modification that is labelled 0.0.

    Returns:
        ``(success, misclassified)``: ``success`` contains one mapping
        ``{sha256: [actions]}`` per evaded sample; ``misclassified`` contains
        the hashes labelled 0.0 before any modification.
    """
    evaded, already_benign = [], []
    for sample_hash in sha256_holdout:
        applied = defaultdict(list)
        current = interface.fetch_file(sample_hash)
        if interface.get_label_local(current) == 0.0:
            # the detector already misses this sample; nothing to evade
            already_benign.append(sample_hash)
            continue
        for _turn in range(MAXTURNS):
            chosen = action_function(current)
            print(chosen)
            applied[sample_hash].append(chosen)
            current = manipulate.modify_without_breaking(current, [chosen])
            verdict = interface.get_label_local(current)
            if verdict == 0.0:
                evaded.append(applied)
                break
    # evasion accuracy is len(success) / len(sha256_holdout)
    return evaded, already_benign
Exemplo n.º 10
0
def evaluate(action_function):
    """Measure how often ``action_function`` evades the local classifier.

    Each holdout sample is fetched and labelled with
    ``interface.get_label_local``.  A sample labelled 0.0 up front is
    recorded as misclassified; otherwise at most ``MAXTURNS`` actions are
    applied, one per turn, until the modified bytes are labelled 0.0.

    Returns:
        A tuple ``(success, misclassified)``.  ``success`` holds a
        ``{sha256: [actions]}`` mapping for every evaded sample and
        ``misclassified`` the hashes that were labelled 0.0 from the start.
    """
    success = []
    misclassified = []
    for sha256 in sha256_holdout:
        action_log = defaultdict(list)
        payload = interface.fetch_file(sha256)
        if interface.get_label_local(payload) == 0.0:
            # already misclassified, move along
            misclassified.append(sha256)
        else:
            for _ in range(MAXTURNS):
                step = action_function(payload)
                print(step)
                action_log[sha256].append(step)
                payload = manipulate.modify_without_breaking(payload, [step])
                if interface.get_label_local(payload) == 0.0:
                    success.append(action_log)
                    break
    # evasion accuracy is len(success) / len(sha256_holdout)
    return success, misclassified
Exemplo n.º 11
0
    def __init__(self, sha256list, random_sample=True, maxturns=3, output_path='evaded/blackbox/', cache=True,
                 test=False):
        """Configure the environment and load the first sample.

        Args:
            sha256list: identifiers of the samples the environment may use.
            random_sample: stored as ``self.random_sample``.
            maxturns: stored as ``self.maxturns``.
            output_path: directory name, joined onto the path obtained by
                applying ``os.path.dirname`` three times to this file's
                absolute path; the result is stored as ``self.output_path``
                and created if missing.
            cache: when true, every sample in ``sha256list`` is fetched up
                front into ``self.samples``; samples that cannot be
                retrieved are reported and left out of the cache.
            test: stored as ``self.test`` and passed as the second argument
                of ``interface.fetch_file``.
        """
        # PCA section (currently disabled)
        # features, nor_features, U, S, V, scale_, min_, pca_component = self.load_PCA_model()
        # self.PCA_V = V
        # self.feature_scale_ = scale_
        # self.feature_min_ = min_
        # self.PCA_component = pca_component

        self.total_turn = 0
        # number of episodes trained so far; presumably starts at -1 so that
        # the initial _reset() call below leaves it at 0 -- TODO confirm
        self.episode = -1
        self.cache = cache
        self.available_sha256 = sha256list
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        self.maxturns = maxturns
        self.feature_extractor = pefeatures.PEFeatureExtractor()
        self.random_sample = random_sample
        self.sample_iteration_index = 0
        self.test = test
        self.output_path = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(
                        os.path.abspath(__file__)))), output_path)
        # Create the resolved directory that is stored on the instance.  The
        # previous code tested and created the raw ``output_path`` argument,
        # i.e. a path relative to the current working directory.
        os.makedirs(self.output_path, exist_ok=True)

        self.history = OrderedDict()
        self.current_reward = 0

        self.samples = {}
        if self.cache:
            for sha256 in self.available_sha256:
                try:
                    self.samples[sha256] = interface.fetch_file(sha256, self.test)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # try a new sha256...this one can't be retrieved from storage

        self._reset()
Exemplo n.º 12
0
from gym_malware.envs.utils.pefeatures import PEFeatureExtractor

# bytez = interface.fetch_file("Backdoor.Win32.Hupigon.zay")
# features = PEFeatureExtractor().extract2(bytez)
# features2 = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features2.__len__())

# bytez = interface.fetch_file("VirusShare_0b3c009aa4e461a00c0b3755976b485e")
# # print(bytez)
# features = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features)

# Smoke test: print the ``extract2`` features and their shape for the first
# two available samples.
file_list = interface.get_available_sha256()
# The threshold is documented as an integer element count.
np.set_printoptions(threshold=1000000)

# run the tests
extractor = PEFeatureExtractor()
index = 0  # stays defined even when there are no samples
for index, sha256 in enumerate(file_list, start=1):
    print("{}:[file]:{}".format(index, sha256))
    bytez = interface.fetch_file(sha256)

    # Extract once and reuse the result; the previous code ran the
    # extraction twice per sample just to print the shape.
    features = extractor.extract2(bytez)
    print(features)
    print(features.shape)

    if index > 1:
        break  # two samples are enough for a smoke test
Exemplo n.º 13
0
import hashlib

from gym_malware.envs.controls import manipulate2 as manipulate
from gym_malware.envs.utils import interface

# np.random.seed(322333)
# random_action = lambda bytez: np.random.choice( list(manipulate.ACTION_TABLE.keys()) )
from other import lief_test

def random_action(bytez):
    """Return the action to apply to *bytez*; currently always 'section_append'."""
    return 'section_append'


# Sample to modify.  (The original note describes it as the original 32-bit
# putty.exe stored under its sha256 file name.)
fileName = 'VirusShare_0a7e4d6c6006ed4a33b5fe0c181062c0'
bytez = interface.fetch_file(fileName)
lief_test.showPE(fileName)

action = random_action(bytez)
print(action)
bytez_mod = manipulate.modify_without_breaking(bytez, [action])

with open("putty-mod32.exe", "wb") as new_file:
    new_file.write(bytez_mod)

# Inspect the result only after the ``with`` block has closed the file, so
# the modified bytes are guaranteed to be flushed to disk before it is read.
lief_test.showPE("putty-mod32.exe")
Exemplo n.º 14
0
import lief

from gym_malware.envs.utils import interface

# Parse directly from the bytez bytes; this approach only works with lief version 0.7.
# On macOS, using lief 0.8 raises an error (cause unknown).
from gym_malware.envs.utils.extractPE import extractPE

# Fetch one sample's raw bytes, hand them to lief's PE parser and print
# whatever the parser returns.
sample_name = "Backdoor.Win32.Hupigon.zay"
byte = interface.fetch_file(sample_name)
binary = lief.PE.parse(byte)
print(binary)