Exemplo n.º 1
0
def armed(bin_bytes, sample, n, rounds, files_expected, detection_threshold,
          scanner):
    """
    ARMED: Automatic Random Malware Modifications to Evade Detection.

    Injects n random perturbations into the input PE malware, for up to
    `rounds` rounds, in order to find adversarial examples. Every mutation
    is first sent to a sandbox to check that it is still functional and is
    then submitted to the detection stage.

    Input:
        bin_bytes: binaries from input malware sample (forwarded to
            f.rec_mod_files)
        sample: original malware sample as received from the caller
            (forwarded to f.hash_files and f.collect_info_CSV)
        n: number of perturbations to inject
        rounds: number of rounds to run when searching for evasions
        files_expected: number of malware mutations expected as output;
            the search stops as soon as this count is reached
        detection_threshold: run until number of detections is below
            threshold (only consulted on the VirusTotal path)
        scanner: commercial AV or malware model classifier (forwarded to
            malware_detection and save_file_database)

    Returns:
        Tuple (new_samples, new_corrupt_samples): the counters of
        successful and of non-functional mutations accumulated here.
    """
    # Decide whether to use remote (VirusTotal) or local detection & remote
    # or local sandbox. Both remote paths are currently switched off.
    useVT = False
    useHA = False

    # The original sample does not change between rounds, so hash it and
    # fetch its VirusTotal report (the detection benchmark) only once
    # instead of once per round.
    hash_sample = f.hash_files(sample)
    sample_report = f.get_report_VT(hash_sample, rescan=False)
    #sample_report = {'positives': 49, 'total': 66} # Debug mode (without VT/offline)

    # Iterate to generate -m mutations for all perturbations on the loop.
    # max_number_perts starts equal to n and n is incremented at the end of
    # the body, so this loop currently executes exactly once.
    max_number_perts = n
    while n <= max_number_perts:
        new_samples = 0
        new_corrupt_samples = 0
        for r in range(rounds):

            # Create a dict with all perturbations & choose random actions
            actions = f.actions_vector(m.ACTION_TABLE.keys())
            chosen_actions = f.create_random_actions(actions, n)

            # Call a recursive function to inject n perturbations on a given sample
            print(
                '\n### ARMED: Automatic Random Malware Modifications to Evade Detection ###\n'
            )
            print('# Manipulation Box # Round {} of {} #\n'.format(
                r + 1, rounds))
            perturbs = n - 1
            start_pert = time()
            mod_sample = f.rec_mod_files(bin_bytes, actions, chosen_actions,
                                         perturbs, n)
            print('Time injecting perturbations: {} s'.format(
                round(time() - start_pert, 2)))

            # Send the modified sample to sandbox to check functionality (not corrupt)
            print('\n# Sandbox (Oracle) # Round {} of {} #'.format(
                r + 1, rounds))

            # Check if use remote or local sandbox. Both branches bind the
            # same name so the analysis stage below always has a response
            # to work with (previously the remote branch left json_send
            # undefined).
            # NOTE(review): confirm malware_analysis accepts the response
            # returned by f.send_HA before enabling useHA.
            if useHA:
                json_send = f.send_HA(mod_sample, 120)
            else:
                json_send = f.send_local_sandbox(mod_sample)

            # Calculate hash of the modified sample
            mod_sample_hash = f.hash_files(mod_sample)

            # Collect info to writeCSV function
            CSV = f.collect_info_CSV(sample, sample_report, n - 1,
                                     chosen_actions, mod_sample_hash,
                                     hash_sample)

            # Malware analysis & malware detection stages
            funcional, url_sandbox = malware_analysis(mod_sample, json_send,
                                                      useVT, CSV)

            # Check if use remote or local detection along with functionality
            if not funcional:
                new_corrupt_samples += 1

            elif useVT:
                new_samples += 1
                CSV['Full_Analysis_Report'] = url_sandbox
                vt_positives = malware_detection_VT(sample_report, CSV)
                # Stop searching once detections drop below the threshold
                if vt_positives < detection_threshold:
                    break

            else:
                print(
                    '# Malware Classifier # Round {} # Perturbation {} of {} #\n'
                    .format(r + 1, int(CSV['Perturbations']), n))
                # Check if mutation is detected
                mutation = CSV['Perturbations'] + '_m.exe'
                print('Running detection for:', mutation)
                detected = malware_detection(mutation, scanner)
                # save_file_database's return value is added to the counter
                # (presumably 1 when the mutation was stored as evasive,
                # otherwise 0 -- verify against its definition).
                new_samples += save_file_database(detected, mutation,
                                                  url_sandbox, CSV, scanner)

            if r == rounds - 1:
                print('\n## Summary ##')

            if new_samples == files_expected:
                break

        print('Evasive mutations found: {}'.format(new_samples))
        print('Corrupt mutations found: {}'.format(new_corrupt_samples))
        n += 1

    return new_samples, new_corrupt_samples
Exemplo n.º 2
0
def armed2(bin_bytes, sample, n, rounds, files_expected, scanner):
    """
    ARMED-II: Automatic Random Malware Modifications to Evade Detection -- Incremental Iterations.

    Injects random perturbations sequentially into the input PE malware in
    order to find adversarial examples. After each injection, the malware
    mutation is tested for functionality and evasion. The search ends as
    soon as `files_expected` mutations have been collected, or when every
    perturbation level has used up its rounds.

    Input:
        bin_bytes: binaries from input malware sample (forwarded to
            f.rec_mod_files)
        sample: original malware sample as received from the caller
            (forwarded to f.hash_files and f.collect_info_CSV)
        n: number of perturbations to inject
        rounds: number of rounds to run per perturbation level when
            searching for evasions
        files_expected: number of malware mutations expected as output
        scanner: commercial AV or malware model classifier (forwarded to
            malware_detection and save_file_database)

    Returns:
        None. The number of mutations found is printed.
    """

    # Create a dict with all perturbations
    actions = f.actions_vector(m.ACTION_TABLE.keys())

    # Get VT detections for original sample to use as benchmark
    hash_sample = f.hash_files(sample)
    sample_report = f.get_report_VT(hash_sample, rescan=False)
    #sample_report = {'positives': 49, 'total': 66} # Debug mode (without VT/offline)

    # Detection is always local in this variant
    useVT = False

    # Inject perturbations and check for detection
    chosen_actions = [None] * n
    new_mutations = 0
    for x in range(n):

        for r in range(rounds):

            # Create random action and add it to sequence (slot x is
            # overwritten on every round, earlier slots are kept)
            random_actions = f.create_random_actions(actions, x + 1)
            chosen_actions[x] = random_actions[0]

            print(
                '\n### ARMED-II: Automatic Random Malware Modifications to Evade Detection ###\n'
            )
            print('# Manipulation Box # Round {} # Perturbation {} of {} #\n'.
                  format(r + 1, x + 1, n))

            # Call a recursive function to inject x perturbations on a given sample (Print = Perturbation: x+1)
            mod_sample = f.rec_mod_files(bin_bytes, actions, chosen_actions, x,
                                         x + 1)

            print('\n# Sandbox (Oracle) # Round {} # Perturbation {} of {} #'.
                  format(r + 1, x + 1, n))

            # Send the modified sample to sandbox to check functionality (not corrupt)
            json_send = f.send_local_sandbox(mod_sample)

            # Calculate hash of the modified sample
            mod_sample_hash = f.hash_files(mod_sample)

            # Collect info to writeCSV function
            CSV = f.collect_info_CSV(sample, sample_report, x, chosen_actions,
                                     mod_sample_hash, hash_sample)

            # Malware analysis & malware detection stages
            funcional, url_sandbox = malware_analysis(mod_sample, json_send,
                                                      useVT, CSV)

            # Increase number of mutations to match -m given based on local checks
            if funcional:
                print(
                    '# Malware Classifier # Round {} # Perturbation {} of {} #\n'
                    .format(r + 1, int(CSV['Perturbations']), n))
                # Check if mutation is detected
                mutation = CSV['Perturbations'] + '_m.exe'
                print('Running detection for:', mutation)
                detected = malware_detection(mutation, scanner)
                # save_file_database's return value is added to the counter
                # (presumably 1 when the mutation was stored as evasive,
                # otherwise 0 -- verify against its definition).
                new_mutations += save_file_database(detected, mutation,
                                                    url_sandbox, CSV, scanner)

            # Quota reached: leave the rounds loop
            if new_mutations >= files_expected:
                break

        # ...and leave the perturbation loop as well. Previously only the
        # inner loop was exited, so later perturbation levels kept running
        # and could push the counter past files_expected, after which the
        # equality test never fired again.
        if new_mutations >= files_expected:
            break

    # Summary
    print('Evasive mutations found: {}'.format(new_mutations))
Exemplo n.º 3
0
    def _generation(self, mutation):
        """
        Run one generation: select, breed, evaluate members, test termination.

        After selection, the first two members are mated, both children are
        mutated and they replace the last two members. Every member whose
        sequence has no recorded fitness yet is then injected into the
        sample, checked for functionality in the local sandbox and, when
        functional, submitted to detection; its cost is recorded in
        self.mutations_processed.

        Input:
            mutation: dict read with the keys 'Malware_Bytes', 'Actions',
                'hash_sample', 'Malware_Sample', 'Scanner' and
                'Files_Expected'

        Returns:
            True when the search must stop (enough evasions were found or
            the generation limit was hit), False otherwise. The generation
            counter is only advanced when False is returned.
        """

        # Call selection before breeding
        self.selection()

        # Breeding & mutating and adding children to the members list for Selection afterwards
        children = self.members[0].mate(self.members[1])
        children[0].mutate(0.1)
        children[1].mutate(0.1)
        self.members[-2] = children[0]
        self.members[-1] = children[1]

        # Detection is always local here (set useVT to use a VirusTotal report)
        useVT = False

        # VT report of the original sample; it is the same for every member,
        # so it is fetched lazily and at most once per call instead of once
        # per evaluated member.
        sample_report = None

        gene_num = 0
        for member in self.members:

            # If mutation was processed retrieve fitness value & avoid processing again
            existing_member = False
            for processed in self.mutations_processed:
                if processed[0] == member.code:
                    member.cost = processed[1]
                    existing_member = True
                    break
            if existing_member:
                continue

            # First generation calculates all genes, then breeds+mutates 2 members per generation
            gene_num += 1
            if self.generationNumber == 1:
                print(
                    '# Calculating fitness for gene {} of {}: {} #'.format(
                        gene_num, len(self.members), member.code))
            else:
                print('# Calculating fitness for child {}: {} #\n'.format(
                    gene_num, member.code))

            # Inject children sequences to S to create four S'
            mod_sample = f.rec_mod_files(mutation['Malware_Bytes'],
                                         mutation['Actions'], member.code,
                                         len(member.code) - 1,
                                         len(member.code))

            # Call functionality test
            json_send = f.send_local_sandbox(mod_sample)

            # Get VT detections for original sample to save in db
            if sample_report is None:
                sample_report = f.get_report_VT(mutation['hash_sample'],
                                                rescan=False)
                #sample_report = {'positives': 49, 'total': 66} # Debug mode (without VT/offline)

            # Collect info to writeCSV function
            mod_sample_hash = f.hash_files(mod_sample)
            CSV = f.collect_info_CSV(mutation['Malware_Sample'],
                                     sample_report,
                                     len(member.code) - 1, member.code,
                                     mod_sample_hash,
                                     mutation['hash_sample'])

            # Analyze functionality results
            CSV['Perturbations'] = str(len(member.code))
            funcional, url_sandbox = i.malware_analysis(
                mod_sample, json_send, useVT, CSV)
            mutation_file = CSV['Perturbations'] + '_m.exe'

            # Analyze detection results
            if funcional:
                print('Running detection for gene:', member.code)
                detected = i.malware_detection(mutation_file,
                                               mutation['Scanner'])
                self.new_evasions += i.save_file_database(
                    detected, mutation_file, url_sandbox, CSV,
                    mutation['Scanner'])

                # Calculate difference between original sample and mutation
                self.diff_samples = f.get_difference(
                    mutation['Malware_Sample'], mutation_file)
                diff_adjusted = round(
                    self.diff_samples / 100000,
                    3)  # Constant empirically defined as test

                # Set cost to S' instances
                member.calcCost(detected, self.generationNumber,
                                diff_adjusted)
            else:
                # Send empty when corrupt
                member.calcCost('', self.generationNumber, 0)
                self.corrupt_mutations += 1

            # Remember the fitness so this sequence is not evaluated again
            self.mutations_processed.append((member.code, member.cost))

            print('Sequence: {} – Fitness: {}\n'.format(
                member.code, member.cost))

        # Termination: number of evasions achieved or number of generations reach termination defined
        # (limit is Files_Expected squared when more than 9 files are
        # requested, otherwise 5 generations)
        termination_per_generation = mutation[
            'Files_Expected']**2 if mutation['Files_Expected'] > 9 else 5
        if self.new_evasions >= mutation[
                'Files_Expected'] or self.generationNumber == termination_per_generation:
            return True

        print('# Evasive mutations found: {} #'.format(self.new_evasions))
        print('# Corrupt mutations found: {} #\n'.format(
            self.corrupt_mutations))

        self.generationNumber += 1
        return False