Exemplo n.º 1
0
def comparing(bin_bytes, sample, n, rounds, files_expected,
              detection_threshold, scanner):
    '''
    Run ARMED and then AIMED on the same sample and log both runs side by side.

    One row holding the sample, the number of perturbations, each module's
    elapsed time and file/corruption counts, and the overall wall-clock time
    is written to db/compare.csv through f.write_dict_CSV. Afterwards
    f.comparing_AXMED() is called (per the original author: refreshes a short
    CSV of time averages that is used as LaTeX input).

    Input:
        bin_bytes, sample, n, rounds, files_expected, detection_threshold,
        scanner: forwarded unchanged to armed(); aimed() receives the same
        values except rounds and detection_threshold, plus a fixed
        population size of 4.
    '''

    overall_begin = time()

    # Random baseline (ARMED); only the second element of its result is kept
    armed_begin = time()
    _ignored, armed_corrupt = armed(bin_bytes, sample, n, rounds,
                                    files_expected, detection_threshold,
                                    scanner)
    armed_elapsed = f.time_me(armed_begin)

    # Evolutionary run (AIMED)
    population = 4
    aimed_begin = time()
    aimed_evasions, aimed_corrupt = aimed(bin_bytes, sample, population, n,
                                          files_expected, scanner)
    aimed_elapsed = f.time_me(aimed_begin)

    # Assemble the CSV row: column names and values are paired positionally
    columns = [
        'Sample', 'Perturbations', 'Module 1', 'Time M1', 'Files M1',
        'Corr M1', 'Module 2', 'Time M2', 'Files M2', 'Corr M2', 'Total Time'
    ]
    total_elapsed = strftime('%H:%M:%S', gmtime(time() - overall_begin))
    values = (
        sample, n,
        'ARMED', armed_elapsed, files_expected, armed_corrupt,
        'AIMED', aimed_elapsed, aimed_evasions, aimed_corrupt,
        total_elapsed,
    )
    row = dict(zip(columns, values))
    f.write_dict_CSV('db/compare.csv', row, columns)

    # Update short version CSV with time averages to use as input in LaTeX
    f.comparing_AXMED()
Exemplo n.º 2
0
def main(option, scanner):
    '''
    Validate the command line, load the sample and run the selected module.

    Input:
        option: one of 'ARMED', 'ARMED2', 'AIMED' or 'COMPARE'; any other
                value runs no module (the sample is still read).
        scanner: forwarded unchanged to the selected module.

    The ArgumentParser is used for validation and for -h/--help output; the
    values actually used come from i.handling_input(sys.argv).
    '''

    # Defining paths
    # NOTE(review): these three locals are never read inside main(); other
    # functions that use names like mod_path must get them from module scope.
    mod_path = "samples/mod/"
    evasion_path = "samples/successful/"
    detected_path = "samples/successful/detected/"

    # Argument parsing & displaying __doc__
    # Each option gets its own dest so parsed values do not overwrite each
    # other in the resulting namespace (help text is driven by metavar).
    parser = ArgumentParser(description=__doc__)
    parser.add_argument("-s",
                        dest="sample",
                        required=False,
                        help="malware sample as input",
                        metavar="sample")
    parser.add_argument("-p",
                        dest="perturbations",
                        required=True,
                        help="number of perturbations to inject",
                        metavar="perturbations")
    parser.add_argument("-r",
                        dest="rounds",
                        required=False,
                        help="number of rounds to run",
                        metavar="rounds")
    parser.add_argument("-m",
                        dest="files_expected",
                        required=True,
                        help="number of manipulated files expected",
                        metavar="mutations exp.")
    parser.add_argument("-t",
                        dest="detection_threshold",
                        required=False,
                        help="run until detections are below threshold",
                        metavar="detection thresh.")
    # Called for its side effects only: rejects a command line that lacks
    # -p / -m and handles -h. The namespace itself is not used below.
    parser.parse_args()

    # Processing input from terminal
    sample, n, rounds, files_expected, detection_threshold = i.handling_input(
        sys.argv)

    # Convert malware sample into binaries
    bin_bytes = f.readfile(sample)

    # ARMED: Fixed length of sequence -- Using remote/local sandbox (HT/Cuckoo) + remote (VT)/local detection
    if option == 'ARMED':
        start_ARMED = time()
        i.armed(bin_bytes, sample, n, rounds, files_expected,
                detection_threshold, scanner)
        f.time_me(start_ARMED)

    # ARMED II: Incremental Iterations of perturbations' sequence -- Using local sandbox + local detection
    elif option == 'ARMED2':
        start_ARMED2 = time()
        i.armed2(bin_bytes, sample, n, rounds, files_expected, scanner)
        f.time_me(start_ARMED2)

    # AIMED: Fixed length & optimized order of perturbations -- GP with local sandbox + detection
    elif option == 'AIMED':
        size_population = 4  # & n = length_sequence (number of perturbations)
        start_AIMED = time()
        i.aimed(bin_bytes, sample, size_population, n, files_expected, scanner)
        f.time_me(start_AIMED)

    # COMPARE: Examine intelligent evolutionary algorithm against random (AIMED vs ARMED)
    elif option == 'COMPARE':
        start_COMPARE = time()
        i.comparing(bin_bytes, sample, n, rounds, files_expected,
                    detection_threshold, scanner)
        f.time_me(start_COMPARE)
Exemplo n.º 3
0
def malware_detection(mutation, snapshot):
    '''
    Detect a malware mutation locally, or check that it still executes.

    Input:
        mutation: file name of the mutation; appended to mod_path (taken from
                  outside this function) for the local model and to the guest
                  path e:\\mod\\ for the VM run.
        snapshot: 'GradientBoosting' or 'Functionality'. Any other value
                  prints the supported engine and exits the process.

    Returns:
        'GradientBoosting': True when the model score is above 0.9, else
        False.
        'Functionality': when the guest run raises, False if the error text
        contains 'returned non-zero exit status 1.' and True otherwise (the
        VM is NOT powered off on this path). When the guest run completes
        without raising, the VM is powered off and `detect` (False) is
        returned.

    Notes carried over from the original author:
        Gradient Boosting [Anderson et al. 2018]: trained with 100k malicious
        and benign samples, ROC-AUC = 0.993; a threshold of 0.9 corresponds
        to 1% FPR at 90% TPR. The functionality (beta) test is meant to cut
        the processing time of Cuckoo from 45 to 15 s.
    '''

    start = time()
    detect = False
    vm = "Windows7-Detection"
    path_m = 'e:\\mod\\'  # location of the mutations as seen from the guest

    # Reject unknown modes up front; only the two names below get past here
    if not (snapshot == 'GradientBoosting' or snapshot == 'Functionality'):
        print('Engines supported: GradientBoosting')
        sys.exit()

    # Pre-trained Gradient Boosting Model
    # Both branches return, so everything after this block only runs for
    # snapshot == 'Functionality'.
    if snapshot == 'GradientBoosting':
        av_model = f.load_av('data/gradient_boosting.pkl')
        bin_bytes = f.readfile(mod_path + mutation)
        score = f.get_score_local(bin_bytes, av_model)
        if score > 0.9:  # As per paper
            print('\nMalware detected.\n')
            return True
        else:
            print('\nSample not detected.\n')
            return False

    # Start & restore the VM (headless = invisible)
    # The snapshot restored is named 'Windows7-<snapshot>-Ready'.
    state = check_output(['VBoxManage', 'showvminfo', vm]).decode('UTF-8')
    if "powered off" in state or "saved" in state:
        call([
            'VBoxManage', 'snapshot', vm, 'restore',
            'Windows7-' + snapshot + '-Ready'
        ])
        call(['VBoxManage', 'startvm', vm, '--type', 'headless'])
    elif "paused" in state:
        call(['VBoxManage', 'controlvm', vm, 'resume', '--type', 'headless'])

    try:

        # Beta-test to check functionality (Reduces time of Cuckoo by 1/3 but needs further testing)
        if snapshot == "Functionality":
            try:
                # Run the mutation inside the guest, capped at 10 s by `timeout`
                status = check_output([
                    'timeout', '10', 'VBoxManage', 'guestcontrol', vm,
                    '--username', 'user', '--password', 'sandbox', 'run',
                    '--exe', path_m + mutation
                ])

            except Exception as err:
                # Exit status 1 is read as "could not run"; any other failure
                # is read as the sample working.
                # NOTE(review): presumably the "other failure" is `timeout`
                # killing a still-running sample -- confirm.
                if 'returned non-zero exit status 1.' in str(err):
                    print('\nMutation corrupt!\n')
                    valid = False
                else:
                    print('\nMutation WORKING!\n')
                    valid = True
                # Returns before the power-off below is reached
                return valid

    # NOTE(review): the inner `except Exception` already catches
    # CalledProcessError, so this handler does not appear to be reachable.
    except CalledProcessError as err:
        state = err

    # Terminate the running process
    # NOTE(review): snapshot can only be 'Functionality' at this point, so
    # this branch never runs; `s` is not defined anywhere in this function.
    if snapshot != "Functionality":
        s.kill()

    # Pause the VM - Use pause only if power-off is on main()
    #call(['VBoxManage', 'controlvm', vm, 'pause', '--type', 'headless'])

    # Power off the VM
    call(['VBoxManage', 'controlvm', vm, 'poweroff'])

    # Show total time in hh:mm:ss
    f.time_me(start)

    # Still the initial False: nothing above reassigns `detect`
    return detect
Exemplo n.º 4
0
def malware_detection_VT(sample_report, CSV):
    '''
    Detecting malware samples using VirusTotal (remote).

    Polls VirusTotal for the report of the mutation whose hash is stored in
    CSV['Mod_File_Hash']. Once the report is available, the detections are
    printed and compared against the original sample, the mutation is copied
    into evasion_path and a row is written to db/database.csv.

    Input:
        sample_report: VT report of the original sample, used as benchmark;
                       its 'positives', 'total' and 'permalink' entries are
                       read.
        CSV: record of the mutation. 'Mod_File_Hash' and 'Perturbations' are
             read; 'Manipulated_File', 'MF_Detections',
             'Full_Detections_Report' and 'Date_Reported' are written.

    Returns:
        The number of engines detecting the mutation (report['positives']),
        or None when no report became available within `limit` attempts.
    '''

    limit = 20
    start = time()

    # Comparing detections of both samples
    print('\n# Malware Detection Stage #')
    print('\nOriginal sample:')
    print('Detected by {} out of {} engines \n'.format(
        sample_report['positives'], sample_report['total']))
    print(sample_report['permalink'])
    print('\nStatus:')

    # Bounded polling keeps requests low and avoids an API ban by VT.
    # Every attempt counts towards the limit, including failed connections,
    # so a persistent network problem can no longer loop forever.
    for _ in range(limit):
        try:
            # Getting report of sample submitted via VT API - Rescan: False
            report = f.get_report_VT(CSV['Mod_File_Hash'], False)
        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests threshold reached: {}'.
                  format(e))
            # Back off like the other "not ready" branches instead of
            # retrying immediately
            sleep(60)
            continue

        # Check the status of sample & report
        if report['response_code'] == -2:
            print('The sample is queued for analysis. Next update in 60 s')
            sleep(60)
            continue

        if report['response_code'] != 1:  # 'response_code' == 0
            print("Sample is not present in VirusTotal's dataset")
            sleep(60)
            continue

        print('\nResults: New sample found')
        print('\nDetected by {} out of {} engines \n'.format(
            report['positives'], report['total']))

        # Print only engines detecting new sample
        print([
            engine for engine, result in report['scans'].items()
            if result['detected'] == 1
        ])

        # Provide link to sample detections report
        print('\n{}'.format(report['permalink']))

        # Calculate evasion rate based on original sample detections and print summary
        print('\n## Summary ##')
        new_ratio = report['positives'] / report['total']
        original_ratio = sample_report['positives'] / sample_report['total']
        print('\nEvasion rate: {:.2f}% of previous engines'.format(
            (1 - new_ratio / original_ratio) * 100))

        # Show detection time in hh:mm:ss
        f.time_me(start)

        # Copy successful sample into evasion path, tagged with a timestamp
        now = datetime.now()
        name_file = str(now.year) + str(now.month) + str(now.day) + str(
            now.hour) + str(now.minute) + str(now.second)
        evasion_file = (evasion_path + CSV['Perturbations'] + 'm_' +
                        name_file + '.exe')
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', evasion_file)

        # Update database with sample's info
        CSV['Manipulated_File'] = evasion_file
        CSV['MF_Detections'] = (str(report['positives']) + '/' +
                                str(report['total']))
        CSV['Full_Detections_Report'] = str(report['permalink'])
        CSV['Date_Reported'] = str(report['scan_date'])
        f.write_dict_CSV('db/database.csv', CSV, fields)

        return report['positives']

    # Limit reached without a finished report
    return None
Exemplo n.º 5
0
def malware_analysis_HA(mod_sample, json_send_HA, CSV):
    '''
    Analyze malware using remote service Hybrid Analysis.

    Polls the sandbox until the analysis ends, then judges from the report
    whether the mutation still runs: it is considered working when the report
    lists contacted domains or compromised hosts.

    Input:
        mod_sample: forwarded to f.send_VT when the mutation is working.
        json_send_HA: response of the submission; its 'sha256' and 'job_id'
                      entries are read.
        CSV: record of the mutation. 'Perturbations' is read;
             'Manipulated_File' and 'Full_Analysis_Report' are written when
             the mutation is filed as failed.

    Returns:
        (functionality, url_sample): whether the mutation is considered
        working, and the URL of the full report.

    The process exits via quit() when no final state is reached within
    `limit` polls.
    '''

    start = time()
    functionality = False

    # Wait a few minutes if server did not accept further submissions
    # NOTE(review): nothing in this loop re-submits or changes json_send_HA,
    # so a 429 value waits forever -- the re-submission probably belongs in
    # the caller; confirm.
    while json_send_HA == 429:
        print('Submission quota limit has been exceeded. Retry in 5 minutes.')
        sleep(301)

    # Retrieve report from Hybrid Analysis sandbox: report URL + Hash + Job ID
    url_sample = 'https://www.reverse.it/sample/' + json_send_HA[
        'sha256'] + '/' + json_send_HA['job_id']
    print('\nFull report: {}\n\nStatus:'.format(url_sample))

    # Bounded polling with sleeps keeps requests low and avoids an API ban
    # by HA (Limit: 5/m). Every poll counts towards the limit, including
    # those that end in a connection error.
    limit = 30
    for _ in range(limit):
        try:
            # Server could return 403; query the URL once per poll
            url_status = f.url_ok(url_sample)
            if url_status == 200 or url_status == 403:
                report_HA = f.get_summary_HA(json_send_HA['sha256'])
                state = report_HA['state']
                if state == 'ERROR':
                    print('The sandbox environment returned {}.'.format(
                        report_HA['error_type']))
                    break
                elif state == 'SUCCESS':
                    print('Analysis finished.')
                    break
                elif state == 'IN_QUEUE':
                    print(
                        'Waiting in queue to be analyzed. Next update in 60 s')
                elif state == 'IN_PROGRESS':
                    print('Analysis in progress..')
                # Not finished yet: wait before the next poll, as announced
                sleep(60)
            else:
                print('Website not reachable. Next update in 30 s')
                sleep(30)

        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests reached:\n{}'.format(e))
            sleep(30)
    else:
        # Only reached when the loop was never left through `break`
        print(
            'ARMED exited because the limit of {} minutes has been reached.\n'
            .format(limit))
        quit()

    # Check the likelihood that malware runs based on report
    # (report_HA is always bound here: the loop is only left through `break`,
    # which follows a successful f.get_summary_HA call)
    if report_HA['domains'] or report_HA['compromised_hosts']:
        functionality = True
        print('\nResults: WORKING')
        print('Malware connects to domains or contacts hosts.')

        # Show analysis time in hh:mm:ss
        f.time_me(start)

        # Send to VT to check detections
        print('Sent to VirusTotal!')
        f.send_VT(mod_sample)

    elif report_HA['state'] != 'ERROR':
        print('\nResults: Most likely not working')
        print('Check if manipulated sample runs before scanning.')
        print('Malware does not connect to domains or contacts hosts.')

        # Copy sample into failed path & tag with F
        now = datetime.now()
        name_file = str(now.year) + str(now.month) + str(now.day) + str(
            now.hour) + str(now.minute)
        failed_file = (fail_path + CSV['Perturbations'] + 'F_' + name_file +
                       '.exe')
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', failed_file)

        # Update database with basic sample's info
        CSV['Manipulated_File'] = failed_file
        CSV['Full_Analysis_Report'] = url_sample
        f.write_dict_CSV('db/fail_database.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    return functionality, url_sample
Exemplo n.º 6
0
def malware_analysis(mod_sample, json_send, useVT, CSV):
    '''
    Analyze malware with sandbox Cuckoo.

    Waits until the local sandbox marks the task as 'reported', then reads
    the report and classifies the mutation:
      - corrupt: the report's debug log contains the "Unable to execute the
        initial process" error; the file is copied into fail_path and a row
        is written to db/corrupted.csv
      - working: no such error and a duration of at least 15
      - undetermined: no such error and a duration below 15

    Input:
        mod_sample: Compiled version of modified malware mutation
        json_send: JSON status after sending mutation to local sandbox for analysis
        useVT: Boolean value indicating whether VirusTotal is used or detection will be performed locally
        CSV: Data structure with information to save on DB

    Returns:
        (functionality, url_sample): True only for the "working" outcome,
        together with the URL of the analysis summary.
    '''

    began = time()
    works = False
    task_id = json_send['task_id']

    # Show report from analysis sandbox: report URL + Job ID
    url_sample = ''.join(
        ['http://localhost:8000/analysis/', str(task_id), '/summary'])
    print('\nFull analysis report: {}\n\nStatus:'.format(url_sample))

    # Each intermediate status is announced once; an entry is removed from
    # this table as soon as its message has been printed.
    unannounced = {
        'completed': 'Analysis finished. Generating report..',
        'pending': 'Waiting in queue to be analyzed..',
        'running': 'Analysis in progress..',
    }

    # Using sleep in loop to space requests to sandbox may improve results
    while True:
        try:
            view = f.get_summary_local_sandbox(task_id, 'view')
            status = view['task']['status']
            if status == 'reported':
                print('Report finished.')
                break
            message = unannounced.pop(status, None)
            if message is not None:
                print(message)
            sleep(0.2)

        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API not available:\n{}'.format(e))

    # Check the likelihood that malware runs based on report
    err = 'CuckooPackageError: Unable to execute the initial process, analysis aborted.\n'
    summary = f.get_summary_local_sandbox(task_id, 'report')
    debug_log = summary['debug']['cuckoo']
    duration = summary['info']['duration']

    if err in debug_log:
        print('\nResults: Mutation is corrupt')

        # Copy sample into failed path & tag with letter F
        now = datetime.now()
        stamp = ''.join(
            str(part)
            for part in (now.year, now.month, now.day, now.hour, now.minute))
        failed_file = fail_path + CSV['Perturbations'] + 'F_' + stamp + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', failed_file)

        # Update database with basic sample's info
        reported_on = str(datetime.now())
        CSV['Manipulated_File'] = failed_file
        CSV['Full_Analysis_Report'] = url_sample
        CSV['Date_Reported'] = reported_on
        f.write_dict_CSV('db/corrupted.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(began)

    elif duration >= 15:
        works = True
        print('\nResults: WORKING')

        # Show analysis time in hh:mm:ss
        f.time_me(began)

        # Send to VT for detections (activate if local detection is not used)
        if useVT:
            print('Sending to VirusTotal!')
            f.send_VT(mod_sample)

    elif duration < 15:
        print(
            '\nResults: It could not be determined (score = {} – duration = {})'
            .format(summary['info']['score'], duration))

        # Show analysis time in hh:mm:ss
        f.time_me(began)

    return works, url_sample