def plot_apiImportNum():
    """Plot the per-packer distribution of the ``api_import_nb`` feature.

    The wild/lab dataframe is reduced to the label columns plus the feature
    itself (only those actually present in the dataframe) and then handed to
    ``plot_feature_dist_packers``.
    """
    feature = 'api_import_nb'
    wanted = util.LABELS + [feature]
    frame = util.load_wildlab_df()
    # Keep only the requested columns that exist, preserving the LABELS order.
    present = [name for name in wanted if name in frame.columns]
    plot_feature_dist_packers(frame[present], feature, cdf=-1, x_max=80)
def adv_onlyOnePacker(modelpath, confspath, featurespath):
    """Run the adversarial attack on every correctly-flagged sample and save the results.

    Args:
        modelpath: Path passed as-is to ``joblib.load`` to obtain the classifier.
        confspath: Path (relative to this file's directory) of the JSON file
            holding the per-sample confidences.
        featurespath: Path (relative to this file's directory) of the JSON
            file with parallel ``'weights'`` and ``'features'`` lists.

    Side effects:
        * Sets the module globals ``clf``, ``benign_feature_names``,
          ``features_df``, ``df``, ``dfb`` and ``dfm``. They are presumably
          read by ``attack`` inside the worker processes -- verify against
          ``attack``.
        * Writes ``malconfs-adv.json`` into the results directory, which is
          resolved relative to the current working directory.
    """
    global clf, benign_feature_names, features_df
    global df, dfb, dfm

    cur_dir = os.path.dirname(os.path.abspath(__file__))

    confspath = os.path.abspath(os.path.join(cur_dir, confspath))
    conf_results = util.read_json(confspath)

    featurespath = os.path.abspath(os.path.join(cur_dir, featurespath))
    features = util.read_json(featurespath)
    # Order the (weight, name) pairs by descending weight.
    features_sorted = sorted(
        zip(features['weights'], features['features']), reverse=True)
    feature_names_sorted = [name for _, name in features_sorted]
    feature_weights_sorted = [weight for weight, _ in features_sorted]

    clf = joblib.load(modelpath)
    assert conf_results and clf

    confs = json.loads(conf_results['0.5']['1.0']['confidence'])
    # Only samples whose label and prediction are both 1 are attacked
    # (the malicious class, judging by the variable naming).
    malconfs = {
        sample_id: val
        for sample_id, val in confs.items()
        if val['label'] == 1 and val['predict'] == 1
    }

    df = util.load_wildlab_df()
    known_features = set(feature_names_sorted)
    features_df = [col for col in df.columns if col in known_features]
    dfb = df[df.benign]
    dfm = df[df.malicious]

    benign_features = get_benign_features(
        feature_names_sorted, feature_weights_sorted, dfb, dfm)
    benign_feature_names = [name for _, name, _, _ in benign_features]

    # One [sample_id, initial confidence] work item per sample.
    data = [[sample_id, val['conf']] for sample_id, val in malconfs.items()]
    print("generating adv. samples for {} samples".format(len(data)))
    with multiprocessing.Pool() as pool:
        attack_results = pool.map(attack, data)

    report = {
        sample_id: {
            'log': log,
            'initConf': cur_conf,
            'finalConf': final_conf,
            'minChanges': min_changes,
            'maxChanges': max_changes
        }
        for log, cur_conf, final_conf, sample_id, min_changes, max_changes
        in attack_results
    }

    resdir = '../../../results/paper/experiments/exp-adversarial'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(resdir, exist_ok=True)
    with open('{}/malconfs-adv.json'.format(resdir), 'w') as f:
        json.dump(report, f)
def load_data(respath):
    """Build the train/test split for predicting the packer of a sample.

    The wild/lab dataframe is balanced per packer, its ``packer_name`` column
    is replaced by integer codes, and the frame is passed through
    ``exp_util.label_encode``. 70% of the rows are sampled (seeded with
    ``SEED``) for training; the remaining rows form the test set.

    Args:
        respath: Forwarded unchanged to ``exp_util.label_encode``.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where the ``*_y`` values are the
        encoded ``packer_name`` columns and the ``*_x`` frames have
        ``DROP_COLUMNS`` removed (missing columns are ignored).

    Side effects:
        Rebinds the module global ``packer_codes`` to the name -> code mapping.
    """
    global packer_codes

    frame = balance_per_packer(util.load_wildlab_df())

    # Codes start at 1 and follow the sorted order of the packer names.
    packer_codes = {
        name: code
        for code, name in enumerate(sorted(frame.packer_name.unique()), start=1)
    }
    frame['packer_name'] = [packer_codes[name] for name in frame['packer_name']]
    frame = exp_util.label_encode(frame, respath)

    n_train = int(len(frame) * 0.7)
    train_part = frame.sample(n_train, random_state=SEED)
    # Everything that was not sampled for training goes to the test set.
    test_part = frame[~frame.index.isin(train_part.index)]

    train_y = train_part['packer_name']
    test_y = test_part['packer_name']
    train_x = train_part.drop(columns=DROP_COLUMNS, axis=1, errors='ignore')
    test_x = test_part.drop(columns=DROP_COLUMNS, axis=1, errors='ignore')
    return train_x, train_y, test_x, test_y
def get_common_features():
    """Print LaTeX table rows for the features most often ranked top per packer.

    For every packer in ``util.PACKERS`` (minus a fixed exclusion list) the top
    50 features of its single-packer experiment are fetched with
    ``get_top_features``; the features appearing for the most packers are
    kept (up to 11, with ``api_import_nb`` removed). For each kept feature one
    row is printed containing, per packer, the number and percentage of
    benign and of malicious samples for which the feature column is truthy.

    Nothing is returned; the packer list and the rows are written to stdout.
    """
    from collections import Counter

    excluded = ('none', 'dolphin-dropper-3', 'themida-v2', 'telock', 'kkrunchy')
    packers = [p for p in util.PACKERS if p not in excluded]

    occurrences = Counter()
    for p in packers:
        respath = '../../results/paper/experiments/exp-singlePacker/rf/lab-v3/{}/import/exp.db.json'.format(
            p)
        # Explicit loop (not Counter.update) so each entry counts exactly once
        # regardless of the container type get_top_features returns.
        for name in get_top_features(respath, 50):
            occurrences[name] += 1

    features = [
        name for name, _ in occurrences.most_common(11)
        if name != 'api_import_nb'
    ]

    df = util.load_wildlab_df()
    labels = util.LABELS
    df = df[[c for c in df.columns if c.startswith('imp_') or c in labels]]

    def cell(subset, feature):
        # One bold "count (share%)" cell; an empty subset reports 0.0 instead
        # of raising ZeroDivisionError.
        hits = len(subset[subset[feature]])
        total = len(subset)
        share = round((hits * 100.0) / total, 2) if total else 0.0
        # '\\%' emits a literal backslash-percent; the previous '\%' was an
        # invalid escape sequence that newer Python versions warn about.
        return '\\textbf{' + str(hits) + ' (' + str(share) + '\\%)}'

    print(packers)
    for f in features:
        cells = [f]
        for p in packers:
            dp = df[df.packer_name == p]
            cells.append(cell(dp[dp.benign], f))
            cells.append(cell(dp[dp.malicious], f))
        print(' & '.join(cells) + " \\\\")
import itertools
import numpy as np
import exp_util
import sys
sys.path.append('../')
import util

# Configuration of the "labDiffPackedBenign" experiment. The module-level
# names below are read by the main program, so they are kept unchanged.
ratio_step = 10
rounds = 5
# 0.0, 0.1, ..., 1.0 when ratio_step is 10.
ratios = [pct / 100 for pct in range(0, 100 + ratio_step, ratio_step)]

# for the main program
# NOTE(review): only the first 11 (ratio, 1.0, round) combinations are kept --
# confirm the truncation is intentional.
iterations = list(
    itertools.islice(itertools.product(ratios, [1.0], range(rounds)), 11))

model_name = 'nn'

# Restrict the dataframe to the label columns that are actually present.
dataframe = util.load_wildlab_df()
columns = [label for label in util.LABELS if label in dataframe.columns]
dataframe = dataframe[columns]

res_dir = '{}/exp-labDiffPackedBenign/{}'.format(exp_util.RES_ROOT, model_name)
util.make_dir(res_dir)
database = '{}/exp.db'.format(res_dir)

n_workers = 1
cores_per_worker = -1
sizes = {
    'training_ratio': 0.7,
    'testing_packed_benign_ratio': 0.5,
    'testing_packed_malicious_ratio': 1,
}
import exp_util
import sys
sys.path.append('../')
import util

# Configuration of the "wild" experiment, driven by the command line:
# argv[2] is the model name, argv[3:] are the feature categories.
rounds = 5

# for the main program
# NOTE(review): itertools is used here but not imported in this section --
# confirm it is imported earlier in the file.
iterations = list(itertools.product([0.5], [0.75], range(rounds)))

model_name = sys.argv[2]
features = exp_util.get_features_ctgs(sys.argv[3:])

# String features are loaded only when the 'strings' category was requested.
dataframe = util.load_wildlab_df(nocorrupted=False,
                                 noduplicate=True,
                                 vtagree=True,
                                 dpiagree=False,
                                 strings='strings' in features)

# Results go to <RES_ROOT>/exp-wild/<model>/<sorted categories joined by '-'>,
# or to .../all when every category is selected.
if features != 'all' and ['all'] != features:
    res_dir = '{}/exp-wild/{}/{}'.format(exp_util.RES_ROOT, model_name,
                                         '-'.join(sorted(features)))
else:
    res_dir = '{}/exp-wild/{}/all'.format(exp_util.RES_ROOT, model_name)
util.make_dir(res_dir)