def _get_data(data_pth=None, data_name=None, direction='src_dst', feat='iat_size', header=False, overwrite=False):
    """Load the data stored at ``data_pth``, generating the file first if it is missing.

    NOTE(review): a second definition of ``_get_data`` follows this one in the
    file; the later definition is the one bound to the name after import.

    Parameters
    ----------
    data_pth:
        Path of the data file. It is removed first when ``overwrite`` is true
        and it exists; if it does not exist afterwards, it is (re)generated.
    data_name:
        Forwarded as the first argument of ``generate_data_speed_up``.
    direction:
        Forwarded to ``generate_data_speed_up`` as ``direction``.
    feat:
        Forwarded to ``generate_data_speed_up`` as ``feat_type``.
    header:
        Forwarded to ``generate_data_speed_up`` as ``header``.
    overwrite:
        When true, any existing file at ``data_pth`` is deleted before loading,
        which forces regeneration. Also forwarded to ``generate_data_speed_up``.

    Returns
    -------
    Whatever ``load_data`` returns for the (possibly regenerated) file;
    presumably the features ``X`` and labels ``y`` -- TODO confirm against
    ``load_data``.
    """
    # A forced refresh discards the previously stored file, if there is one.
    if overwrite and pth.exists(data_pth):
        os.remove(data_pth)

    # Generate the file when nothing is stored at the path; the generator
    # returns the path that should be loaded afterwards.
    if not pth.exists(data_pth):
        data_pth = generate_data_speed_up(
            data_name,
            feat_type=feat,
            header=header,
            direction=direction,
            out_file=data_pth,
            overwrite=overwrite,
        )

    return load_data(data_pth)
def _get_data(data_pth=None, data_name=None, direction='src_dst', feat='iat_size', header=False, overwrite=False):
    """Return the contents of ``data_pth``, creating the file on demand.

    NOTE(review): this redefines ``_get_data``, which is also defined directly
    above in the file; being the later definition, this is the one in effect.

    Parameters
    ----------
    data_pth:
        Path of the data file to load (and to write to when it is generated).
    data_name:
        First positional argument handed to ``generate_data_speed_up``.
    direction, feat, header:
        Passed to ``generate_data_speed_up`` as ``direction``, ``feat_type``
        and ``header`` respectively.
    overwrite:
        When true, an existing file at ``data_pth`` is removed so that it is
        generated again; the flag is also passed on to the generator.

    Returns
    -------
    The result of ``load_data`` on the existing or newly generated file.
    """
    stale = overwrite and pth.exists(data_pth)
    if stale:
        os.remove(data_pth)

    # Fast path: the file is already on disk and can be loaded as is.
    if pth.exists(data_pth):
        return load_data(data_pth)

    # Otherwise build it; the generator reports the path it produced.
    generated_pth = generate_data_speed_up(data_name, feat_type=feat, header=header,
                                           direction=direction, out_file=data_pth,
                                           overwrite=overwrite)
    return load_data(generated_pth)
def _restore_ocsvm(model_params):
    """Rebuild an OCSVM from saved fitted attributes instead of refitting it."""
    oc = OCSVM()
    oc.kernel = model_params['kernel']
    # '_gamma' is only used for 'rbf'; 'linear' doesn't need it.
    oc._gamma = model_params['_gamma']
    oc.gamma = oc._gamma
    oc.support_vectors_ = model_params['support_vectors_']
    # Coefficients of the support vectors in the decision function.
    oc._dual_coef_ = model_params['_dual_coef_']
    oc.dual_coef_ = oc._dual_coef_
    oc._intercept_ = model_params['_intercept_']
    oc.intercept_ = oc._intercept_
    # Indices of support vectors: zeros of matching length are used here
    # rather than the saved 'support_' values.
    oc.support_ = np.zeros(oc.support_vectors_.shape[0], dtype=np.int32)
    # Number of support vectors for each class.
    oc._n_support = model_params['_n_support']
    # Selects sparse or dense kernel computation.
    oc._sparse = model_params['_sparse']
    # Used to check that train and test sets have the same dimension.
    oc.shape_fit_ = model_params['shape_fit_']
    # Pairwise probability information is not used, so it is left empty.
    oc.probA_ = np.zeros(0)
    oc.probB_ = np.zeros(0)
    # The offset is taken as the negated intercept rather than a saved 'offset_'.
    oc.offset_ = -1 * model_params['_intercept_']
    return oc


def _restore_gmm(model_params):
    """Rebuild a GMM from saved fitted attributes instead of refitting it."""
    oc = GMM()
    oc.covariance_type = model_params['covariance_type']
    oc.weights_ = model_params['weights_']
    oc.means_ = model_params['means_']
    # Only the Cholesky form of the precisions is restored ('precisions_' is not).
    oc.precisions_cholesky_ = model_params['precisions_cholesky_']
    return oc


def _restore_projection(model_name, project_params_file):
    """Rebuild the optional KJL/Nystrom projection and the matching flag dict.

    Returns ``(project, params)``. ``project`` is ``None`` when ``model_name``
    names neither projection, in which case ``project_params_file`` is not read.
    """
    params = {'is_kjl': False, 'is_nystrom': False}
    project = None
    # 'KJL' takes precedence over 'Nystrom' if both appear in the name.
    if 'KJL' in model_name:
        project_params = load_data(project_params_file)
        project = KJL(None)
        project.sigma = project_params['sigma']
        project.Xrow = project_params['Xrow']
        project.U = project_params['U']
        params['is_kjl'] = True
    elif 'Nystrom' in model_name:
        project_params = load_data(project_params_file)
        project = NYSTROM(None)
        project.sigma = project_params['sigma']
        project.Xrow = project_params['Xrow']
        project.eigvec_lambda = project_params['eigvec_lambda']
        params['is_nystrom'] = True
    return project, params


def minimal_model_cost(model_name, model_params_file, test_file, params, project_params_file, is_parallel=False):
    """Rebuild a saved detector from its parameters and evaluate it on a test set.

    The detector is reconstructed by assigning saved fitted attributes onto a
    fresh estimator, then passed to ``_test`` together with the test data.

    Parameters
    ----------
    model_name:
        String that selects what is rebuilt. It must contain 'OCSVM' or 'GMM'
        ('OCSVM' is checked first). If it also contains 'KJL' or 'Nystrom',
        the corresponding projection is rebuilt as well.
    model_params_file:
        Path given to ``load_data``; the result is indexed by attribute name
        (e.g. 'kernel', 'support_vectors_' for OCSVM, 'weights_' for GMM).
    test_file:
        Path given to ``load_data``; the result is unpacked as
        ``(X_test, y_test)``.
    params:
        NOTE(review): the value passed by the caller is not used. It is
        replaced by a fresh ``{'is_kjl': ..., 'is_nystrom': ...}`` dict derived
        from ``model_name``. The parameter is kept so existing callers work.
    project_params_file:
        Path given to ``load_data`` for the projection parameters; only read
        when ``model_name`` contains 'KJL' or 'Nystrom'.
    is_parallel:
        When true, the evaluation runs are dispatched through a joblib
        ``Parallel`` pool; otherwise they run in a plain loop.

    Returns
    -------
    auc:
        Mean of the first value returned by ``_test`` over all runs.
    X_test:
        The loaded test features, returned unchanged.
    test_time:
        Mean of the second value returned by ``_test`` over all runs.

    Raises
    ------
    NotImplementedError
        If ``model_name`` contains neither 'OCSVM' nor 'GMM'.
    """
    # 1. Create a new model from saved parameters.
    if 'OCSVM' in model_name:
        oc = _restore_ocsvm(load_data(model_params_file))
    elif 'GMM' in model_name:
        oc = _restore_gmm(load_data(model_params_file))
    else:
        raise NotImplementedError(
            'Unsupported model_name: {!r} (expected a name containing '
            '"OCSVM" or "GMM")'.format(model_name))
    # The caller-supplied ``params`` is intentionally overwritten here, as in
    # the original implementation.
    project, params = _restore_projection(model_name, project_params_file)

    # 2. Load the test set and evaluate the model.
    X_test, y_test = load_data(test_file)

    # Number of evaluation runs that are averaged.
    num = 1
    # Every run receives deep copies so that one run cannot affect another
    # through shared objects.
    if is_parallel:
        with Parallel(n_jobs=10, verbose=0, backend='loky', pre_dispatch=1, batch_size=1) as parallel:
            outs = parallel(
                delayed(_test)(copy.deepcopy(oc), copy.deepcopy(X_test), copy.deepcopy(y_test),
                               params=copy.deepcopy(params), project=copy.deepcopy(project))
                for _ in range(num))
    else:
        outs = [
            _test(copy.deepcopy(oc), copy.deepcopy(X_test), copy.deepcopy(y_test),
                  params=copy.deepcopy(params), project=copy.deepcopy(project))
            for _ in range(num)
        ]

    # Each run yields an (auc, test_time) pair; average each component.
    auc, test_time = zip(*outs)
    return np.mean(auc), X_test, np.mean(test_time)