def mainVisualize(params={}):
    """Show the training feature matrix as a heat map.

    ``params`` is passed through ``test.defParams`` to fill in defaults,
    the data set is loaded with ``test.loadData`` (nothing withheld), the
    load time is reported on stdout, and the transposed training matrix is
    drawn with ``matplotlib`` using the 'Reds' colour map.

    Args:
        params: option dictionary handed to ``test.defParams``.
    """
    held_out = 0  # visualisation always uses the full training set

    # Fill in default values for any options the caller left out.
    params = test.defParams(params)

    # TODO put the names of the feature functions you've defined above in
    # this list.  Other combinations that were tried here:
    #   [system_call_count_feats]
    #   [first_last_system_call_feats, system_call_count_feats]
    feature_fns = [system_call_2gram_feats]

    # Extract features, timing the load.
    print("extracting training features...")
    started = time.clock()
    X_train, _t_train, _train_ids, _X_test, _t_test, _test_ids = test.loadData(
        params, held_out, feature_fns)
    finished = time.clock()

    elapsed = finished - started
    n_features = X_train.shape[1]
    print("done extracting %d training features, time: %.4f s"
          % (n_features, elapsed))
    print("")  # blank separator line

    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    # Transpose and densify the training matrix so matshow can draw it.
    dense_view = X_train.T.toarray()
    palette = cm.get_cmap('Reds')
    plt.matshow(dense_view, cmap=palette)
    plt.colorbar()
    plt.show()
def mainTest(withhold=0, params={}):
    """Fit the extra-trees classifier and report and/or save its predictions.

    Features are extracted via ``test.loadData`` and predictions come from
    ``methods.extraTrees``.  When ``withhold`` is non-zero the predictions
    are scored against the returned labels with ``testCatAcc`` and the
    result is printed.  When ``params['writePredict']`` equals ``True`` the
    predictions are written with ``util.write_predictions`` to
    ``params['outputFile']``.

    Args:
        withhold: forwarded to ``test.loadData``; a non-zero value also
            enables the accuracy print-out.
        params: option dictionary; defaults are filled by ``test.defParams``.
    """
    # Fill in default values for any options the caller left out.
    params = test.defParams(params)

    # TODO put the names of the feature functions you've defined above in
    # this list.  Another combination that was tried here:
    #   [first_last_system_call_feats, system_call_count_feats]
    feature_fns = [system_call_count_feats, system_call_2gram_feats]

    # Extract features, timing the load.
    print("extracting training features...")
    started = time.clock()
    X_train, t_train, _train_ids, X_test, y_test, test_ids = test.loadData(
        params, withhold, feature_fns)
    finished = time.clock()

    elapsed = finished - started
    print("done extracting %d training features, time: %.4f s"
          % (X_train.shape[1], elapsed))
    print("")  # blank separator line

    # Other learners available in `methods` with the same call shape:
    # logRegress, decisionTree, randomForest.
    preds = methods.extraTrees(X_train, t_train, X_test)

    if withhold != 0:
        accuracy = testCatAcc(preds, y_test)
        print(accuracy)

    # Kept as an explicit comparison with True (not a truthiness test) so
    # the set of values that trigger writing stays exactly as before.
    wants_output = params['writePredict'] == True
    if wants_output:
        print("writing predictions...")
        util.write_predictions(preds, test_ids, params['outputFile'])
        print("done!")
def mainVisualizeFeatures(params={}):
    """Bar-plot every feature's mean over the training set, sorted by mean.

    Features are extracted from the "train" directory with
    ``extract_feats``.  For each entry of the returned feature dictionary
    (feature name -> column index) the column's mean and standard deviation
    are computed; the bars are drawn in ascending order of mean, with the
    standard deviation as the error bar and the feature name as the tick
    label.

    The means, standard deviations and names are reordered with one shared
    permutation, so each bar keeps its own error bar and label.  (Sorting
    the two statistics columns independently, and leaving the names in
    dictionary order, would pair bars with the wrong error bars and labels.)

    Args:
        params: option dictionary handed to ``test.defParams``.
    """
    # Fill in default values for any options the caller left out.
    params = test.defParams(params)
    train_dir = "train"

    # TODO put the names of the feature functions you've defined above in
    # this list.  Other combinations that were tried here:
    #   [system_call_count_feats]
    #   [first_last_system_call_feats, system_call_count_feats]
    feature_fns = [system_call_2gram_feats]

    # Extract features, timing the extraction.
    print("extracting training features...")
    started = time.clock()
    X_train, global_feat_dict, _t_train, _train_ids = extract_feats(
        feature_fns, train_dir)
    finished = time.clock()
    print("done extracting %d training features, time: %.4f s"
          % (X_train.shape[1], finished - started))
    print("")  # blank separator line

    # Per-feature statistics, stored at the feature's own column index so
    # that means[i], stds[i] and names[i] always describe the same feature.
    n_features = len(global_feat_dict)
    means = np.zeros(n_features)
    stds = np.zeros(n_features)
    names = [None] * n_features
    for feature, index in global_feat_dict.items():
        column = X_train[:, index].toarray()  # densify once for both stats
        means[index] = column.mean()
        stds[index] = column.std()
        names[index] = feature

    # One permutation (ascending mean) applied to all three sequences.
    order = np.argsort(means)
    sorted_means = means[order]
    sorted_stds = stds[order]
    sorted_names = [names[i] for i in order]

    import matplotlib.pyplot as plt

    positions = np.arange(n_features)
    plt.bar(positions, sorted_means, yerr=sorted_stds)
    plt.xticks(positions, sorted_names, rotation='vertical')
    plt.show()